# LIBRARIES

In [1]:
# Install required libraries
!pip install yfinance pandas numpy matplotlib scikit-learn tensorflow




In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout


**Retrieve YAHOO FINANCE data**

In [3]:
# Download gold price data for the last 5 years
gold_data = yf.download('GC=F', period='5y', interval='1d')

# Display the first few rows
gold_data.head()


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,GC=F,GC=F,GC=F,GC=F,GC=F
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-02-05,1557.800049,1560.699951,1547.699951,1553.199951,1305
2020-02-06,1565.099976,1565.599976,1551.599976,1553.599976,110
2020-02-07,1568.599976,1571.900024,1560.5,1564.900024,431
2020-02-10,1574.699951,1574.699951,1568.599976,1569.300049,128
2020-02-11,1565.599976,1571.099976,1561.900024,1570.900024,440


Match with other data **economic indicators**

In [5]:
# Assuming you have some economic indicators (for example, dummy data)
economic_data = {
    'Date': pd.date_range(start='2017-09-01', periods=len(gold_data), freq='D'),
    'InflationRate': np.random.uniform(1.0, 3.0, len(gold_data)),
    'UnemploymentRate': np.random.uniform(3.0, 7.0, len(gold_data)),
    'GeopoliticalRiskIndex': np.random.uniform(50, 150, len(gold_data))
}

# Convert to DataFrame
economic_df = pd.DataFrame(economic_data)

# Merge with gold price data on Date
# Reset index of gold_data to make Date a column
gold_data = gold_data.reset_index()

# Merge with economic_df on 'Date'
gold_data = gold_data.merge(economic_df, on='Date')

# Set the index back to 'Date'
gold_data.set_index('Date', inplace=True)

# Display the first few rows
gold_data.head()



MergeError: Not allowed to merge between different levels. (2 levels on the left, 1 on the right)

In [None]:
# Selecting features (including Close price and additional economic indicators)
features = ['Close', 'InflationRate', 'UnemploymentRate', 'GeopoliticalRiskIndex']
data = gold_data[features].values

# Scaling the features to be in the range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Preparing the data for LSTM
# We use 60 previous days' data to predict the next day's price
X = []
y = []

for i in range(60, len(scaled_data)):
    X.append(scaled_data[i-60:i])
    y.append(scaled_data[i, 0])  # Predict 'Close' price

X, y = np.array(X), np.array(y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape X_train and X_test for LSTM input (samples, time steps, features)
print("Training data shape:", X_train.shape)


In [None]:
# Build the LSTM model
model = Sequential()

# First LSTM layer with Dropout regularization
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))

# Second LSTM layer
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))

# Output layer
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test), verbose=1)


In [None]:
from keras.metrics import MeanAbsoluteError

# Compile the model with additional metrics
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[MeanAbsoluteError()])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test), verbose=1)


In [None]:
import matplotlib.pyplot as plt

# Plot training & validation loss values
plt.figure(figsize=(12, 6))

# Loss Plot
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# MAE Plot
plt.subplot(1, 2, 2)
plt.plot(history.history['mean_absolute_error'], label='Train MAE')
plt.plot(history.history['val_mean_absolute_error'], label='Val MAE')
plt.title('Mean Absolute Error')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend()

plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt

# Plot training & validation loss values
plt.figure(figsize=(12, 6))

# Loss Plot
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# MAE Plot
plt.subplot(1, 2, 2)
plt.plot(history.history['mean_absolute_error'], label='Train MAE')
plt.plot(history.history['val_mean_absolute_error'], label='Val MAE')
plt.title('Mean Absolute Error')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Predict the gold prices on the test set
predicted_prices = model.predict(X_test)
predicted_prices = scaler.inverse_transform(np.concatenate((predicted_prices, np.zeros((predicted_prices.shape[0], scaled_data.shape[1] - 1))), axis=1))[:, 0]

# Inverse scaling of test set (actual prices)
actual_prices = scaler.inverse_transform(np.concatenate((y_test.reshape(-1, 1), np.zeros((y_test.shape[0], scaled_data.shape[1] - 1))), axis=1))[:, 0]

# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(actual_prices, color='black', label='Actual Gold Prices')
plt.plot(predicted_prices, color='green', label='Predicted Gold Prices')
plt.title('Gold Price Prediction')
plt.xlabel('Time (Days)')
plt.ylabel('Gold Price')
plt.legend()
plt.show()


In [None]:
# Predict future prices based on the latest data
last_60_days = scaled_data[-60:]
X_future = np.array([last_60_days])
predicted_future_price = model.predict(X_future)
predicted_future_price = scaler.inverse_transform(np.concatenate((predicted_future_price, np.zeros((predicted_future_price.shape[0], scaled_data.shape[1] - 1))), axis=1))[:, 0]

print("Predicted gold price for the next day:", predicted_future_price[0])


In [None]:
model.save('lstm_gold_price_model.h5')

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import yfinance as yf

# Fetch gold price data for the last 5 years from Yahoo Finance
gold_data = yf.download('GC=F', start='2018-01-01', end='2023-01-01')

# Create dummy economic indicators (replace with actual data if available)
gold_data['InflationRate'] = np.random.uniform(1.0, 3.0, size=len(gold_data))  # Example inflation rate
gold_data['UnemploymentRate'] = np.random.uniform(4.0, 6.0, size=len(gold_data))  # Example unemployment rate
gold_data['GeopoliticalRiskIndex'] = np.random.uniform(90, 130, size=len(gold_data))  # Example geopolitical risk

# Selecting relevant features
features = ['Close', 'InflationRate', 'UnemploymentRate', 'GeopoliticalRiskIndex']
gold_data = gold_data.dropna()  # Drop any rows with missing values
data = gold_data[features].values

# Scaling the features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Prepare training data with 60 days look-back
X, y = [], []
for i in range(60, len(scaled_data)):
    X.append(scaled_data[i-60:i])  # The last 60 days' worth of data
    y.append(scaled_data[i, 0])  # Predicting the 'Close' price

X, y = np.array(X), np.array(y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Check shapes
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)


In [None]:
from tensorflow.keras.models import load_model
import numpy as np

# Load the trained LSTM model
model = load_model('lstm_gold_price_model.h5')

# Predict future prices based on the latest data (including external economic indicators)
def predict_gold_price_for_future(economic_data, inflation_rate, unemployment_rate, geopolitical_risk):
    """
    Predict gold price based on economic data and additional inputs.

    :param economic_data: Last 60 days of scaled historical data
    :param inflation_rate: Future inflation rate
    :param unemployment_rate: Future unemployment rate
    :param geopolitical_risk: Future geopolitical risk index
    :return: Predicted gold price for the future date
    """

    # Prepare the input data for prediction (add the economic indicators)
    # Since we don't have the future 'Close' price, we use 0 as a placeholder
    future_data = np.array([[0, inflation_rate, unemployment_rate, geopolitical_risk]])

    # Add the future indicators to the last 60 days of economic data
    new_input = np.concatenate((economic_data, future_data), axis=0)

    # Scale the input data using the scaler
    new_input_scaled = scaler.transform(new_input)

    # Take the last 60 days for prediction
    X_future = new_input_scaled[-60:]

    # Reshape the input to match LSTM expected input
    X_future = np.array([X_future])  # Reshape to (1, 60, 4)

    # Predict the future gold price (for the next day)
    predicted_future_price = model.predict(X_future)

    # Inverse transform to get back the actual price
    predicted_future_price = scaler.inverse_transform(
        np.concatenate((predicted_future_price, np.zeros((predicted_future_price.shape[0], new_input.shape[1] - 1))), axis=1)
    )[:, 0]

    return predicted_future_price[0]

# Example: Predict the gold price for the next day with new economic indicators
predicted_gold_price = predict_gold_price_for_future(
    economic_data=scaled_data[-60:],  # Last 60 days of historical data
    inflation_rate=2.5,               # Future inflation rate
    unemployment_rate=5.0,            # Future unemployment rate
    geopolitical_risk=120             # Future geopolitical risk index
)

print("Predicted gold price:", predicted_gold_price)
