In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as smi
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import LSTM, Dense, SimpleRNN, GRU
from keras.initializers import HeNormal
from keras_self_attention import SeqSelfAttention
import MetaTrader5 as mt5
from datetime import datetime
import tf2onnx
import os

In [None]:
# Connect to the MetaTrader 5 terminal. initialize() returns False on failure,
# so stop here with the terminal's error instead of failing later on empty data.
if not mt5.initialize():
    raise RuntimeError(f"MetaTrader5 initialize() failed: {mt5.last_error()}")

inp_start_date = datetime(2010, 1, 1) # Start Date
inp_end_date = datetime(2023, 12, 31) # End Date

symbols = ["EURUSD", "EURJPY"] # Custom Symbol
# Alternative: all symbols
# symbols = ["AUDCAD", "AUDCHF", "AUDJPY", "AUDNZD", "AUDUSD", "CADCHF", "CADJPY", "CHFJPY",
#            "EURAUD", "EURCAD", "EURCHF", "EURGBP", "EURJPY", "EURNZD", "EURUSD", "GBPAUD",
#            "GBPCAD", "GBPCHF", "GBPJPY", "GBPNZD", "GBPUSD", "NZDCAD", "NZDCHF", "NZDJPY",
#            "NZDUSD", "USDCAD", "USDCHF", "USDJPY", "USDSGD"]


# Get daily bars for every symbol from the client terminal
symbols_data = {}
for symbol in symbols:
    symbol_rates = mt5.copy_rates_range(symbol, mt5.TIMEFRAME_D1, inp_start_date, inp_end_date)
    # copy_rates_range returns None on error; an empty result would also break
    # the column operations below, so fail early with the symbol name.
    if symbol_rates is None or len(symbol_rates) == 0:
        raise RuntimeError(f"No D1 rates returned for {symbol}: {mt5.last_error()}")
    symbols_data[symbol] = {"data": pd.DataFrame(symbol_rates)}

# Concatenate data for all symbols; the symbol becomes the outer index level
df = pd.concat([symbols_data[symbol]["data"] for symbol in symbols], keys=symbols)
df = df.rename(columns={'time': 'date', 'tick_volume': 'volume'})
df = df.drop(columns=['spread', 'real_volume'])
df['date'] = pd.to_datetime(df['date'], unit='s')

# Convert numeric columns to float64 data type
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
df[numeric_columns] = df[numeric_columns].astype('float64')

# Standardize all columns for each symbol (the scaler is refit per symbol,
# so after the loop it only holds the statistics of the last symbol)
scaler = StandardScaler()
for symbol in symbols:
    df.loc[symbol, numeric_columns] = scaler.fit_transform(df.loc[symbol, numeric_columns])

# Plot standardized close prices, one line per symbol
plt.figure(figsize=(20, 6))
for symbol in symbols:
    symbol_frame = df.loc[symbol]
    plt.plot(symbol_frame['date'], symbol_frame['close'], label=symbol)

plt.xlabel('Date')
plt.ylabel('Standardized')
plt.legend()
plt.title('Symbols')
plt.show()

In [None]:
# Preview the first rows of the concatenated, per-symbol standardized frame.
df.head()

In [None]:
# Summarize column dtypes and non-null counts of the combined frame.
df.info()

In [None]:
# Descriptive statistics of the combined frame (values were standardized per symbol above).
df.describe()

In [None]:
# Keep only rows dated 2000-01-01 or later; df_plot is reused further down.
df_plot = df[df['date'] >= '2000-01-01']

# One figure, with one histogram panel per column on a 2x3 grid
plt.figure(figsize=(14, 7))

for position, column in enumerate(['open', 'high', 'low', 'close', 'volume'], start=1):
    panel = plt.subplot(2, 3, position)
    df_plot[column].hist(bins=50, ax=panel)
    panel.set_title(column)

# Adjust the layout and show the figure
plt.tight_layout()
plt.show()

# Data Analysis

In [None]:
# Parse the 'date' column as datetime and use it as the index.
# Note: this replaces the previous (symbol, row) index, so rows of all symbols
# now share one date index.
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')

In [None]:
# Per-column mean of the date-indexed frame.
df.mean()

In [None]:
# Per-column median of the date-indexed frame.
df.median()

In [None]:
# Per-column skewness of the date-indexed frame.
df.skew()

In [None]:
# (rows, columns) of the date-indexed frame.
df.shape

In [None]:
# Additive decomposition of the close column with a seasonal period of 50 observations.
# NOTE(review): 50 is not the number of trading days in a year (roughly 252 for
# daily bars), and df['close'] holds all symbols stacked one after another -
# confirm both are intended.
decomposition = seasonal_decompose(df['close'], model='additive', period=50)
plt.figure(figsize=(14, 7))

# (subplot position, component series, legend label), drawn top to bottom
decomposition_panels = [
    (411, decomposition.observed, 'Observed'),
    (412, decomposition.trend, 'Trend'),
    (413, decomposition.seasonal, 'Seasonality'),
    (414, decomposition.resid, 'Residuals'),
]
for panel_position, component, component_name in decomposition_panels:
    plt.subplot(panel_position)
    plt.scatter(component.index, component, label=component_name, marker='.', s=5)
    plt.legend(loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
# Side-by-side bar charts of per-column skewness and mean.
skewness_values = df.skew()
mean_values = df.mean()
fig, ax = plt.subplots(1, 2, figsize=(15, 10))
for panel, stats, panel_title in zip(ax, (skewness_values, mean_values), ('Skewness', 'Mean')):
    sns.barplot(x=stats.index, y=stats.values, ax=panel)
    panel.set_title(panel_title)
    panel.tick_params(axis='x', rotation=45)
plt.show()

In [None]:
# Box plot of every numeric column, laid out on a grid with 3 panels per row.
columns_per_row = 3
described_columns = df.describe().columns  # computed once, reused below
num_columns = len(described_columns)
num_rows = -(-num_columns // columns_per_row)  # ceiling division

# squeeze=False keeps `ax` two-dimensional even when the grid has a single row,
# so ax[row, col] indexing below always works.
fig, ax = plt.subplots(num_rows, columns_per_row, figsize=(15, 10), squeeze=False)
plt.subplots_adjust(wspace=0.5, hspace=0.5)

for i, column in enumerate(described_columns):
    current_row, current_column = divmod(i, columns_per_row)
    sns.boxplot(df[column], ax=ax[current_row, current_column])
    ax[current_row, current_column].set_title(column)

# Hide grid cells that have no column to display
for unused_axis in ax.flat[num_columns:]:
    unused_axis.set_visible(False)

plt.show()

In [None]:
# Density histogram with a KDE overlay for every numeric column, 3 panels per row.
columns_per_row = 3
described_columns = df.describe().columns  # computed once, reused below
num_columns = len(described_columns)
num_rows = -(-num_columns // columns_per_row)  # ceiling division

# squeeze=False keeps `ax` two-dimensional even when the grid has a single row,
# so ax[row, col] indexing below always works.
fig, ax = plt.subplots(num_rows, columns_per_row, figsize=(15, 15), squeeze=False)
plt.subplots_adjust(wspace=0.5, hspace=0.5)

for i, column in enumerate(described_columns):
    current_row, current_column = divmod(i, columns_per_row)
    panel = ax[current_row, current_column]

    # NOTE(review): kde_kws is passed without kde=True, so it presumably has no
    # effect here; the red curve below uses kdeplot's default bandwidth - confirm intent.
    sns.histplot(df[column], ax=panel, color='blue', stat="density", kde_kws={'bw_method': 0.2})

    sns.kdeplot(df[column], ax=panel, color='red', linewidth=2)

    panel.set_title(column)

# Hide grid cells that have no column to display
for unused_axis in ax.flat[num_columns:]:
    unused_axis.set_visible(False)

plt.show()

In [None]:
# Q-Q plot (with a regression reference line) for every numeric column, 3 panels per row.
columns_per_row = 3
described_columns = df.describe().columns  # computed once, reused below
num_columns = len(described_columns)
num_rows = -(-num_columns // columns_per_row)  # ceiling division

# squeeze=False keeps `ax` two-dimensional even when the grid has a single row,
# so ax[row, col] indexing below always works.
fig, ax = plt.subplots(num_rows, columns_per_row, figsize=(15, 15), squeeze=False)
plt.subplots_adjust(wspace=0.5, hspace=0.5)

for i, column in enumerate(described_columns):
    current_row, current_column = divmod(i, columns_per_row)

    smi.qqplot(df[column], ax=ax[current_row, current_column], line="r")
    ax[current_row, current_column].set_title(column)

# Hide grid cells that have no column to display
for unused_axis in ax.flat[num_columns:]:
    unused_axis.set_visible(False)

plt.show()

In [None]:
# One line plot per price column on a 2x2 grid. The legend label is taken from
# the plotted column so it can never disagree with the data shown.
fig, ax = plt.subplots(2, 2, figsize=(15, 8))
price_panels = [
    ('high', ax[0, 0]),
    ('open', ax[0, 1]),
    ('low', ax[1, 0]),
    ('close', ax[1, 1]),
]
for price_column, panel in price_panels:
    sns.lineplot(data=df, x='date', y=price_column, label=price_column, ax=panel)
plt.show()

In [None]:
# Pairwise scatter plots (lower triangle only) with KDE curves on the diagonal.
sns.set(style="whitegrid", palette="gist_rainbow_r")

pair_plot = sns.pairplot(df, corner=True, markers='.', diag_kind='kde', height=2)

# Title describes the frame actually plotted (the old text named a different dataset).
pair_plot.fig.suptitle("Pair Plot of Standardized Price and Volume Columns", y=1.02)
plt.show()

# PREPARE DATA


In [None]:
# Move the 'date' index back into a regular column; later cells read df['date'].
df = df.reset_index()

In [None]:
# Close price over time, with vertical markers at the dates of its minimum and maximum.
plt.figure(figsize=(15,10))
# Plot the close column: it is what the label says and what the markers are computed from.
sns.lineplot(data=df, x='date', y='close', label='close')
min_index = df['close'].idxmin()
max_index = df['close'].idxmax()
min_date = df.loc[min_index, 'date']
max_date = df.loc[max_index, 'date']
plt.axvline(x=min_date, color='r', linestyle='--', label='Min Close')
plt.axvline(x=max_date, color='g', linestyle='--', label='Max Close')
plt.legend()
plt.show()

In [None]:
# Standardize the close column as an (n, 1) array; `scaler` is kept so model
# outputs can be mapped back with inverse_transform later.
# NOTE(review): the scaler is fitted on the whole series before the train/test
# split, and these values were already standardized per symbol when loaded -
# confirm this is intended.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(df_plot[["close"]].to_numpy())

In [None]:
def create_sequences(data, seq_length):
    """Build sliding windows and next-step targets from column 0 of ``data``.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        seq_length: number of consecutive rows in each window.

    Returns:
        A tuple ``(sequences, labels)`` of NumPy arrays. Window ``k`` holds rows
        ``k .. k + seq_length - 1`` and label ``k`` is row ``k + seq_length``;
        there is one pair per start index in ``range(len(data) - seq_length)``.
    """
    window_starts = range(len(data) - seq_length)
    sequences = [data[start : start + seq_length, 0] for start in window_starts]
    labels = [data[start + seq_length, 0] for start in window_starts]
    return np.array(sequences), np.array(labels)

In [None]:
# Window the scaled series and split it chronologically: first 80% of the
# windows for training, the remainder for testing.
seq_length = 4
n_samples = len(data_scaled)
sequences, labels = create_sequences(data_scaled, seq_length)
split = int(0.8 * len(sequences))
X_train, X_test = sequences[:split], sequences[split:]
y_train, y_test = labels[:split], labels[split:]

# LSTM

In [None]:
# Two stacked LSTM layers (100 -> 50 units) followed by a single-output dense layer.
model_lstm = Sequential(
    [
        LSTM(100, activation='tanh', input_shape=(seq_length, 1), return_sequences=True),
        LSTM(50, activation='tanh'),
        Dense(1),
    ]
)
model_lstm.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

In [None]:
# Train the LSTM, then predict on the held-out windows.
# Note: the test split doubles as validation data during training.
model_lstm.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=64,
    validation_data=(X_test, y_test),
)
predictions_lstm = model_lstm.predict(X_test)

In [None]:
# Undo the StandardScaler transform on targets and predictions.
y_test_actual_LSTM = scaler.inverse_transform(y_test[:, np.newaxis])
predictions__LSTM = scaler.inverse_transform(predictions_lstm)

In [None]:
# Dates of the test targets. They are taken from df_plot, the same frame the
# sequences were built from, so their length always matches the predictions
# (df and df_plot only have the same rows while the date filter keeps every row).
# Target k belongs to row k + seq_length, hence the offset.
dates_test = df_plot['date'].iloc[split + seq_length:]
df_plot_lstm = pd.DataFrame({'Dates': dates_test, 'Real': y_test_actual_LSTM.flatten(), 'Predict': predictions__LSTM.flatten()})
df_plot_lstm.head(3)

# LSTM + ATTENTION

In [None]:
# LSTM -> self-attention over the sequence -> LSTM -> single-output dense layer,
# with He-normal kernel initialization on the LSTM and dense layers.
model_lstm_attention = Sequential(
    [
        LSTM(
            100,
            activation='tanh',
            input_shape=(seq_length, 1),
            return_sequences=True,
            kernel_initializer=HeNormal(),
        ),
        SeqSelfAttention(attention_activation='tanh'),
        LSTM(50, activation='tanh', return_sequences=False, kernel_initializer=HeNormal()),
        Dense(1, kernel_initializer=HeNormal()),
    ]
)
model_lstm_attention.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

In [None]:
# Train the LSTM + attention model, then predict on the held-out windows.
# Note: the test split doubles as validation data during training.
model_lstm_attention.fit(
    x=X_train,
    y=y_train,
    epochs=40,
    batch_size=32,
    validation_data=(X_test, y_test),
)
predictions_scaled_lstm_attention = model_lstm_attention.predict(X_test)

In [None]:
# Undo the StandardScaler transform on targets and predictions.
y_test_lstm_attention = scaler.inverse_transform(y_test[:, np.newaxis])
predictions_lstm_attention = scaler.inverse_transform(predictions_scaled_lstm_attention)

In [None]:
# Real vs. predicted values of the LSTM + attention model, keyed by test date.
df_plot_lstm_attention = pd.DataFrame(
    {
        'Dates': dates_test,
        'Real': y_test_lstm_attention.ravel(),
        'Predict': predictions_lstm_attention.ravel(),
    }
)
df_plot_lstm_attention.head(3)

# RNN

In [None]:
# Seeded He-normal initializer, shared by every layer of this model
he_normal_initializer = HeNormal(seed=42)

# SimpleRNN -> self-attention -> SimpleRNN -> single-output dense layer.
model_RNN = Sequential(
    [
        SimpleRNN(
            100,
            activation='tanh',
            input_shape=(seq_length, 1),
            return_sequences=True,
            kernel_initializer=he_normal_initializer,
        ),
        SeqSelfAttention(attention_activation='tanh', kernel_initializer=he_normal_initializer),
        SimpleRNN(50, activation='tanh', kernel_initializer=he_normal_initializer),
        Dense(1, kernel_initializer=he_normal_initializer),
    ]
)
model_RNN.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

In [None]:
# Train the RNN model, then predict on the held-out windows.
# Note: the test split doubles as validation data during training.
model_RNN.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=64,
    validation_data=(X_test, y_test),
)
predictions_scaled_RNN = model_RNN.predict(X_test)

In [None]:
# Undo the StandardScaler transform on targets and predictions.
y_test_RNN = scaler.inverse_transform(y_test[:, np.newaxis])
predictions_RNN = scaler.inverse_transform(predictions_scaled_RNN)

In [None]:
# Real vs. predicted values of the RNN model, keyed by test date.
df_plot_RNN = pd.DataFrame(
    {
        'Dates': dates_test,
        'Real': y_test_RNN.ravel(),
        'Predict': predictions_RNN.ravel(),
    }
)
df_plot_RNN.head(3)

# GRU

In [None]:
# Two stacked GRU layers (100 -> 50 units) followed by a single-output dense layer.
# This model is compiled with learning rate 0.01, unlike the 0.001 used for the others.
model_gru = Sequential(
    [
        GRU(100, activation='tanh', input_shape=(seq_length, 1), return_sequences=True),
        GRU(50, activation='tanh'),
        Dense(1),
    ]
)
model_gru.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

In [None]:
# Train the GRU model, then predict on the held-out windows.
# Note: the test split doubles as validation data during training.
model_gru.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_test, y_test),
)
predictions_scaled_gru = model_gru.predict(X_test)

In [None]:
# Undo the StandardScaler transform on targets and predictions.
y_test_GRU = scaler.inverse_transform(y_test[:, np.newaxis])
predictions_GRU = scaler.inverse_transform(predictions_scaled_gru)

In [None]:
# Real vs. predicted values of the GRU model, keyed by test date.
df_plot_GRU = pd.DataFrame(
    {
        'Dates': dates_test,
        'Real': y_test_GRU.ravel(),
        'Predict': predictions_GRU.ravel(),
    }
)
df_plot_GRU.head(3)

# Prediction Evaluation

In [None]:
# Mean squared error of each model on the test set
mse_lstm = mean_squared_error(df_plot_lstm['Real'], df_plot_lstm['Predict'])
mse_lstm_attention = mean_squared_error(df_plot_lstm_attention['Real'],df_plot_lstm_attention['Predict'])
mse_rnn = mean_squared_error(df_plot_RNN['Real'], df_plot_RNN['Predict'])
mse_GRU = mean_squared_error(df_plot_GRU['Real'], df_plot_GRU['Predict'])

# Coefficient of determination of each model on the test set
r2_lstm = r2_score(df_plot_lstm['Real'], df_plot_lstm['Predict'])
r2_df_plot_lstm_attention= r2_score(df_plot_lstm_attention['Real'],df_plot_lstm_attention['Predict'])
r2_rnn = r2_score(df_plot_RNN['Real'], df_plot_RNN['Predict'])
r2_GRU = r2_score(df_plot_GRU['Real'], df_plot_GRU['Predict'])

# RMSE is the square root of the matching MSE. Deriving it this way keeps every
# value tied to its own model (previously the LSTM value was overwritten with
# the RNN one and rmse_rnn held the attention model's value) and avoids the
# `squared=False` argument, which newer scikit-learn releases no longer accept.
rmse_lstm = np.sqrt(mse_lstm)
rmse_lstm_attention = np.sqrt(mse_lstm_attention)
rmse_rnn = np.sqrt(mse_rnn)
rmse_GRU = np.sqrt(mse_GRU)

# LSTM / ATTENTION / GRU / RNN

In [None]:
def plot_subplot(ax, df_real, df_predict, title, r2, mse, rmse):
    """Draw real vs. predicted series on ``ax`` and annotate it with the metrics.

    Args:
        ax: Matplotlib axes to draw on.
        df_real: frame providing the "Dates" and "Real" columns (solid blue line).
        df_predict: frame providing the "Dates" and "Predict" columns (dashed red line).
        title: axes title.
        r2, mse, rmse: metric values, printed with four decimals inside the axes.
    """
    shared_line_options = {"x": "Dates", "ax": ax, "linewidth": 2}
    sns.lineplot(y="Real", data=df_real, label="Real", color="blue", **shared_line_options)
    sns.lineplot(
        y="Predict",
        data=df_predict,
        label="Predict",
        color="red",
        linestyle="dashed",
        **shared_line_options,
    )

    # Metric annotations, stacked downwards in axes-relative coordinates
    metric_annotations = (
        (0.80, f"R²: {r2:.4f}"),
        (0.75, f"MSE: {mse:.4f}"),
        (0.70, f"RMSE: {rmse:.4f}"),
    )
    for y_position, annotation in metric_annotations:
        ax.text(
            0.5, y_position, annotation, fontsize=8, color="black", transform=ax.transAxes
        )

    ax.set_title(title, fontsize=10)
    ax.legend(fontsize=8)
    # Pin the current tick positions before restyling their labels
    ax.set_xticks(ax.get_xticks())
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, fontsize=6)


# 2x2 comparison of the four models, each panel annotated with its own metrics.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# (axes, result frame, title, R², MSE) per model. The attention panel now uses
# the attention model's metrics instead of the RNN ones.
comparison_panels = [
    (axes[0, 0], df_plot_lstm, "LSTM", r2_lstm, mse_lstm),
    (
        axes[0, 1],
        df_plot_lstm_attention,
        "LSTM + Attention",
        r2_df_plot_lstm_attention,
        mse_lstm_attention,
    ),
    (axes[1, 0], df_plot_RNN, "RNN", r2_rnn, mse_rnn),
    (axes[1, 1], df_plot_GRU, "GRU", r2_GRU, mse_GRU),
]
for panel_axes, result_frame, panel_title, panel_r2, panel_mse in comparison_panels:
    # RMSE is derived from the panel's own MSE so it always belongs to the same model.
    plot_subplot(
        panel_axes,
        result_frame,
        result_frame,
        panel_title,
        panel_r2,
        panel_mse,
        panel_mse ** 0.5,
    )
plt.tight_layout()
plt.show()

# SAVE MODEL TO ONNX

In [None]:
# Save Model: export one of the trained models to an ONNX file.
model = [model_lstm, model_lstm_attention, model_RNN, model_gru]

inp_model_name = "model.Test.lstm-rnn-gru.onnx"
output_folder = "D:/"
# exist_ok=True creates the folder only when it is missing, in a single step
os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, inp_model_name)
# Remove a previous export so the file on disk always comes from this run
if os.path.exists(output_path):
    os.remove(output_path)

# Select the model to analyze in mq5 (index 1 is the LSTM + attention model).
# from_keras returns (model_proto, external_tensor_storage); unpack it so that
# onnx_model is the model itself rather than a tuple.
onnx_model, external_tensor_storage = tf2onnx.convert.from_keras(model[1], output_path=output_path)
print(f"Model Saved in {output_path}")