In [2]:
# Import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Conv1D, Dense, GlobalMaxPooling1D, Input, MaxPooling1D
from keras.models import Sequential
from keras.optimizers import Adam
from numpy.lib.stride_tricks import sliding_window_view

# Load the dataset
# The path is relative, so it is resolved against the notebook's working directory.
file_path = "EURUSD_tick_OK.csv"  # Update with the correct path if necessary
# Read the whole CSV into a DataFrame with pandas' default parsing options.
# NOTE(review): the file name suggests EUR/USD tick data — confirm the schema
# against the cells that consume `data`.
data = pd.read_csv(file_path)


In [None]:


# Normalization (as described in the uploaded file)
# Builds the five *_N feature columns from the raw tick columns Vol_Ask, Ask,
# DateDelta1, Bid and Vol_Bid, all of which must already exist in `data`.
# NOTE(review): the `data.info()` output saved further down in this notebook
# lists only pre-normalized columns (Vol_Ask_N, Ask_N_200_3, ...). If the CSV
# on disk matches that output, this cell raises KeyError and should be skipped
# — confirm against the actual file.
# Fixed: this line previously read `c] / 10`, a syntax error that prevented the
# whole cell from parsing; it now mirrors the Vol_Bid_N line below.
data['Vol_Ask_N'] = data['Vol_Ask'] / 10  # Normalize based on max of 10 lots
# Min-max scaling to [0, 1]. The extremes come from the full column, so they
# include rows that later land in the test split; a constant column would
# divide by zero and yield NaN.
data['Ask_N'] = (data['Ask'] - data['Ask'].min()) / (data['Ask'].max() - data['Ask'].min())
# Presumably DateDelta1 is in milliseconds (20000 == 20 s) — TODO confirm.
data['DateDelta1_N'] = data['DateDelta1'] / 20000  # Normalize based on max 20 seconds
data['Bid_N'] = (data['Bid'] - data['Bid'].min()) / (data['Bid'].max() - data['Bid'].min())
data['Vol_Bid_N'] = data['Vol_Bid'] / 10  # Normalize based on max of 10 lots

# Create tensors
N = 50  # Sequence length: number of consecutive ticks in one input window
n_small = len(data)  # Use the entire dataset size

# Convert to NumPy for tensor creation. The column order fixes the feature axis:
# 0=Vol_Ask_N, 1=Ask_N, 2=DateDelta1_N, 3=Bid_N, 4=Vol_Bid_N
data_normalized = data[['Vol_Ask_N', 'Ask_N', 'DateDelta1_N', 'Bid_N', 'Vol_Bid_N']].values

# One sample needs N ticks of history plus one following tick to predict.
if n_small <= N:
    raise ValueError(f"Need more than N={N} rows to build training windows, got {n_small}")

# Create input tensor (X) and output tensor (Y)
# sliding_window_view exposes every length-N window along axis 0 as a view of
# shape (rows - N + 1, features, N), replacing a Python loop over every row.
# The window axis is moved to the middle to get (windows, N, features), and
# only the first n_small - N windows are kept because the last window has no
# following tick to use as a target. .copy() materializes a regular writable,
# C-ordered array (the view itself is read-only).
data_b = (
    sliding_window_view(data_normalized, N, axis=0)
    .transpose(0, 2, 1)[:n_small - N]
    .copy()
)
# The target of window i is row i + N, i.e. rows N .. n_small - 1;
# columns [3, 1] select (Bid_N, Ask_N) in that order.
Y = data_normalized[N:n_small, [3, 1]]  # Next tick's Bid and Ask

# Train-test split
# Order-preserving hold-out: the leading 90% of windows are used for training
# and the trailing 10% for testing (no shuffling).
train_size = int(0.9 * len(data_b))
train_part, test_part = slice(None, train_size), slice(train_size, None)
X_train, y_train = data_b[train_part], Y[train_part]
X_test, y_test = data_b[test_part], Y[test_part]

# Display shapes (one line per array)
print(
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

# Define the CNN model
# 1-D CNN over tick windows: conv -> max-pool -> conv -> global max-pool -> dense head.
# The input shape is read from the training tensor instead of being hard-coded
# as (50, 5), so changing N or the feature list cannot silently desynchronize
# the model from the data. It is declared with an explicit Input layer, which
# Keras recommends for Sequential models over passing input_shape= to the
# first layer.
# With N = 50 and the default 'valid' padding the time axis shrinks
# 50 -> 46 (conv, k=5) -> 6 (pool, 7) -> 2 (conv, k=5) before global pooling.
model = Sequential([
    Input(shape=X_train.shape[1:]),  # (N, n_features)
    Conv1D(filters=50, kernel_size=5, activation='relu'),
    MaxPooling1D(pool_size=7),
    Conv1D(filters=100, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(25, activation='relu'),
    Dense(2)  # Output layer for predicting Bid and Ask prices
])

# Compile the model: MSE is the training loss, MAE is tracked as a readable metric
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])

# Train the model
# NOTE(review): the original note on `epochs` read "Calculated based on Z + Y";
# what Z and Y denote is not visible in this notebook — TODO confirm.
epochs = 10
batch_size = 50

# Gather the fit options in one mapping, then launch training.
# The test split is also passed as validation data, so the per-epoch val_*
# numbers are computed on the same windows that are evaluated afterwards.
fit_options = dict(
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

# Evaluate the model
# The result is unpacked as (loss, MAE), matching the loss and the single
# metric the model was compiled with.
test_scores = model.evaluate(X_test, y_test, verbose=1)
loss, mae = test_scores
print(f"Test Mean Absolute Error (MAE): {mae}")

# Plot training history
# Per-epoch MAE curves recorded by fit(): training and validation.
history_dict = history.history
mean_absolute_error_values, val_mean_absolute_error_values = (
    history_dict['mae'],
    history_dict['val_mae'],
)

# Epochs are numbered from 1 on the x-axis.
epochs_range = range(1, len(mean_absolute_error_values) + 1)
plt.figure(figsize=(15, 7))
# Training MAE as blue dots, validation MAE as a blue line.
for curve, line_format, curve_label in (
    (mean_absolute_error_values, 'bo', 'Training MAE'),
    (val_mean_absolute_error_values, 'b', 'Validation MAE'),
):
    plt.plot(epochs_range, curve, line_format, label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Mean Absolute Error (MAE)')
plt.legend()
plt.show()

# Generate predictions and compare with true values
pred = model.predict(X_test)

# One figure per output column: (column index, true label, predicted label, title).
# Column 0 is Bid and column 1 is Ask, following the target column order [3, 1].
plot_specs = (
    (0, 'True Bid Prices', 'Predicted Bid Prices', 'Bid Price Predictions vs True Values'),
    (1, 'True Ask Prices', 'Predicted Ask Prices', 'Ask Price Predictions vs True Values'),
)
for column, true_label, pred_label, figure_title in plot_specs:
    plt.figure(figsize=(15, 7))
    plt.plot(y_test[:, column], label=true_label)
    plt.plot(pred[:, column], label=pred_label)
    plt.title(figure_title)
    plt.legend()
    plt.show()


In [3]:
# Display dataset information
print(data.head())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() appended a stray "None" line to the cell output.
data.info()

   Vol_Ask_N  Ask_N_200_3  DateDelta1_N  Bid_N_200_3  Vol_Bid_N
0      0.176     0.636364       0.01655     0.515152      0.100
1      0.364     0.606061       0.02750     0.454545      0.420
2      0.100     0.575758       0.09770     0.454545      0.187
3      0.100     0.151515       0.01590     0.121212      0.100
4      0.270     0.212121       0.01040     0.060606      0.214
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100000 entries, 0 to 1099999
Data columns (total 5 columns):
 #   Column        Non-Null Count    Dtype  
---  ------        --------------    -----  
 0   Vol_Ask_N     1100000 non-null  float64
 1   Ask_N_200_3   1100000 non-null  float64
 2   DateDelta1_N  1100000 non-null  float64
 3   Bid_N_200_3   1100000 non-null  float64
 4   Vol_Bid_N     1100000 non-null  float64
dtypes: float64(5)
memory usage: 42.0 MB
None


In [6]:

# Window length: each model input consists of N consecutive rows
N = 50
# Number of rows taken from the DataFrame (all of them)
n_small = data.shape[0]

In [7]:

# Feature columns, in the order that defines the last axis of the model input.
# Positions 3 (Bid_N_200_3) and 1 (Ask_N_200_3) are the ones later selected as targets.
feature_columns = ['Vol_Ask_N', 'Ask_N_200_3', 'DateDelta1_N', 'Bid_N_200_3', 'Vol_Bid_N']
# Plain NumPy matrix of shape (rows, len(feature_columns)) for window slicing
data_normalized = data[feature_columns].values


In [8]:
# Build the input windows (data_b) and next-tick targets (Y).
# One sample needs N rows of history plus one following row to predict.
if n_small <= N:
    raise ValueError(f"Need more than N={N} rows to build training windows, got {n_small}")

# sliding_window_view exposes every length-N window along axis 0 as a view of
# shape (rows - N + 1, features, N), replacing a Python loop over every row.
# The window axis is moved to the middle to get (windows, N, features), and
# only the first n_small - N windows are kept because the last window has no
# following row to use as a target. .copy() materializes a regular writable,
# C-ordered array (the view itself is read-only).
data_b = (
    sliding_window_view(data_normalized, N, axis=0)
    .transpose(0, 2, 1)[:n_small - N]
    .copy()
)
# The target of window i is row i + N, i.e. rows N .. n_small - 1;
# columns [3, 1] select (Bid, Ask) in that order.
Y = data_normalized[N:n_small, [3, 1]]  # Next tick's Bid and Ask



In [9]:
# Order-preserving 90/10 hold-out: the earliest windows train the model and
# the latest ones are kept for testing (no shuffling).
train_size = int(len(data_b) * 0.9)
(X_train, y_train), (X_test, y_test) = (
    (data_b[:train_size], Y[:train_size]),
    (data_b[train_size:], Y[train_size:]),
)

# Display shapes, one line per array
shape_report = [
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
]
print("\n".join(shape_report))


X_train shape: (989955, 50, 5)
y_train shape: (989955, 2)
X_test shape: (109995, 50, 5)
y_test shape: (109995, 2)


In [10]:
# 1-D CNN over tick windows: conv -> max-pool -> conv -> global max-pool -> dense head.
# The input shape is read from the training tensor instead of being hard-coded
# as (50, 5), so changing N or the feature list cannot silently desynchronize
# the model from the data. It is declared with an explicit Input layer, which
# Keras recommends for Sequential models over passing input_shape= to the
# first layer.
# With N = 50 and the default 'valid' padding the time axis shrinks
# 50 -> 46 (conv, k=5) -> 6 (pool, 7) -> 2 (conv, k=5) before global pooling.
model = Sequential([
    Input(shape=X_train.shape[1:]),  # (N, n_features)
    Conv1D(filters=50, kernel_size=5, activation='relu'),
    MaxPooling1D(pool_size=7),
    Conv1D(filters=100, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(25, activation='relu'),
    Dense(2)  # Output layer for predicting Bid and Ask prices
])

# Compile the model: MSE is the training loss, MAE is tracked as a readable metric
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
# Training hyper-parameters: number of passes over the training set and
# number of windows per gradient step.
epochs, batch_size = 10, 50

# Fit on the training windows. The test split is also passed as validation
# data, so the per-epoch val_* numbers are computed on the same windows that
# are evaluated afterwards.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m19800/19800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 1ms/step - loss: 0.0069 - mae: 0.0600 - val_loss: 0.0052 - val_mae: 0.0527
Epoch 2/10
[1m19800/19800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 1ms/step - loss: 0.0052 - mae: 0.0524 - val_loss: 0.0051 - val_mae: 0.0521
Epoch 3/10
[1m19800/19800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 1ms/step - loss: 0.0051 - mae: 0.0517 - val_loss: 0.0050 - val_mae: 0.0507
Epoch 4/10
[1m19800/19800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 1ms/step - loss: 0.0050 - mae: 0.0511 - val_loss: 0.0063 - val_mae: 0.0593
Epoch 5/10
[1m19800/19800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 1ms/step - loss: 0.0050 - mae: 0.0510 - val_loss: 0.0049 - val_mae: 0.0508
Epoch 6/10
[1m19800/19800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 1ms/step - loss: 0.0049 - mae: 0.0508 - val_loss: 0.0050 - val_mae: 0.0509
Epoch 7/10
[1m19800/19800[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [None]:
# Score the trained model on the held-out windows. The result is unpacked as
# (loss, MAE), matching the loss and the single metric passed to compile().
loss, mae = model.evaluate(x=X_test, y=y_test, verbose=1)
print(f"Test Mean Absolute Error (MAE): {mae}")

In [None]:
# Pull the per-epoch MAE curves (training, then validation) out of the
# History object returned by fit().
history_dict = history.history
mean_absolute_error_values, val_mean_absolute_error_values = (
    history_dict[metric_key] for metric_key in ('mae', 'val_mae')
)

In [None]:
# Learning curves: training MAE as blue dots, validation MAE as a blue line,
# with epochs numbered from 1 on the x-axis.
epochs_range = range(1, len(mean_absolute_error_values) + 1)
fig, ax = plt.subplots(figsize=(15, 7))
ax.plot(epochs_range, mean_absolute_error_values, 'bo', label='Training MAE')
ax.plot(epochs_range, val_mean_absolute_error_values, 'b', label='Validation MAE')
ax.set(xlabel='Epochs', ylabel='Mean Absolute Error (MAE)')
ax.legend()
plt.show()

In [None]:
# Predict on the held-out windows; column 0 is Bid and column 1 is Ask,
# following the target column order [3, 1] used when Y was built.
pred = model.predict(X_test)

# Bid: true series vs. model output, plotted against the test-sample index
bid_fig, bid_ax = plt.subplots(figsize=(15, 7))
bid_ax.plot(y_test[:, 0], label='True Bid Prices')
bid_ax.plot(pred[:, 0], label='Predicted Bid Prices')
bid_ax.set_title('Bid Price Predictions vs True Values')
bid_ax.legend()
plt.show()

# Ask: same comparison for the second output column
ask_fig, ask_ax = plt.subplots(figsize=(15, 7))
ask_ax.plot(y_test[:, 1], label='True Ask Prices')
ask_ax.plot(pred[:, 1], label='Predicted Ask Prices')
ask_ax.set_title('Ask Price Predictions vs True Values')
ask_ax.legend()
plt.show()