In [None]:
# CNN Practical Session - Complete Code
# Student ID: 2463693

# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Report library versions so the run can be reproduced later.
print("TensorFlow version:", tf.__version__)
print("Keras version:", keras.__version__)

# Load the dataset and show a quick overview of its structure.
df = pd.read_csv('EURUSD_tick_OK-2.csv')
print(f"\nDataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print("\nFirst few rows:")
print(df.head())
print("\nDataset info:")
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() would append a stray "None" line to the output.
df.info()

# Prepare data for CNN
# Separate features and target (using Vol_Ask_N as target for regression)
features = df[['Ask_N_200_3', 'DateDelta1_N', 'Bid_N_200_3', 'Vol_Bid_N']].values
target = df['Vol_Ask_N'].values

# Decide which rows belong to train and test BEFORE computing any scaling
# statistics. Splitting row indices with the same test_size/random_state
# selects the same rows as splitting the arrays directly, while letting the
# scaler be fitted on training rows only.
train_idx, test_idx = train_test_split(
    np.arange(features.shape[0]), test_size=0.2, random_state=42
)

# Normalize features: fit mean/std on the training rows only, so that no
# information from the held-out test rows leaks into preprocessing, then apply
# the same transform to every row.
scaler = StandardScaler()
scaler.fit(features[train_idx])
features_scaled = scaler.transform(features)

# Reshape for Conv1D: (samples, features, 1) -- the feature columns are laid
# out along the convolved axis with a single channel.
X = features_scaled.reshape(features_scaled.shape[0], features_scaled.shape[1], 1)
y = target.reshape(-1, 1)

print(f"\nX shape: {X.shape}")
print(f"y shape: {y.shape}")

# Materialize the train and test sets from the index split made above.
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]
print(f"\nTrain set: X={X_train.shape}, y={y_train.shape}")
print(f"Test set: X={X_test.shape}, y={y_test.shape}")

# ----------------------------------------------------------------------------
# Section: baseline CNN model from the practical session
# ----------------------------------------------------------------------------
# Emit the three banner lines (blank line + rule, title, rule) in one call.
print("\n" + "=" * 60, "ORIGINAL PRACTICAL SESSION CNN MODEL", "=" * 60, sep="\n")

def create_original_cnn_model(kernel_size=7, input_length=None):
    """Build the baseline 1D-CNN regressor (uncompiled).

    The network stacks two Conv1D layers (32 then 64 filters, ReLU,
    'same' padding), each followed by MaxPooling1D(pool_size=2), then
    Flatten, Dense(128, ReLU), Dropout(0.5) and a single linear output unit.

    Args:
        kernel_size: Width of both convolution kernels. Defaults to 7, the
            value used by the practical session.
        input_length: Length of the convolved input axis. When None, it is
            taken from the module-level ``X_train`` (``X_train.shape[1]``),
            which must then already be defined.

    Returns:
        An uncompiled ``keras.Sequential`` model that accepts inputs of shape
        ``(input_length, 1)`` and outputs one value per sample.
    """
    if input_length is None:
        # Backward-compatible default: infer the input size from the training data.
        input_length = X_train.shape[1]

    model = keras.Sequential([
        layers.Input(shape=(input_length, 1)),
        layers.Conv1D(filters=32, kernel_size=kernel_size, activation='relu', padding='same'),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(filters=64, kernel_size=kernel_size, activation='relu', padding='same'),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1)  # Regression output
    ])
    return model

# Seed the Python, NumPy and TensorFlow random generators before the model is
# built, so weight initialization, dropout masks and batch shuffling repeat on
# every run. The value matches the random_state used for the train/test split.
# NOTE(review): some GPU kernels may still be non-deterministic -- confirm if
# bit-exact reproducibility is required.
keras.utils.set_random_seed(42)

original_model = create_original_cnn_model()
# MSE is the training loss; MAE is tracked as the metric reported below.
original_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

print("\nOriginal Practical Session CNN Architecture:")
original_model.summary()

# Train original model
original_batch_size = 32  # Typical default
original_epochs = 10  # Typical default

print("\nTraining Original Practical Session CNN Model...")
print(f"Batch size: {original_batch_size}, Epochs: {original_epochs}")

# validation_split reserves a fraction of the training data for per-epoch
# validation; the test set is not touched until evaluate() below.
original_history = original_model.fit(
    X_train, y_train,
    batch_size=original_batch_size,
    epochs=original_epochs,
    validation_split=0.2,
    verbose=1
)

# Evaluate original model
# evaluate() returns [loss, mae] in the order configured in compile().
original_test_loss, original_test_mae = original_model.evaluate(X_test, y_test, verbose=0)
print(f"\nOriginal Practical Session CNN - Test MAE: {original_test_mae:.6f}")

# ----------------------------------------------------------------------------
# Section: modified CNN model (lab logbook requirements)
# ----------------------------------------------------------------------------
# Print the section banner and the list of changes as one newline-joined block.
print("\n".join([
    "\n" + "=" * 60,
    "MODIFIED CNN MODEL (LAB LOGBOOK REQUIREMENTS)",
    "=" * 60,
    "Modifications:",
    "1. Convolutional core size reduced to 5 (from 7)",
    "2. Batch size changed to 50 (from 32)",
    "3. Epochs calculated: Z + Y = 2 + 3 = 5 (from 10)",
    "4. Other parameters remain the same",
    "=" * 60,
]))

# SID: 2463693 -> Z=2, Y=3 -> Epochs = Z + Y = 5
def create_modified_cnn_model(kernel_size=5, input_length=None):
    """Build the lab-logbook variant of the 1D-CNN regressor (uncompiled).

    Same layer stack as the practical-session model -- two Conv1D layers
    (32 then 64 filters, ReLU, 'same' padding), each followed by
    MaxPooling1D(pool_size=2), then Flatten, Dense(128, ReLU), Dropout(0.5)
    and a single linear output unit -- but with a smaller convolution kernel.

    Args:
        kernel_size: Width of both convolution kernels. Defaults to 5
            (reduced from 7 as required by the lab logbook).
        input_length: Length of the convolved input axis. When None, it is
            taken from the module-level ``X_train`` (``X_train.shape[1]``),
            which must then already be defined.

    Returns:
        An uncompiled ``keras.Sequential`` model that accepts inputs of shape
        ``(input_length, 1)`` and outputs one value per sample.
    """
    if input_length is None:
        # Backward-compatible default: infer the input size from the training data.
        input_length = X_train.shape[1]

    model = keras.Sequential([
        layers.Input(shape=(input_length, 1)),
        layers.Conv1D(filters=32, kernel_size=kernel_size, activation='relu', padding='same'),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(filters=64, kernel_size=kernel_size, activation='relu', padding='same'),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1)  # Regression output
    ])
    return model

# Seed the Python, NumPy and TensorFlow random generators before the model is
# built, so weight initialization, dropout masks and batch shuffling repeat on
# every run and the reported MAE is reproducible. The value matches the
# random_state used for the train/test split.
# NOTE(review): some GPU kernels may still be non-deterministic -- confirm if
# bit-exact reproducibility is required.
keras.utils.set_random_seed(42)

modified_model = create_modified_cnn_model()
# MSE is the training loss; MAE is tracked as the metric reported below.
modified_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

print("\nModified CNN Architecture (Lab Logbook Requirements):")
modified_model.summary()

# Train modified model with new parameters
modified_batch_size = 50  # Changed from 32 to 50
modified_epochs = 5  # Calculated: Z + Y = 2 + 3 = 5

print("\nTraining Modified CNN Model...")
print(f"Batch size: {modified_batch_size}, Epochs: {modified_epochs}")

# validation_split reserves a fraction of the training data for per-epoch
# validation; the test set is not touched until evaluate() below.
modified_history = modified_model.fit(
    X_train, y_train,
    batch_size=modified_batch_size,
    epochs=modified_epochs,
    validation_split=0.2,
    verbose=1
)

# Evaluate modified model
# evaluate() returns [loss, mae] in the order configured in compile().
modified_test_loss, modified_test_mae = modified_model.evaluate(X_test, y_test, verbose=0)
print(f"\nModified CNN - Test MAE: {modified_test_mae:.6f}")

# ----------------------------------------------------------------------------
# Section: side-by-side comparison of the two test MAE values
# ----------------------------------------------------------------------------
print("\n" + "=" * 60, "MAE COMPARISON", "=" * 60, sep="\n")
print(f"Original Practical Session CNN - Test MAE: {original_test_mae:.6f}")
print(f"Modified CNN (Lab Logbook) - Test MAE: {modified_test_mae:.6f}")
print(f"\nDifference: {abs(modified_test_mae - original_test_mae):.6f}")

# Lower MAE is better; the relative change is expressed against the original model.
modified_is_better = modified_test_mae < original_test_mae
if not modified_is_better:
    increase = ((modified_test_mae - original_test_mae) / original_test_mae) * 100
    print(f"Modified model shows {increase:.2f}% increase in MAE")
else:
    improvement = ((original_test_mae - modified_test_mae) / original_test_mae) * 100
    print(f"Modified model shows {improvement:.2f}% improvement (lower MAE)")
print("=" * 60)

# Draw the per-epoch training curves of both models side by side:
# left panel shows MAE, right panel shows the MSE loss.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# One entry per panel: (axis, history key, legend tag, y-axis label, title).
panels = (
    (axes[0], 'mae', 'MAE', 'MAE', 'MAE Comparison'),
    (axes[1], 'loss', 'Loss', 'Loss (MSE)', 'Loss Comparison'),
)
# Runs are drawn in a fixed order (original first) so line colors stay stable.
runs = (('Original', original_history), ('Modified', modified_history))

for ax, key, tag, ylabel, title in panels:
    for run_name, run_history in runs:
        # Training curve uses circles, validation curve uses squares.
        ax.plot(run_history.history[key], label=f'{run_name} Train {tag}', marker='o')
        ax.plot(run_history.history['val_' + key], label=f'{run_name} Val {tag}', marker='s')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()


TensorFlow version: 2.19.0
Keras version: 3.10.0

Dataset shape: (1100000, 5)
Columns: ['Vol_Ask_N', 'Ask_N_200_3', 'DateDelta1_N', 'Bid_N_200_3', 'Vol_Bid_N']

First few rows:
   Vol_Ask_N  Ask_N_200_3  DateDelta1_N  Bid_N_200_3  Vol_Bid_N
0      0.176     0.636364       0.01655     0.515152      0.100
1      0.364     0.606061       0.02750     0.454545      0.420
2      0.100     0.575758       0.09770     0.454545      0.187
3      0.100     0.151515       0.01590     0.121212      0.100
4      0.270     0.212121       0.01040     0.060606      0.214

Dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100000 entries, 0 to 1099999
Data columns (total 5 columns):
 #   Column        Non-Null Count    Dtype  
---  ------        --------------    -----  
 0   Vol_Ask_N     1100000 non-null  float64
 1   Ask_N_200_3   1100000 non-null  float64
 2   DateDelta1_N  1100000 non-null  float64
 3   Bid_N_200_3   1100000 non-null  float64
 4   Vol_Bid_N     1100000 non-null  float


Training Original Practical Session CNN Model...
Batch size: 32, Epochs: 10
Epoch 1/10
[1m22000/22000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 4ms/step - loss: 0.0340 - mae: 0.1347 - val_loss: 0.0320 - val_mae: 0.1332
Epoch 2/10
[1m22000/22000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 4ms/step - loss: 0.0326 - mae: 0.1316 - val_loss: 0.0322 - val_mae: 0.1329
Epoch 3/10
[1m22000/22000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 4ms/step - loss: 0.0326 - mae: 0.1317 - val_loss: 0.0320 - val_mae: 0.1333
Epoch 4/10
[1m22000/22000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 4ms/step - loss: 0.0325 - mae: 0.1315 - val_loss: 0.0319 - val_mae: 0.1324
Epoch 5/10
[1m22000/22000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 4ms/step - loss: 0.0324 - mae: 0.1312 - val_loss: 0.0321 - val_mae: 0.1354
Epoch 6/10
[1m22000/22000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 4ms/step - loss: 0.0323 - mae: 0.1310 - val_loss: 0.0319