In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Step 2: Load and Prepare Data
data_1 = pd.read_csv(r'C:\Users\bravo\OneDrive\OneDrive Files\Desktop\train_set_1.csv')
data_2 = pd.read_csv(r'C:\Users\bravo\OneDrive\OneDrive Files\Desktop\train_set_2.csv')
data_3 = pd.read_csv(r'C:\Users\bravo\OneDrive\OneDrive Files\Desktop\train_set_3.csv')

# Step 3: Generate Features for Financial Time Series Data
def generate_features(data):
    lag = 5
    data['SMA_5'] = data['value'].rolling(window=5).mean()
    data['SMA_20'] = data['value'].rolling(window=20).mean()

    for i in range(1, lag + 1):
        data[f'Lag_{i}'] = data['value'].shift(i)
    
    data['Rolling_STD_5'] = data['value'].rolling(window=5).std()
    data['Rolling_STD_20'] = data['value'].rolling(window=20).std()
    
    roc_period = 1
    data['ROC'] = (data['value'].diff(roc_period).shift(-1) > 0).astype(int)  # Shift ROC as required

    return data

data_1 = generate_features(data_1)
data_2 = generate_features(data_2)
data_3 = generate_features(data_3)

# Step 4: Prepare Features and Labels for all Datasets
def prepare_data_cnn(data):
    lag = 5
    data = data.dropna()
    
    X = data[['SMA_5', 'SMA_20', 'Rolling_STD_5', 'Rolling_STD_20'] + [f'Lag_{i}' for i in range(1, lag + 1)]].values
    X = X.reshape(X.shape[0], X.shape[1], 1)  # Reshape for CNN input
    y = data['ROC']

    return X, y

X_1, y_1 = prepare_data_cnn(data_1)
X_2, y_2 = prepare_data_cnn(data_2)
X_3, y_3 = prepare_data_cnn(data_3)

# Step 5: Split Data into Training and Test Sets for all Datasets
def split_data(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    return X_train, X_test, y_train, y_test

# Step 6: Train and Evaluate Models for all Datasets
def create_temporal_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(GlobalAveragePooling1D())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model



def train_and_evaluate_cnn_model(X_train, y_train, X_test, y_test):
    early_stopping = EarlyStopping(patience=10, restore_best_weights=True)
    input_shape = (X_train.shape[1], 1)
    model = create_temporal_cnn_model(input_shape)
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=0)
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    print("Test Accuracy:", accuracy)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    return model

# Now, when you call the evaluation function, only accuracy and classification report will be printed.
print("Evaluation for Dataset 1:")
X_train_1, X_test_1, y_train_1, y_test_1 = split_data(X_1, y_1)
model_1 = train_and_evaluate_cnn_model(X_train_1, y_train_1, X_test_1, y_test_1)

print("Evaluation for Dataset 2:")
X_train_2, X_test_2, y_train_2, y_test_2 = split_data(X_2, y_2)
model_2 = train_and_evaluate_cnn_model(X_train_2, y_train_2, X_test_2, y_test_2)

print("Evaluation for Dataset 3:")
X_train_3, X_test_3, y_train_3, y_test_3 = split_data(X_3, y_3)
model_3 = train_and_evaluate_cnn_model(X_train_3, y_train_3, X_test_3, y_test_3)

# Function to print confusion matrix
def print_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:")
    print(cm)
    tn, fp, fn, tp = cm.ravel()
    print(f"True Negatives: {tn}")
    print(f"False Positives: {fp}")
    print(f"False Negatives: {fn}")
    print(f"True Positives: {tp}")

# Evaluation for Dataset 1
y_pred_1 = (model_1.predict(X_test_1) > 0.5).astype(int)
print_confusion_matrix(y_test_1, y_pred_1)

# Evaluation for Dataset 2
y_pred_2 = (model_2.predict(X_test_2) > 0.5).astype(int)
print_confusion_matrix(y_test_2, y_pred_2)

# Evaluation for Dataset 3
y_pred_3 = (model_3.predict(X_test_3) > 0.5).astype(int)
print_confusion_matrix(y_test_3, y_pred_3)


Evaluation for Dataset 1:
Test Accuracy: 0.5131348511383538
Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.37      0.44      2017
           1       0.51      0.66      0.57      1980

    accuracy                           0.51      3997
   macro avg       0.52      0.51      0.50      3997
weighted avg       0.52      0.51      0.50      3997

Evaluation for Dataset 2:
Test Accuracy: 0.4938704028021016
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      2023
           1       0.49      1.00      0.66      1974

    accuracy                           0.49      3997
   macro avg       0.25      0.50      0.33      3997
weighted avg       0.24      0.49      0.33      3997

Evaluation for Dataset 3:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Accuracy: 0.5258944208156117
Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.50      0.51      1981
           1       0.53      0.55      0.54      2016

    accuracy                           0.53      3997
   macro avg       0.53      0.53      0.53      3997
weighted avg       0.53      0.53      0.53      3997

Confusion Matrix:
[[ 754 1263]
 [ 683 1297]]
True Negatives: 754
False Positives: 1263
False Negatives: 683
True Positives: 1297
Confusion Matrix:
[[   0 2023]
 [   0 1974]]
True Negatives: 0
False Positives: 2023
False Negatives: 0
True Positives: 1974
Confusion Matrix:
[[ 989  992]
 [ 903 1113]]
True Negatives: 989
False Positives: 992
False Negatives: 903
True Positives: 1113
