In [1]:
import copy
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from pyts.image import GramianAngularField
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from torch.utils.data import DataLoader, Dataset


In [13]:

# Load the dataset (tab-separated export of daily EUR/USD bars)
df = pd.read_csv("dataset/EURUSD_Daily_200005300000_202405300000.csv", delimiter="\t")

# Extract the closing prices
closing = df["<CLOSE>"]

# Parameters
window_size = 10  # Closes per sliding window; the first window_size - 1 become the features
test_size = 0.2   # Test set size

# Fail early with a clear message instead of producing empty arrays that only
# blow up later inside train_test_split.
if len(closing) <= window_size:
    raise ValueError(
        f"Need more than {window_size} closing prices to build a window, got {len(closing)}"
    )

# Re-fitted on every window below, so each sample is standardized using only
# its own feature values.
scaler = StandardScaler()

# Work on a positional array: the loop indexes by position, which a label-based
# Series lookup only matches when the index is the default 0..n-1 range.
close_values = closing.to_numpy()

# Create sliding window features and labels
X, y = [], []
for i in range(len(close_values) - window_size):
    window = close_values[i:i + window_size]
    # Label is 1 when the close right after the window is above the window's last close.
    # NOTE(review): the features below drop window[-1], so the model never sees the
    # reference close used by the label (features end one day earlier) - confirm this
    # one-day gap is intended and not an off-by-one.
    target = 1 if close_values[i + window_size] > window[-1] else 0
    # Standardize the data (column vector in, column vector out)
    features = scaler.fit_transform(window[:-1].reshape(-1, 1))
    X.append(features)
    y.append(target)

# Drop only the trailing singleton axis: (n_samples, window_size - 1, 1) ->
# (n_samples, window_size - 1). A bare squeeze() would also collapse the sample
# axis when there is exactly one sample.
X = np.array(X).squeeze(axis=-1)
y = np.array(y)

# Step 1: Initial Split into Train_Valid and Test Sets
# shuffle=False keeps chronological order, so the test set is the most recent 20% of windows.
train_valid_size = 0.8  # 80% for training and validation, 20% for testing
train_valid, X_test, y_train_valid, y_test = train_test_split(
    X, y, test_size=(1 - train_valid_size), shuffle=False
)

# Step 2: Split Train_Valid into Train and Validation Sets
# The labels returned by step 1 are used directly instead of re-slicing y by length.
# NOTE(review): this split is shuffled while consecutive windows overlap heavily, so
# validation samples can be near-duplicates of training samples - confirm this is acceptable.
train_size = 0.75  # 75% of train_valid for training, 25% for validation
X_train, X_valid, y_train, y_valid = train_test_split(
    train_valid, y_train_valid, test_size=(1 - train_size), shuffle=True, random_state=42
)



# Fit the RBF-kernel SVM on the training windows (fit() returns the estimator itself)
svm_model = SVC(C=1.0, gamma='scale', kernel='rbf').fit(X_train, y_train)

# Predict the direction label for every validation window
svm_predictions = svm_model.predict(X_valid)

# Generate GAF images for the validation windows using pyts. The CNN is trained on
# these: an image is "predictable" when the SVM got that validation sample right.
feature_window_size = window_size - 1
gaf = GramianAngularField(image_size=feature_window_size, method='summation')
X_valid_gaf = gaf.fit_transform(X_valid)

# Save GAF images and labels for later CNN training
gaf_dir = 'gaf_images'
os.makedirs(gaf_dir, exist_ok=True)

# Remove images left over from a previous run. File names encode the label, so a
# sample whose label flipped between runs would otherwise exist twice on disk
# (predictable_k.npy and unpredictable_k.npy) and be loaded with both labels.
for stale_name in os.listdir(gaf_dir):
    if stale_name.endswith('.npy') and stale_name.startswith(('predictable_', 'unpredictable_')):
        os.remove(os.path.join(gaf_dir, stale_name))

for idx, (image, svm_label, true_label) in enumerate(zip(X_valid_gaf, svm_predictions, y_valid)):
    class_label = "predictable" if svm_label == true_label else "unpredictable"
    np.save(f'gaf_images/{class_label}_{idx}.npy', image)

# GAF images are now saved, and the next step will be CNN training.


In [3]:
class GAFClassifierCNN(nn.Module):
    """Small two-layer CNN that classifies a single-channel square GAF image.

    Architecture: conv(1->16) -> ReLU -> 2x2 max-pool -> conv(16->32) -> ReLU ->
    2x2 max-pool -> linear(->128) -> ReLU -> linear(->2). The two output logits
    correspond to the classes "unpredictable" (index 0) and "predictable" (index 1)
    as encoded by the dataset labels elsewhere in this notebook.

    Args:
        image_size: Side length of the square input images. When omitted, the
            module-level ``feature_window_size`` is used, which keeps the
            original zero-argument construction working.

    Raises:
        ValueError: If ``image_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(self, image_size=None):
        super().__init__()
        if image_size is None:
            # Backward-compatible default: fall back to the notebook-level global.
            image_size = feature_window_size

        # Each 2x2/stride-2 pooling floors the spatial size in half.
        pooled_side = image_size // 2 // 2
        if pooled_side < 1:
            raise ValueError(
                f"image_size must be at least 4 to survive two 2x2 poolings, got {image_size}"
            )
        self.image_size = image_size

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Flattened feature count after the second pooling layer (32 channels).
        self.fc1_input_size = 32 * pooled_side * pooled_side

        self.fc1 = nn.Linear(self.fc1_input_size, 128)
        self.fc2 = nn.Linear(128, 2)  # 2 output classes: predictable, unpredictable

    def forward(self, x):
        """Return class logits of shape (batch, 2) for input of shape (batch, 1, H, W)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch axis. Unlike view(-1, n), this cannot
        # silently fold a wrongly sized image into extra batch rows; a size mismatch
        # surfaces as an error in fc1 instead.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


In [4]:
class GAFDataset(Dataset):
    """Dataset of GAF images stored as ``.npy`` files whose names encode the label.

    Only files named ``predictable_*.npy`` (label 1) or ``unpredictable_*.npy``
    (label 0) are used; any other file in the directory is ignored rather than
    being silently treated as "predictable". The file list is sorted so that
    sample order does not depend on the filesystem's listing order.

    Args:
        image_dir: Directory containing the saved ``.npy`` images.
    """

    def __init__(self, image_dir):
        self.image_dir = image_dir
        self.image_files = sorted(
            f for f in os.listdir(image_dir)
            if f.endswith('.npy') and f.startswith(('predictable_', 'unpredictable_'))
        )
        # Label 0 = unpredictable, 1 = predictable.
        self.labels = [0 if f.startswith('unpredictable_') else 1 for f in self.image_files]

    def __len__(self):
        """Return the number of usable image files."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label)``: a float32 tensor with a leading channel axis and a long label."""
        img_name = os.path.join(self.image_dir, self.image_files[idx])
        image = np.load(img_name)
        image = np.expand_dims(image, axis=0)  # Add channel dimension
        label = self.labels[idx]
        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

# Build the on-disk image dataset and wrap it in a shuffling mini-batch loader
gaf_dataset = GAFDataset('gaf_images')
train_loader = DataLoader(dataset=gaf_dataset, shuffle=True, batch_size=32)


In [5]:
# Seed torch's global RNG before building the model so that weight initialisation
# (and any shuffling drawn from the global RNG afterwards) is reproducible across
# Restart & Run All. 42 matches the random_state used for the train/valid split.
torch.manual_seed(42)

# Initialize the CNN, loss function, and optimizer
cnn_model = GAFClassifierCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

In [6]:
# Training loop with early stopping
# NOTE(review): early stopping and the "best" weights are selected on *training*
# accuracy (the same batches used for the gradient steps), not on held-out data -
# consider monitoring a separate validation loader instead.
num_epochs = 100
patience = 10  # Number of epochs with no improvement after which training will be stopped
best_accuracy = 0.0
best_model_wts = copy.deepcopy(cnn_model.state_dict())
epochs_no_improve = 0

# An empty loader would otherwise surface as a ZeroDivisionError in the accuracy computation.
if len(train_loader) == 0:
    raise RuntimeError("train_loader is empty: no GAF images were found to train on")

for epoch in range(num_epochs):
    cnn_model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = cnn_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Count correct predictions; the predicted class is the index of the largest logit
        predicted = outputs.argmax(dim=1)
        correct_predictions += (predicted == labels).sum().item()
        total_predictions += labels.size(0)

    accuracy = correct_predictions / total_predictions * 100

    # Report the epoch before the early-stopping check so the final epoch's
    # metrics are not lost when training stops.
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}, Accuracy: {accuracy:.2f}%')

    # Early stopping logic: snapshot the weights whenever accuracy strictly improves
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model_wts = copy.deepcopy(cnn_model.state_dict())
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print(f'Early stopping at epoch {epoch + 1}')
        break

# Load the best model weights
cnn_model.load_state_dict(best_model_wts)

print("CNN training completed.")
print(f'Best Accuracy: {best_accuracy:.2f}%')


In [7]:
# Step 6: Testing on the Test Set
def predict_with_combined_model(cnn_model, svm_model, feature_window):
    """Predict "up", "down" or "unpredictable" for one standardized feature window.

    The window is turned into a GAF image and shown to the CNN. Only when the CNN
    outputs class 1 ("predictable") is the SVM asked for the direction.

    Args:
        cnn_model: Callable model returning logits of shape (1, 2) for an input
            of shape (1, 1, L, L); must provide ``eval()``.
        svm_model: Fitted classifier whose ``predict`` accepts a (1, L) array and
            returns 1 for "up".
        feature_window: 1-D sequence of L already-standardized values (no
            rescaling is applied here).

    Returns:
        "up" or "down" when the CNN deems the window predictable, otherwise
        "unpredictable".
    """
    # GramianAngularField (like the SVM) expects a 2-D (n_samples, n_timestamps)
    # array, so present the single window as one row.
    window_2d = np.asarray(feature_window, dtype=float).reshape(1, -1)

    # Generate the GAF image; the image side equals the window length.
    gaf = GramianAngularField(image_size=window_2d.shape[1], method='summation')
    gaf_image = gaf.fit_transform(window_2d)  # one image per input row: (1, L, L)

    # Insert the channel axis after the existing batch axis -> (1, 1, L, L)
    gaf_tensor = torch.tensor(gaf_image, dtype=torch.float32).unsqueeze(1)

    # Use CNN to predict if the GAF image is "predictable" or "unpredictable"
    cnn_model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        cnn_prediction = cnn_model(gaf_tensor).argmax(dim=1)

    if cnn_prediction.item() != 1:
        return "unpredictable"

    # CNN says "predictable": use the SVM model to predict "up" or "down"
    svm_prediction = svm_model.predict(window_2d)
    return "up" if svm_prediction[0] == 1 else "down"

In [15]:
# Run the combined CNN + SVM pipeline over every held-out test window and score
# only the windows that the CNN lets through as "predictable".
test_predictions = []
cnn_predictable_count = 0
correct_predictable_predictions = 0

# Class label that each direction string stands for
direction_to_label = {"up": 1, "down": 0}

for i, feature_window in enumerate(X_test):
    prediction = predict_with_combined_model(cnn_model, svm_model, feature_window)

    # Windows rejected by the CNN do not take part in the accuracy figure
    if prediction == "unpredictable":
        continue

    cnn_predictable_count += 1
    test_predictions.append(prediction)

    # A hit is when the predicted direction matches the true label
    if y_test[i] == direction_to_label[prediction]:
        correct_predictable_predictions += 1

# Accuracy is reported over the "predictable" subset only
if cnn_predictable_count == 0:
    print("No 'Predictable' cases identified by the CNN.")
else:
    test_accuracy = correct_predictable_predictions / cnn_predictable_count * 100
    print(f'Test Set Accuracy on "Predictable" Cases: {test_accuracy:.2f}%')