In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import time

In [3]:
#Baseline configuration from test made on the MLP model
data = pd.read_csv('JSE_clean_truncated.csv')

# Define horizons and input windows (as per your task)
horizon = 1
input_window = 120

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
data.head()

Unnamed: 0,ASPEN,CAPITEC,IMPLATS,GROWPNT,NORTHAM,ANGGOLD,BATS,EXXARO,WOOLIES,NASPERS-N-,...,INVPLC,DISCOVERY,AMPLATS,ANGLO,FIRSTRAND,NEDBANK,SASOL,SPAR,VODACOM,MTN_GROUP
0,5162,3850.0,16150,1310,3210.0,30250.0,22559.0,7551.0,1210.0,19587.0,...,4249.0,2705.0,49500,19680,1182.0,8825.0,29950,5450.0,5880.0,11425.0
1,5058,3800.0,16650,1350,3257.0,30050.0,22071.0,7700.0,1240.0,19762.0,...,4430.0,2750.0,51200,20800,1207.0,9370.0,30430,5500.0,5680.0,11900.0
2,4950,3800.0,16960,1350,3062.0,30960.0,21613.0,7755.0,1250.0,18818.0,...,4320.0,2700.0,50300,21395,1165.0,9200.0,30499,5450.0,5680.0,12201.0
3,4900,3825.0,16080,1350,3200.0,30650.0,22052.0,7451.0,1208.0,19472.0,...,4160.0,2691.0,51700,20850,1190.0,9000.0,29450,5516.0,5560.0,11800.0
4,4801,3825.0,17318,1350,3205.0,31521.0,21900.0,7849.0,1210.0,19637.0,...,4240.0,2733.0,51910,21300,1149.0,9132.0,30460,5600.0,5675.0,11900.0


In [5]:
class TCN(nn.Module):
    def __init__(self, input_dim, output_dim, num_channels, kernel_size=3, dropout=0.2):
        super(TCN, self).__init__()
        self.num_channels = num_channels
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        layers = []
        for i in range(len(num_channels) - 1):
            layers.append(nn.Conv1d(in_channels=num_channels[i], out_channels=num_channels[i+1], kernel_size=kernel_size, dilation=2))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
        
        self.tcn = nn.Sequential(*layers)
        self.output_layer = nn.Linear(num_channels[-1], output_dim)
    
    def forward(self, x):
        x = self.tcn(x)
        x = x[:, :, -1]  # Get the last output
        return self.output_layer(x)

In [6]:
# Function to create sliding window input-output pairs
def create_sequences(data, window_size, horizon):
    X = []
    y = []
    for i in range(len(data) - window_size - horizon):
        X.append(data[i:i+window_size])
        y.append(data[i+window_size:i+window_size+horizon])
    return np.array(X), np.array(y)


In [7]:
# Training function
# Initialize loss function and optimizer
criterion = nn.MSELoss()

def train_model(model, train_loader, num_epochs=100):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            # Transfer the batch to the device (GPU or CPU)
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        #print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')


In [8]:
# Evaluation function to calculate MAE, MAPE, and RMSE
from sklearn.metrics import mean_absolute_error, mean_squared_error

def evaluate_model(model, X_test, y_test):
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test).cpu().numpy()
        y_test = y_test.cpu().numpy()
        
        # Mean Absolute Error (MAE)
        mae = mean_absolute_error(y_test, y_pred)
        
        # Mean Absolute Percentage Error (MAPE)
        mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
        
        # Root Mean Square Error (RMSE)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        
        return mae, mape, rmse

In [9]:
# Loop over all companies (assumed to be columns in the data)
for company_idx in range(data.shape[1]):  # Assuming there are 30 columns, one per company
    print(f"\nTraining for company {company_idx+1}")
    
    # Extract data for the current company
    company_data = data.iloc[:, company_idx].values

    # Normalize the company data (optional, but recommended)
    company_data = (company_data - np.mean(company_data)) / np.std(company_data)

    print(f"\n--- Horizon: {horizon}, Input Window: {input_window} ---")

    # Create sequences for the current horizon and input window
    X, y = create_sequences(company_data, input_window, horizon)

    # Split the data into train and test sets (e.g., 80-20 split)
    split_idx = int(0.8 * len(X))
    X_train, y_train = X[:split_idx], y[:split_idx]
    X_test, y_test = X[split_idx:], y[split_idx:]

    # Convert the data to PyTorch tensors and move to the correct device
    X_train_tensor = torch.Tensor(X_train).unsqueeze(1).to(device)  # Adding a channel dimension
    y_train_tensor = torch.Tensor(y_train).to(device)
    X_test_tensor = torch.Tensor(X_test).unsqueeze(1).to(device)
    y_test_tensor = torch.Tensor(y_test).to(device)

    # Create DataLoader for batch processing
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    # Initialize the TCN model
    input_dim = 1  # Single input feature (one company's share price)
    output_dim = horizon  # Predict 'horizon' days into the future
    num_channels = [input_dim] + [1, 32, 64]  # Example channel structure, can be tuned
    kernel_size = 3  # Standard TCN kernel size

    model = TCN(input_dim, output_dim, num_channels, kernel_size).to(device)

    # Initialize optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Start time tracking
    start_time = time.time()

    # Train the model
    train_model(model, train_loader, num_epochs=100)

    # End time tracking
    end_time = time.time()

    # Calculate total training time
    training_time = end_time - start_time
    print(f"Training time for company {company_idx+1}, Horizon {horizon}, Input Window {input_window}: {training_time:.2f} seconds")

    # Evaluate the model using MAE, MAPE, RMSE
    mae, mape, rmse = evaluate_model(model, X_test_tensor, y_test_tensor)
    print(f"Test MAE: {mae:.4f}, MAPE: {mape:.2f}%, RMSE: {rmse:.4f}")



Training for company 1

--- Horizon: 1, Input Window: 120 ---
Training time for company 1, Horizon 1, Input Window 120: 36.44 seconds
Test MAE: 0.0816, MAPE: 13.04%, RMSE: 0.1024

Training for company 2

--- Horizon: 1, Input Window: 120 ---
Training time for company 2, Horizon 1, Input Window 120: 35.93 seconds
Test MAE: 0.6502, MAPE: 42.91%, RMSE: 0.6871

Training for company 3

--- Horizon: 1, Input Window: 120 ---
Training time for company 3, Horizon 1, Input Window 120: 36.52 seconds
Test MAE: 0.3936, MAPE: 175.36%, RMSE: 0.4768

Training for company 4

--- Horizon: 1, Input Window: 120 ---
Training time for company 4, Horizon 1, Input Window 120: 37.90 seconds
Test MAE: 0.2767, MAPE: 97.06%, RMSE: 0.3421

Training for company 5

--- Horizon: 1, Input Window: 120 ---
Training time for company 5, Horizon 1, Input Window 120: 35.00 seconds
Test MAE: 0.7786, MAPE: 224.65%, RMSE: 0.9408

Training for company 6

--- Horizon: 1, Input Window: 120 ---
Training time for company 6, Horizo

In [10]:
# Loop over all companies (assumed to be columns in the data)
for company_idx in range(data.shape[1]):  # Assuming there are 30 columns, one per company
    print(f"\nTraining for company {company_idx+1}")
    
    # Extract data for the current company
    company_data = data.iloc[:, company_idx].values

    # Normalize the company data (optional, but recommended)
    company_data = (company_data - np.mean(company_data)) / np.std(company_data)

    print(f"\n--- Horizon: {horizon}, Input Window: {input_window} ---")

    # Create sequences for the current horizon and input window
    X, y = create_sequences(company_data, input_window, horizon)

    # Split the data into train and test sets (e.g., 80-20 split)
    split_idx = int(0.8 * len(X))
    X_train, y_train = X[:split_idx], y[:split_idx]
    X_test, y_test = X[split_idx:], y[split_idx:]

    # Convert the data to PyTorch tensors and move to the correct device
    X_train_tensor = torch.Tensor(X_train).unsqueeze(1).to(device)  # Adding a channel dimension
    y_train_tensor = torch.Tensor(y_train).to(device)
    X_test_tensor = torch.Tensor(X_test).unsqueeze(1).to(device)
    y_test_tensor = torch.Tensor(y_test).to(device)

    # Create DataLoader for batch processing
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    # Initialize the TCN model
    input_dim = 1  # Single input feature (one company's share price)
    output_dim = horizon  # Predict 'horizon' days into the future
    num_channels = [input_dim] + [1, 32, 64]  # Example channel structure, can be tuned
    kernel_size = 3  # Standard TCN kernel size

    model = TCN(input_dim, output_dim, num_channels, kernel_size).to(device)

    # Initialize optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Start time tracking
    start_time = time.time()

    # Train the model
    train_model(model, train_loader, num_epochs=100)

    # End time tracking
    end_time = time.time()

    # Calculate total training time
    training_time = end_time - start_time
    print(f"Training time for company {company_idx+1}, Horizon {horizon}, Input Window {input_window}: {training_time:.2f} seconds")

    # Evaluate the model using MAE, MAPE, RMSE
    mae, mape, rmse = evaluate_model(model, X_test_tensor, y_test_tensor)
    print(f"Test MAE: {mae:.4f}, MAPE: {mape:.2f}%, RMSE: {rmse:.4f}")



Training for company 1

--- Horizon: 1, Input Window: 120 ---
Training time for company 1, Horizon 1, Input Window 120: 20.81 seconds
Test MAE: 0.4770, MAPE: 73.04%, RMSE: 0.4844

Training for company 2

--- Horizon: 1, Input Window: 120 ---
Training time for company 2, Horizon 1, Input Window 120: 20.99 seconds
Test MAE: 0.4925, MAPE: 28.68%, RMSE: 0.5959

Training for company 3

--- Horizon: 1, Input Window: 120 ---
Training time for company 3, Horizon 1, Input Window 120: 20.52 seconds
Test MAE: 0.3668, MAPE: 202.97%, RMSE: 0.4630

Training for company 4

--- Horizon: 1, Input Window: 120 ---
Training time for company 4, Horizon 1, Input Window 120: 21.44 seconds
Test MAE: 0.4901, MAPE: 226.60%, RMSE: 0.5621

Training for company 5

--- Horizon: 1, Input Window: 120 ---
Training time for company 5, Horizon 1, Input Window 120: 22.18 seconds
Test MAE: 1.0561, MAPE: 297.48%, RMSE: 1.2968

Training for company 6

--- Horizon: 1, Input Window: 120 ---
Training time for company 6, Horiz

In [11]:
# Loop over all companies (assumed to be columns in the data)
for company_idx in range(data.shape[1]):  # Assuming there are 30 columns, one per company
    print(f"\nTraining for company {company_idx+1}")
    
    # Extract data for the current company
    company_data = data.iloc[:, company_idx].values

    # Normalize the company data (optional, but recommended)
    company_data = (company_data - np.mean(company_data)) / np.std(company_data)

    print(f"\n--- Horizon: {horizon}, Input Window: {input_window} ---")

    # Create sequences for the current horizon and input window
    X, y = create_sequences(company_data, input_window, horizon)

    # Split the data into train and test sets (e.g., 80-20 split)
    split_idx = int(0.8 * len(X))
    X_train, y_train = X[:split_idx], y[:split_idx]
    X_test, y_test = X[split_idx:], y[split_idx:]

    # Convert the data to PyTorch tensors and move to the correct device
    X_train_tensor = torch.Tensor(X_train).unsqueeze(1).to(device)  # Adding a channel dimension
    y_train_tensor = torch.Tensor(y_train).to(device)
    X_test_tensor = torch.Tensor(X_test).unsqueeze(1).to(device)
    y_test_tensor = torch.Tensor(y_test).to(device)

    # Create DataLoader for batch processing
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    # Initialize the TCN model
    input_dim = 1  # Single input feature (one company's share price)
    output_dim = horizon  # Predict 'horizon' days into the future
    num_channels = [input_dim] + [1, 32, 64]  # Example channel structure, can be tuned
    kernel_size = 3  # Standard TCN kernel size

    model = TCN(input_dim, output_dim, num_channels, kernel_size).to(device)

    # Initialize optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Start time tracking
    start_time = time.time()

    # Train the model
    train_model(model, train_loader, num_epochs=100)

    # End time tracking
    end_time = time.time()

    # Calculate total training time
    training_time = end_time - start_time
    print(f"Training time for company {company_idx+1}, Horizon {horizon}, Input Window {input_window}: {training_time:.2f} seconds")

    # Evaluate the model using MAE, MAPE, RMSE
    mae, mape, rmse = evaluate_model(model, X_test_tensor, y_test_tensor)
    print(f"Test MAE: {mae:.4f}, MAPE: {mape:.2f}%, RMSE: {rmse:.4f}")



Training for company 1

--- Horizon: 1, Input Window: 120 ---
Training time for company 1, Horizon 1, Input Window 120: 21.49 seconds
Test MAE: 0.0650, MAPE: 11.80%, RMSE: 0.0831

Training for company 2

--- Horizon: 1, Input Window: 120 ---
Training time for company 2, Horizon 1, Input Window 120: 22.61 seconds
Test MAE: 1.8836, MAPE: 126.65%, RMSE: 1.9512

Training for company 3

--- Horizon: 1, Input Window: 120 ---
Training time for company 3, Horizon 1, Input Window 120: 21.51 seconds
Test MAE: 0.3521, MAPE: 191.20%, RMSE: 0.4406

Training for company 4

--- Horizon: 1, Input Window: 120 ---
Training time for company 4, Horizon 1, Input Window 120: 22.45 seconds
Test MAE: 0.4958, MAPE: 230.97%, RMSE: 0.5711

Training for company 5

--- Horizon: 1, Input Window: 120 ---
Training time for company 5, Horizon 1, Input Window 120: 21.66 seconds
Test MAE: 1.9935, MAPE: 698.52%, RMSE: 2.4114

Training for company 6

--- Horizon: 1, Input Window: 120 ---
Training time for company 6, Hori