In [1]:
# Install PyTorch Forecasting and its dependencies
!pip install pytorch-lightning
!pip install pytorch-forecasting
!pip install torchmetrics
!pip install yfinance
!pip install plotly




In [2]:
import pandas as pd
import numpy as np
import yfinance as yf
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from datetime import timedelta
import warnings
warnings.filterwarnings('ignore')


In [3]:
# Define the stock ticker
ticker = 'AAPL'  # Apple Inc.

# Define the period and interval
period = '5d'  # Last 5 days
interval = '1m'  # 1-minute intervals

# Download the data
data = yf.download(tickers=ticker, period=period, interval=interval, progress=False)

# Display the first few rows
print(data.head())


Price                       Adj Close       Close        High         Low  \
Ticker                           AAPL        AAPL        AAPL        AAPL   
Datetime                                                                    
2024-12-02 14:30:00+00:00  238.700897  238.700897  238.770004  237.160004   
2024-12-02 14:31:00+00:00  239.000000  239.000000  239.059906  238.399994   
2024-12-02 14:32:00+00:00  238.934601  238.934601  239.350006  238.910004   
2024-12-02 14:33:00+00:00  239.110001  239.110001  239.199997  238.729996   
2024-12-02 14:34:00+00:00  239.296295  239.296295  239.350006  238.929993   

Price                            Open   Volume  
Ticker                           AAPL     AAPL  
Datetime                                        
2024-12-02 14:30:00+00:00  237.270004  2562715  
2024-12-02 14:31:00+00:00  238.690002   418941  
2024-12-02 14:32:00+00:00  238.960007   213308  
2024-12-02 14:33:00+00:00  238.929993   192847  
2024-12-02 14:34:00+00:00  239.080093   

In [4]:
# Check if columns are MultiIndex
if isinstance(data.columns, pd.MultiIndex):
    # Flatten MultiIndex columns
    data.columns = ['_'.join(col).strip() for col in data.columns.values]
    print("\nFlattened Columns:")
    print(data.columns)
else:
    print("\nColumns are already single-indexed.")


Flattened Columns:
Index(['Adj Close_AAPL', 'Close_AAPL', 'High_AAPL', 'Low_AAPL', 'Open_AAPL',
       'Volume_AAPL'],
      dtype='object')


In [5]:
# Rename columns to remove ticker symbol
data.rename(columns={
    'Open_AAPL': 'Open',
    'High_AAPL': 'High',
    'Low_AAPL': 'Low',
    'Close_AAPL': 'Close',
    'Volume_AAPL': 'Volume'
}, inplace=True)

print("\nRenamed Columns:")
print(data.columns)


Renamed Columns:
Index(['Adj Close_AAPL', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')


In [6]:
print("\nData Types:")
for col in ['High', 'Low', 'Close', 'Volume']:
    print(f"{col}: {type(data[col])}")


Data Types:
High: <class 'pandas.core.series.Series'>
Low: <class 'pandas.core.series.Series'>
Close: <class 'pandas.core.series.Series'>
Volume: <class 'pandas.core.series.Series'>


In [7]:
# Step 2: Check Data Retrieval
from sklearn.preprocessing import MinMaxScaler
if data.empty:
    print("DataFrame is empty. Unable to retrieve data.")
else:
    print("DataFrame successfully retrieved.")
    print(data.head())
    print("Data Shape:", data.shape)

    # Step 3: Flatten MultiIndex Columns if Necessary
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = ['_'.join(col).strip() for col in data.columns.values]
        print("\nFlattened Columns:")
        print(data.columns)
    else:
        print("\nColumns are already single-indexed.")

    # Step 4: Rename Columns Appropriately
    # Adjust this part based on your actual column names after flattening
    rename_dict = {
        'Open_AAPL': 'Open',
        'High_AAPL': 'High',
        'Low_AAPL': 'Low',
        'Close_AAPL': 'Close',
        'Adj Close_AAPL': 'Adj Close',
        'Volume_AAPL': 'Volume'
    }
    data.rename(columns=rename_dict, inplace=True)
    print("\nRenamed Columns:")
    print(data.columns)

    # Step 5: Handle Missing Values
    data.dropna(inplace=True)
    print("\nData Shape after Dropping Missing Values:", data.shape)

    # Step 6: Ensure Required Columns are Numeric
    required_columns = ['High', 'Low', 'Close', 'Volume']
    for col in required_columns:
        data[col] = pd.to_numeric(data[col], errors='coerce')
    data.dropna(inplace=True)
    print("\nData Shape after Ensuring Numeric Types:", data.shape)

    # Step 7: Calculate VWAP Manually
    # Calculate Typical Price
    data['Typical_Price'] = (data['High'] + data['Low'] + data['Close']) / 3

    # Calculate TPV (Typical Price * Volume)
    data['TPV'] = data['Typical_Price'] * data['Volume']

    # Calculate Cumulative TPV and Cumulative Volume
    data['Cumulative_TPV'] = data['TPV'].cumsum()
    data['Cumulative_Volume'] = data['Volume'].cumsum()

    # Calculate VWAP
    data['VWAP'] = data['Cumulative_TPV'] / data['Cumulative_Volume']

    # Drop intermediate columns
    data.drop(['Typical_Price', 'TPV', 'Cumulative_TPV', 'Cumulative_Volume'], axis=1, inplace=True)

    # Handle any potential division by zero by filling NaNs
    data['VWAP'] = data['VWAP'].bfill()  # Using backward fill as per FutureWarning

    # Step 8: Calculate RSI Manually
    rsi_period = 14

    # Calculate price changes
    data['Price_Change'] = data['Close'].diff()

    # Separate gains and losses
    data['Gain'] = data['Price_Change'].apply(lambda x: x if x > 0 else 0)
    data['Loss'] = data['Price_Change'].apply(lambda x: -x if x < 0 else 0)

    # Calculate Average Gain and Average Loss
    data['Avg_Gain'] = data['Gain'].rolling(window=rsi_period, min_periods=rsi_period).mean()
    data['Avg_Loss'] = data['Loss'].rolling(window=rsi_period, min_periods=rsi_period).mean()

    # Calculate RS
    data['RS'] = data['Avg_Gain'] / data['Avg_Loss']

    # Calculate RSI
    data['RSI'] = 100 - (100 / (1 + data['RS']))

    # Handle NaN values in RSI
    data['RSI'] = data['RSI'].fillna(0)

    # Drop intermediate columns
    data.drop(['Price_Change', 'Gain', 'Loss', 'Avg_Gain', 'Avg_Loss', 'RS'], axis=1, inplace=True)

    # Step 9: Calculate MACD Manually
    ema_short_period = 12
    ema_long_period = 26
    signal_period = 9

    # Calculate EMAs
    data['EMA_12'] = data['Close'].ewm(span=ema_short_period, adjust=False).mean()
    data['EMA_26'] = data['Close'].ewm(span=ema_long_period, adjust=False).mean()

    # Calculate MACD Line
    data['MACD'] = data['EMA_12'] - data['EMA_26']

    # Calculate Signal Line
    data['MACD_Signal'] = data['MACD'].ewm(span=signal_period, adjust=False).mean()

    # Calculate MACD Histogram
    data['MACD_Diff'] = data['MACD'] - data['MACD_Signal']

    # Drop intermediate EMA columns
    data.drop(['EMA_12', 'EMA_26'], axis=1, inplace=True)

    # Step 10: Drop any rows with NaN values from indicator calculations
    data.dropna(inplace=True)
    print("\nData Shape after Feature Engineering:", data.shape)

    # Step 11: Normalize Data
    scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(data[['Close', 'Volume', 'VWAP', 'RSI', 'MACD', 'MACD_Signal', 'MACD_Diff']])
    data_final = pd.DataFrame(scaled_features, index=data.index, columns=['Close', 'Volume', 'VWAP', 'RSI', 'MACD', 'MACD_Signal', 'MACD_Diff'])

    print("\nScaled Features Sample:")
    print(data_final.head())

# Reset the index to make 'Datetime' a column

data_final.reset_index(inplace=True)  # 'Datetime' becomes a column

# Extract time-based features
data_final['minute'] = data_final['Datetime'].dt.minute.astype(int)
data_final['hour'] = data_final['Datetime'].dt.hour.astype(int)
data_final['day_of_week'] = data_final['Datetime'].dt.dayofweek.astype(int)  # Monday=0, Sunday=6
data_final['month'] = data_final['Datetime'].dt.month.astype(int)
print("\nDataFrame after extracting time-based features:")
print(data_final.head())
print("\nDataFrame Columns:")
print(data_final.columns)

# Drop the 'Datetime' column as it's no longer needed
data_final.drop(['Datetime'], axis=1, inplace=True)


DataFrame successfully retrieved.
                           Adj Close_AAPL       Close        High         Low  \
Datetime                                                                        
2024-12-02 14:30:00+00:00      238.700897  238.700897  238.770004  237.160004   
2024-12-02 14:31:00+00:00      239.000000  239.000000  239.059906  238.399994   
2024-12-02 14:32:00+00:00      238.934601  238.934601  239.350006  238.910004   
2024-12-02 14:33:00+00:00      239.110001  239.110001  239.199997  238.729996   
2024-12-02 14:34:00+00:00      239.296295  239.296295  239.350006  238.929993   

                                 Open   Volume  
Datetime                                        
2024-12-02 14:30:00+00:00  237.270004  2562715  
2024-12-02 14:31:00+00:00  238.690002   418941  
2024-12-02 14:32:00+00:00  238.960007   213308  
2024-12-02 14:33:00+00:00  238.929993   192847  
2024-12-02 14:34:00+00:00  239.080093   200970  
Data Shape: (1948, 6)

Columns are already single-index

In [None]:
print(data_final.columns)



In [8]:
class StockDataset(Dataset):
    def __init__(self, data, seq_length=60, pred_length=30):
        """
        Args:
            data (pd.DataFrame): The preprocessed stock data.
            seq_length (int): Number of past time steps to use as input.
            pred_length (int): Number of future time steps to predict.
        """
        self.data = data
        self.seq_length = seq_length
        self.pred_length = pred_length
        self.total_length = seq_length + pred_length
        self.indices = []
        
        for i in range(len(data) - self.total_length + 1):
            self.indices.append(i)
    
    def __len__(self):
        return len(self.indices)
    
    def __getitem__(self, idx):
        start = self.indices[idx]
        end = start + self.seq_length
        pred_end = end + self.pred_length
        
        X = self.data.iloc[start:end]
        Y = self.data.iloc[end:pred_end]['Close'].values  # Predicting 'Close' prices for future steps
        
        # Convert to tensors
        X_tensor = torch.tensor(X[['Close', 'Volume', 'VWAP', 'RSI', 'MACD', 'MACD_Signal', 'MACD_Diff']].values, dtype=torch.float32)
        Y_tensor = torch.tensor(Y, dtype=torch.float32)  # Predicting multiple time steps, hence the length matches pred_length

        # Handle categorical data similarly as before
        minute = torch.tensor(X['minute'].astype(int).values, dtype=torch.long)
        hour = torch.tensor(X['hour'].astype(int).values, dtype=torch.long)
        day_of_week = torch.tensor(X['day_of_week'].astype(int).values, dtype=torch.long)
        month = torch.tensor(X['month'].astype(int).values, dtype=torch.long)
        
        categorical = torch.stack([minute, hour, day_of_week, month], dim=1)  # Shape: (seq_length, num_categorical_features)

        return X_tensor, categorical, Y_tensor


In [9]:
# Adjust 'month' to zero-based indexing
data_final['month'] = data_final['month'] - 1  # Convert 1-12 to 0-11

# Verify the adjustment
print("\nAfter adjusting 'month' to zero-based indexing:")
print(data_final['month'].unique())
print("Min month:", data_final['month'].min(), "Max month:", data_final['month'].max())



After adjusting 'month' to zero-based indexing:
[11]
Min month: 11 Max month: 11


In [10]:
# Verify the combined DataFrame
print("\nCombined DataFrame with time-based features:")
print(data_final.head())

# Initialize the dataset
seq_length = 60
pred_length = 30
train_dataset = StockDataset(data_final, seq_length=seq_length, pred_length=pred_length)

# Check a sample from the dataset
sample_X, sample_X_cat, sample_Y = train_dataset[0]
print("\nSample X (Numerical Features):")
print(sample_X)
print("\nSample X_cat (Categorical Features):")
print(sample_X_cat)
print("\nSample Y (Target Close Prices):")
print(sample_Y)




Combined DataFrame with time-based features:
      Close    Volume      VWAP  RSI      MACD  MACD_Signal  MACD_Diff  \
0  0.053544  1.000000  0.000000  0.0  0.441087     0.445894   0.501677   
1  0.101942  0.163475  0.022968  0.0  0.479813     0.454456   0.571404   
2  0.091360  0.083235  0.036732  0.0  0.501245     0.466044   0.596047   
3  0.119742  0.075251  0.046897  0.0  0.540061     0.483896   0.647061   
4  0.149887  0.078421  0.059012  0.0  0.593464     0.509985   0.714138   

   minute  hour  day_of_week  month  
0      30    14            0     11  
1      31    14            0     11  
2      32    14            0     11  
3      33    14            0     11  
4      34    14            0     11  

Sample X (Numerical Features):
tensor([[0.0535, 1.0000, 0.0000, 0.0000, 0.4411, 0.4459, 0.5017],
        [0.1019, 0.1635, 0.0230, 0.0000, 0.4798, 0.4545, 0.5714],
        [0.0914, 0.0832, 0.0367, 0.0000, 0.5012, 0.4660, 0.5960],
        [0.1197, 0.0753, 0.0469, 0.0000, 0.5401, 0.

In [11]:
from torch.utils.data import DataLoader

# Split the data into training and validation sets (80-20 split)
train_size = int(len(data_final) * 0.8)
train_df = data_final.iloc[:train_size].copy()
val_df = data_final.iloc[train_size - seq_length - pred_length + 1 :].copy()  # Ensure enough data for sequences

print(f"\nTraining data points: {len(train_df)}")
print(f"Validation data points: {len(val_df)}")

# Create training and validation datasets
train_dataset = StockDataset(train_df, seq_length=seq_length, pred_length=pred_length)
val_dataset = StockDataset(val_df, seq_length=seq_length, pred_length=pred_length)

print(f"\nNumber of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")

# Define batch size
batch_size = 64

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

print(f"\nNumber of batches in training loader: {len(train_loader)}")
print(f"Number of batches in validation loader: {len(val_loader)}")



Training data points: 1558
Validation data points: 479

Number of training samples: 1469
Number of validation samples: 390

Number of batches in training loader: 22
Number of batches in validation loader: 6


In [12]:
class VariableSelectionNetwork(nn.Module):
    def __init__(self, input_size, hidden_size=512, output_size=39, dropout=0.1):
        """
        Variable Selection Network to assign weights to each feature.
        
        Args:
            input_size (int): Number of input features (39).
            hidden_size (int): Number of hidden units.
            output_size (int): Number of output features (should match input_size).
            dropout (float): Dropout rate.
        """
        super(VariableSelectionNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_size, output_size)  # Set output size to match input features (39)
        self.softmax = nn.Softmax(dim=2)  # Apply softmax over the feature dimension (dim=2)
    
    def forward(self, x):
        """
        Forward pass for Variable Selection Network.
        
        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, seq_length, input_size).
        
        Returns:
            selected (torch.Tensor): Weighted sum of features of shape (batch_size, seq_length).
            weights (torch.Tensor): Weights for each feature of shape (batch_size, seq_length, output_size).
        """
        weights = self.fc1(x)  # (batch_size, seq_length, hidden_size)
        weights = self.relu(weights)
        weights = self.dropout(weights)
        weights = self.fc2(weights)  # (batch_size, seq_length, output_size=39)
        weights = self.softmax(weights)  # (batch_size, seq_length, 39)
        
        # Weighted sum: Multiply and sum along the feature dimension
        selected = torch.sum(weights * x, dim=2)  # (batch_size, seq_length)
        
        return selected, weights


In [13]:
class GatedResidualNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout=0.1):
        super(GatedResidualNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.gate = nn.Linear(input_size, output_size)
        self.layer_norm = nn.LayerNorm(output_size)
    
    def forward(self, x):
        """
        x: (batch_size, seq_length, input_size)
        Returns:
            out: (batch_size, seq_length, output_size)
        """
        # Main pathway
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        
        # Gating mechanism
        gate = torch.sigmoid(self.gate(x))
        
        # Apply gate
        out = out * gate
        
        # Residual connection and layer normalization
        out = self.layer_norm(out + x)
        return out


In [14]:
class MultiHeadAttentionLayer(nn.Module):
    def __init__(self, embed_size, heads, dropout=0.1):
        super(MultiHeadAttentionLayer, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_size, num_heads=heads, dropout=dropout, batch_first=True)
        self.layer_norm = nn.LayerNorm(embed_size)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        """
        x: (batch_size, seq_length, embed_size)
        Returns:
            out: (batch_size, seq_length, embed_size)
        """
        attn_output, _ = self.attention(x, x, x)
        out = self.layer_norm(attn_output + x)
        out = self.dropout(out)
        return out


In [15]:
class TemporalFusionLayer(nn.Module):
    def __init__(self, embed_size, lstm_hidden_size, lstm_layers=4, attention_heads=4, dropout=0.1):
        super(TemporalFusionLayer, self).__init__()
        self.lstm = nn.LSTM(input_size=embed_size, hidden_size=lstm_hidden_size, num_layers=lstm_layers, batch_first=True, dropout=dropout)
        self.attention = MultiHeadAttentionLayer(embed_size=lstm_hidden_size, heads=attention_heads, dropout=dropout)
        self.layer_norm = nn.LayerNorm(lstm_hidden_size)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        """
        x: (batch_size, seq_length, embed_size)
        Returns:
            out: (batch_size, seq_length, lstm_hidden_size)
        """
        lstm_out, _ = self.lstm(x)  # (batch_size, seq_length, lstm_hidden_size)
        attn_out = self.attention(lstm_out)  # (batch_size, seq_length, lstm_hidden_size)
        out = self.layer_norm(attn_out + lstm_out)
        out = self.dropout(out)
        return out


In [16]:
class StaticEnrichment(nn.Module):
    def __init__(self, static_size, embed_size, dropout=0.1):
        super(StaticEnrichment, self).__init__()
        self.fc = nn.Linear(static_size, embed_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        """
        x: (batch_size, static_size)
        Returns:
            enriched: (batch_size, embed_size)
        """
        enriched = self.fc(x)
        enriched = self.relu(enriched)
        enriched = self.dropout(enriched)
        return enriched


In [17]:
class OutputLayer(nn.Module):
    def __init__(self, lstm_hidden_size, pred_length):
        super(OutputLayer, self).__init__()
        self.fc = nn.Linear(lstm_hidden_size, pred_length)
    
    def forward(self, x):
        """
        x: (batch_size, lstm_hidden_size)
        Returns:
            out: (batch_size, pred_length)
        """
        out = self.fc(x)
        return out


In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TemporalFusionTransformer(nn.Module):
    def __init__(self, 
                 input_size, 
                 categorical_size, 
                 static_size, 
                 embed_size=64, 
                 lstm_hidden_size=512, 
                 lstm_layers=4, 
                 attention_heads=4, 
                 dropout=0.1, 
                 pred_length=30):
        super(TemporalFusionTransformer, self).__init__()
        self.input_size = input_size
        self.categorical_size = categorical_size
        self.static_size = static_size
        self.embed_size = embed_size
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_layers = lstm_layers
        self.attention_heads = attention_heads
        self.dropout = dropout
        self.pred_length = pred_length
        
        # Embedding layers for categorical features
        self.embedding_minute = nn.Embedding(num_embeddings=60, embedding_dim=8)
        self.embedding_hour = nn.Embedding(num_embeddings=24, embedding_dim=8)
        self.embedding_day_of_week = nn.Embedding(num_embeddings=7, embedding_dim=8)
        self.embedding_month = nn.Embedding(num_embeddings=12, embedding_dim=8)  # 0-11
        
        # Variable Selection Network
        self.variable_selection = VariableSelectionNetwork(input_size=self.input_size + 4 * 8)  # 7 + 32 = 39
        
        # Projection Layer after Variable Selection
        self.projection = nn.Linear(1, self.embed_size)  # Map from scalar to embed_size
        
        # Gated Residual Network
        self.grn = GatedResidualNetwork(input_size=self.embed_size, hidden_size=64, output_size=self.embed_size, dropout=self.dropout)
        
        # Temporal Fusion Layer
        self.temporal_fusion = TemporalFusionLayer(embed_size=self.embed_size, 
                                                  lstm_hidden_size=self.lstm_hidden_size, 
                                                  lstm_layers=self.lstm_layers, 
                                                  attention_heads=self.attention_heads, 
                                                  dropout=self.dropout)
        
        # Output Layer: Mapping to a single predicted value per time step
        self.output_layer = nn.Linear(self.lstm_hidden_size, 1)  # Produces a single value per time step
    
    def forward(self, X_real, X_cat, X_static):
        """
        Args:
            X_real: (batch_size, seq_length, input_size) - Real-valued features
            X_cat: (batch_size, seq_length, categorical_size) - Categorical features
            X_static: (batch_size, static_size) - Static features
        Returns:
            out: (batch_size, pred_length)
        """
        batch_size, seq_length, _ = X_real.size()
        
        # Embed categorical features
        minute = self.embedding_minute(X_cat[:, :, 0])  # (batch_size, seq_length, 8)
        hour = self.embedding_hour(X_cat[:, :, 1])      # (batch_size, seq_length, 8)
        day_of_week = self.embedding_day_of_week(X_cat[:, :, 2])  # (batch_size, seq_length, 8)
        month = self.embedding_month(X_cat[:, :, 3])    # (batch_size, seq_length, 8)
        
        # Concatenate embedded categorical features with real features
        X = torch.cat([X_real, minute, hour, day_of_week, month], dim=2)  # (batch_size, seq_length, 39)
        
        # Variable Selection
        X_selected, weights = self.variable_selection(X)  # X_selected: (batch_size, seq_length)
        
        # Reshape X_selected for projection
        X_selected = X_selected.unsqueeze(-1)  # (batch_size, seq_length, 1)
        
        # Project to embed_size
        X_selected = self.projection(X_selected)  # (batch_size, seq_length, embed_size)
        
        # Gated Residual Network
        X_grn = self.grn(X_selected)  # (batch_size, seq_length, embed_size)
        
        # Temporal Fusion Layer
        X_temporal = self.temporal_fusion(X_grn)  # (batch_size, seq_length, lstm_hidden_size)
        
        # Take the last 'pred_length' time steps
        X_temporal_last = X_temporal[:, -self.pred_length:, :]  # (batch_size, pred_length, lstm_hidden_size)
        
        # Output Layer
        out = self.output_layer(X_temporal_last)  # (batch_size, pred_length, 1)
        
        # Squeeze the output to get rid of the singleton dimension
        out = out.squeeze(-1)  # (batch_size, pred_length)
        
        return out


In [19]:
# Initialize the model
input_size = 7  # ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_5', 'MA_10', 'MA_20', 'Return']
categorical_size = 4 # ['minute', 'hour', 'day_of_week', 'day_of_month', 'month', 'year']
static_size = 0  # No static features in this setup

# Instantiate the model
model = TemporalFusionTransformer(
    input_size=input_size,
    categorical_size=categorical_size,
    static_size=static_size,
    embed_size=64,
    lstm_hidden_size=512,
    lstm_layers=4,
    attention_heads=4,
    dropout=0.1,
    pred_length=pred_length
)

# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define the loss function
criterion = nn.MSELoss()

# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)


In [20]:
print(f"\nVariableSelectionNetwork input size: {model.variable_selection.fc1.in_features}")



VariableSelectionNetwork input size: 39


In [21]:
def train(model, train_loader, val_loader, criterion, optimizer, device, epochs):
    for epoch in range(epochs):
        model.train()
        for batch_idx, (X_real, X_cat, Y) in enumerate(train_loader):
            X_real = X_real.to(device)
            X_cat = X_cat.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(X_real, X_cat, torch.zeros((X_real.size(0), static_size)).to(device))  # (batch_size, pred_length)
            
            # Ensure `Y` and `outputs` have the same shape
            assert outputs.shape == Y.shape, f"Shape mismatch: outputs shape {outputs.shape} and Y shape {Y.shape}"

            # Calculate loss
            loss = criterion(outputs, Y)  # Shapes must match here
            loss.backward()
            optimizer.step()
            
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


In [22]:
def evaluate(model, val_loader, criterion, device):
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for X_real, X_cat, Y in val_loader:
            X_real = X_real.to(device)
            X_cat = X_cat.to(device)
            Y = Y.to(device)
            
            outputs = model(X_real, X_cat, torch.zeros((X_real.size(0), static_size)).to(device))  # No static features
            loss = criterion(outputs, Y)
            val_loss += loss.item()
    model.train()
    return val_loss / len(val_loader)

# Save the model state dict
torch.save(model.state_dict(), "model.pth")

# Or save the entire model
torch.save(model, "model_full.pth")


In [23]:
# Check the unique values and range of 'month'
print("\nUnique 'month' values in the dataset:")
print(data_final['month'].unique())

print("\nMinimum and Maximum 'month' values:")
print(data_final['month'].min(), data_final['month'].max())



Unique 'month' values in the dataset:
[11]

Minimum and Maximum 'month' values:
11 11


In [24]:
# List of categorical features
categorical_features = ['minute', 'hour', 'day_of_week', 'month']

# Verify all categorical features are present
print("\nCategorical Features Present in data_final:")
print(data_final[categorical_features].head())



Categorical Features Present in data_final:
   minute  hour  day_of_week  month
0      30    14            0     11
1      31    14            0     11
2      32    14            0     11
3      33    14            0     11
4      34    14            0     11


In [25]:
# Fetch a single batch
X_real, X_cat, Y = next(iter(train_loader))

# Move to device
X_real = X_real.to(device)
X_cat = X_cat.to(device)
Y = Y.to(device)

# Forward pass
outputs = model(X_real, X_cat, torch.zeros((X_real.size(0), static_size)).to(device))
print(f"\nOutput shape: {outputs.shape}")  # Should be (batch_size, pred_length)



Output shape: torch.Size([64, 30])


In [None]:
print(f"\nVariableSelectionNetwork output size: {model.variable_selection.fc2.out_features}")


In [26]:
print(f"\nProjection Layer: {model.projection}")



Projection Layer: Linear(in_features=1, out_features=64, bias=True)


In [28]:
# Train the model
epochs = 20
train(model, train_loader, val_loader, criterion, optimizer, device, epochs)


Epoch [1/20], Loss: 0.0760
Epoch [2/20], Loss: 0.0612
Epoch [3/20], Loss: 0.0687
Epoch [4/20], Loss: 0.0580
Epoch [5/20], Loss: 0.0680
Epoch [6/20], Loss: 0.0644
Epoch [7/20], Loss: 0.0585
Epoch [8/20], Loss: 0.0201
Epoch [9/20], Loss: 0.0164
Epoch [10/20], Loss: 0.0135
Epoch [11/20], Loss: 0.0107
Epoch [12/20], Loss: 0.0100
Epoch [13/20], Loss: 0.0096
Epoch [14/20], Loss: 0.0089
Epoch [15/20], Loss: 0.0087
Epoch [16/20], Loss: 0.0072
Epoch [17/20], Loss: 0.0062
Epoch [18/20], Loss: 0.0053
Epoch [19/20], Loss: 0.0048
Epoch [20/20], Loss: 0.0045


In [29]:
def predict(model, data_loader, device):
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for X_real, X_cat, Y in data_loader:
            X_real = X_real.to(device)
            X_cat = X_cat.to(device)
            Y = Y.to(device)
            
            outputs = model(X_real, X_cat, torch.zeros((X_real.size(0), static_size)).to(device))  # No static features
            predictions.append(outputs.cpu().numpy())
            actuals.append(Y.cpu().numpy())
    return np.concatenate(predictions), np.concatenate(actuals)

# Get predictions and actuals
preds, trues = predict(model, val_loader, device)


In [None]:
pip install torchviz


In [None]:
import os

save_path = '/kaggle/working/trained_model.pth'

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Flatten the arrays
preds_flat = preds.flatten()
trues_flat = trues.flatten()

# Calculate MAE and RMSE
mae = mean_absolute_error(trues_flat, preds_flat)
rmse = np.sqrt(mean_squared_error(trues_flat, preds_flat))

print(f"Validation MAE: {mae:.4f}")
print(f"Validation RMSE: {rmse:.4f}")


In [None]:
# Plot for the first sample in the validation set
sample_idx = 0

plt.figure(figsize=(12,6))
plt.plot(trues[sample_idx], label='Actual Close Price')
plt.plot(preds[sample_idx], label='Predicted Close Price')
plt.title(f"Close Price Forecast - Sample {sample_idx+1}")
plt.xlabel("Prediction Time Steps")
plt.ylabel("Close Price (Standardized)")
plt.legend()
plt.show()


In [None]:
# Inverse transform using the scaler
trues_original = scaler.inverse_transform(train_data[['Open', 'High', 'Low', 'Close', 'Volume', 
                                                    'MA_5', 'MA_10', 'MA_20', 'Return']].values)[:,3]  # 'Close' is the 4th column
preds_original = scaler.inverse_transform(train_data[['Open', 'High', 'Low', 'Close', 'Volume', 
                                                    'MA_5', 'MA_10', 'MA_20', 'Return']].values)[:,3]

# Adjust accordingly if using different splits


In [None]:
# Select a sample from validation set
sample_idx = 0

# Inverse transform
Y_true = trues[sample_idx]
Y_pred = preds[sample_idx]

# Since scaling was applied, we need to inverse it
# Extract only the 'Close' feature
# Assuming 'Close' is the 4th column in features_to_scale
close_index = features_to_scale.index('Close')
Y_true_original = scaler.inverse_transform(np.concatenate([np.zeros((len(Y_true), close_index)), Y_true.reshape(-1,1), 
                                                             np.zeros((len(Y_true), len(features_to_scale)-close_index-1))], axis=1))[:, close_index]
Y_pred_original = scaler.inverse_transform(np.concatenate([np.zeros((len(Y_pred), close_index)), Y_pred.reshape(-1,1), 
                                                             np.zeros((len(Y_pred), len(features_to_scale)-close_index-1))], axis=1))[:, close_index]

plt.figure(figsize=(12,6))
plt.plot(Y_true_original, label='Actual Close Price')
plt.plot(Y_pred_original, label='Predicted Close Price')
plt.title(f"Close Price Forecast - Sample {sample_idx+1}")
plt.xlabel("Prediction Time Steps")
plt.ylabel("Close Price ($)")
plt.legend()
plt.show()


In [None]:
def prepare_future_data(model, last_seq, future_steps, scaler, device):
    """
    Args:
        model: Trained TFT model
        last_seq (pd.DataFrame): The last 'seq_length' minutes of data
        future_steps (int): Number of future minutes to predict
        scaler: Fitted scaler
        device: torch device
    Returns:
        future_preds: Predicted 'Close' prices for future_steps
    """
    model.eval()
    with torch.no_grad():
        # Prepare the input tensors
        X_real = torch.tensor(last_seq[['Open', 'High', 'Low', 'Close', 'Volume', 
                                        'MA_5', 'MA_10', 'MA_20', 'Return']].values, dtype=torch.float32).unsqueeze(0).to(device)  # (1, seq_length, input_size)
        
        X_cat = torch.tensor(last_seq[['minute', 'hour', 'day_of_week', 'day_of_month', 'month', 'year']].astype(int).values, dtype=torch.long).unsqueeze(0).to(device)  # (1, seq_length, categorical_size)
        
        X_static = torch.zeros((1, static_size)).to(device)  # No static features
        
        # Predict
        preds = model(X_real, X_cat, X_static)  # (1, pred_length)
        
        # Convert to CPU and numpy
        preds = preds.cpu().numpy().flatten()
        
        # Inverse transform
        # Since only 'Close' was scaled, set other features to zero
        preds_scaled = np.concatenate([np.zeros((future_steps, close_index)), preds.reshape(-1,1), 
                                       np.zeros((future_steps, len(features_to_scale)-close_index-1))], axis=1)
        preds_original = scaler.inverse_transform(preds_scaled)[:, close_index]
        
        return preds_original


In [None]:
# Number of future steps to predict
future_steps = 30  # Next 30 minutes

# Get the last 'seq_length' minutes from the validation set
last_seq = val_data.iloc[-seq_length:]

# Prepare and predict
future_preds = prepare_future_data(model, last_seq, future_steps, scaler, device)

# Display the future predictions
print(f"Future {future_steps} minutes predictions for {ticker}:")
print(future_preds)


In [None]:
# Plot future predictions
plt.figure(figsize=(12,6))
plt.plot(range(future_steps), future_preds, label='Predicted Close Price')
plt.title(f"Future {future_steps} Minutes Close Price Forecast for {ticker}")
plt.xlabel("Future Time Steps")
plt.ylabel("Close Price ($)")
plt.legend()
plt.show()
