In [None]:
#clean data

In [1]:
pip install torch torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuff

In [2]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pandas as pd
import yfinance as yf
import os
from google.colab import files

# Manually Upload CSV Instead of Using Kaggle
print("Upload your CSV file containing company data:")
uploaded = files.upload()

# Get the uploaded file name
csv_filename = list(uploaded.keys())[0]
print(f"Uploaded file: {csv_filename}")

def load_company_data(filepath):
    df = pd.read_csv(filepath)

    if 'Symbol' not in df.columns or 'Industry' not in df.columns:
        raise ValueError("CSV file must contain 'Symbol' and 'Industry' columns.")

    label_encoder = LabelEncoder()
    df['Industry Encoded'] = label_encoder.fit_transform(df['Industry'])
    return df['Symbol'].tolist(), df['Industry'].tolist(), df['Industry Encoded'].tolist()

def fetch_stock_data(ticker, start_date, end_date):
    try:
        data = yf.download(f"{ticker}.NS", start=start_date, end=end_date)
        return data if not data.empty else None
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

def compute_features(stock_data):
    numeric_cols = ['Open', 'High', 'Low', 'Close']
    stock_data[numeric_cols] = stock_data[numeric_cols].astype(float, errors='ignore')

    for window in [5, 10, 15, 20, 25, 30]:
        stock_data[f'MA_{window}'] = stock_data['Close'].rolling(window=window).mean()

    scaler = StandardScaler()
    stock_data['Normalised Close'] = scaler.fit_transform(stock_data[['Close']])

    stock_data['Return Ratio'] = (stock_data['Close'] - stock_data['Close'].shift(1)) / stock_data['Close'].shift(1)
    stock_data['Percentage Change Open'] = stock_data['Open'] / stock_data['Close'] - 1
    stock_data['Percentage Change High'] = stock_data['High'] / stock_data['Close'] - 1
    stock_data['Percentage Change Low'] = stock_data['Low'] / stock_data['Close'] - 1

    return stock_data

def align_with_reference(stock_data, reference_dates):
    start_ref, end_ref = reference_dates
    stock_dates = stock_data.index

    if stock_dates.min() > start_ref or stock_dates.max() < end_ref:
        return False
    return True

def main():
    start_date, end_date = "2022-01-10", "2025-01-10"
    companies, sector, sector_encoded = load_company_data(csv_filename)
    os.makedirs('Data_is_here', exist_ok=True)

    reference_data = fetch_stock_data("ONGC", start_date, end_date)
    if reference_data is None:
        print("Failed to fetch reference data.")
        return
    reference_dates = (reference_data.index.min(), reference_data.index.max())

    missing = []
    for ticker in companies:
        stock_data = fetch_stock_data(ticker, start_date, end_date)
        if stock_data is not None:
            if not align_with_reference(stock_data, reference_dates):
                print(f"Skipping {ticker} due to insufficient data.")
                missing.append(ticker)
                continue

            stock_data = compute_features(stock_data)

            idx = companies.index(ticker) if ticker in companies else -1
            if idx != -1:
                stock_data['Sector'] = sector[idx]
                stock_data['Sector Encoded'] = sector_encoded[idx]

            stock_data.to_csv(f'Data_is_here/{ticker}_data.csv')
            print(f"Processed and saved data for {ticker}.")
        else:
            print(f"Skipping {ticker} due to missing data.")
            missing.append(ticker)

    with open("Data_is_here/skipped_stocks.txt", "w") as file:
        for line in missing:
            file.write(line + "\n")

    print("All data processing completed.")

if __name__ == "__main__":
    main()


Upload your CSV file containing company data:


Saving ind_nifty500list_filtered_final.csv to ind_nifty500list_filtered_final.csv
Uploaded file: ind_nifty500list_filtered_final.csv
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['ONGC.NS']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


Failed to fetch reference data.


In [None]:
import os
import pandas as pd

file_path = "/content/Data_is_here"

# Ensure the directory exists
if not os.path.exists(file_path):
    print("Error: Directory does not exist.")
    exit()

# Get list of CSV files
files = [f for f in os.listdir(file_path) if f.endswith(".csv")]

for file in files:
    file_full_path = os.path.join(file_path, file)

    df = pd.read_csv(file_full_path)

    # Ensure 'Return Ratio' exists in DataFrame
    if 'Return Ratio' not in df.columns:
        print(f"Skipping {file}: 'Return Ratio' column not found.")
        continue

    # Create the Stock_Movement_Label column
    df['Stock_Movement_Label'] = (df['Return Ratio'].shift(-5) > 0).astype(int)

    # Save the modified file
    df.to_csv(file_full_path, index=False)
    print(f"Processed {file}")


Processed ZFCVINDIA_data.csv
Processed CEATLTD_data.csv
Processed BRITANNIA_data.csv
Processed TRENT_data.csv
Processed MANAPPURAM_data.csv
Processed ABSLAMC_data.csv
Processed CCL_data.csv
Processed ECLERX_data.csv
Processed LTTS_data.csv
Processed DALBHARAT_data.csv
Processed SPARC_data.csv
Processed VOLTAS_data.csv
Processed NESTLEIND_data.csv
Processed INDIGO_data.csv
Processed KIMS_data.csv
Processed CUMMINSIND_data.csv
Processed GUJGASLTD_data.csv
Processed BAJAJHLDNG_data.csv
Processed DBREALTY_data.csv
Processed ADANIGREEN_data.csv
Processed HINDZINC_data.csv
Processed TATACOMM_data.csv
Processed BATAINDIA_data.csv
Processed MAXHEALTH_data.csv
Processed NBCC_data.csv
Processed EIDPARRY_data.csv
Processed CENTRALBK_data.csv
Processed BANDHANBNK_data.csv
Processed TRIVENI_data.csv
Processed LAURUSLABS_data.csv
Processed MUTHOOTFIN_data.csv
Processed SCI_data.csv
Processed NTPC_data.csv
Processed DEEPAKFERT_data.csv
Processed TATAMOTORS_data.csv
Processed TIINDIA_data.csv
Processe

In [None]:
import os
import pandas as pd

# Define directories
file_path = "/content/Data_is_here"
save_path = "/content/Preprocessed_data"

# Ensure save directory exists
os.makedirs(save_path, exist_ok=True)

def set_data(file):
    file_full_path = os.path.join(file_path, file)

    # Check if file is empty
    if os.stat(file_full_path).st_size == 0:
        print(f"⚠️ Skipping empty file: {file}")
        return

    try:
        # Read CSV
        df = pd.read_csv(file_full_path)

        # Fill NaN values with 0
        df.fillna(0, inplace=True)

        # Drop the first two rows (assuming metadata)
        df = df.iloc[2:].reset_index(drop=True)

        # Remove unnamed columns
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

        # Debugging: Print columns before renaming
        print(f"Processing file: {file}")
        print(f"Original columns: {df.columns.tolist()} ({len(df.columns)} columns)")

        # Expected column names
        expected_columns = [
            "Date",
            "Close", "High", "Low", "Open",
            "Volume",
            "MA_5", "MA_15", "MA_10", "MA_20", "MA_25", "MA_30",
            "Normalized_Close",
            "Return_Ratio", "Percentage_Change_Open", "Percentage_Change_High", "Percentage_Change_Low",
            "Sector",
            "Sector_Encoded",
            "Stock_Movement_Label"
        ]

        # Fix: Only rename columns if the count matches
        if len(df.columns) == len(expected_columns):
            df.columns = expected_columns
        else:
            print(f"❌ Column mismatch in {file}. Expected {len(expected_columns)} but found {len(df.columns)}. Skipping renaming.")
            return  # Skip further processing

        # Drop unnecessary columns
        df = df.drop(columns=["Date", "Sector", "Volume", "Normalized_Close"], errors='ignore')

        # Convert to numeric
        for col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Drop rows with NaN values and reset index
        df = df.dropna().reset_index(drop=True)

        # Save preprocessed file
        df.to_csv(os.path.join(save_path, file), index=False)
        print(f"✅ Processed {file} successfully.")

    except pd.errors.EmptyDataError:
        print(f"⚠️ Skipping {file}: File is empty or unreadable.")
    except Exception as e:
        print(f"❌ Error processing {file}: {e}")

# Process all files in the directory
files = os.listdir(file_path)

if not files:
    print("🚨 No CSV files found in the directory!")
else:
    for file in files:
        if file.endswith(".csv"):  # Only process CSV files
            set_data(file)


Processing file: ZFCVINDIA_data.csv
Original columns: ['Price', 'Close', 'High', 'Low', 'Open', 'Volume', 'MA_5', 'MA_10', 'MA_15', 'MA_20', 'MA_25', 'MA_30', 'Normalised Close', 'Return Ratio', 'Percentage Change Open', 'Percentage Change High', 'Percentage Change Low', 'Sector', 'Sector Encoded', 'Stock_Movement_Label'] (20 columns)
✅ Processed ZFCVINDIA_data.csv successfully.
Processing file: CEATLTD_data.csv
Original columns: ['Price', 'Close', 'High', 'Low', 'Open', 'Volume', 'MA_5', 'MA_10', 'MA_15', 'MA_20', 'MA_25', 'MA_30', 'Normalised Close', 'Return Ratio', 'Percentage Change Open', 'Percentage Change High', 'Percentage Change Low', 'Sector', 'Sector Encoded', 'Stock_Movement_Label'] (20 columns)
✅ Processed CEATLTD_data.csv successfully.
Processing file: BRITANNIA_data.csv
Original columns: ['Price', 'Close', 'High', 'Low', 'Open', 'Volume', 'MA_5', 'MA_10', 'MA_15', 'MA_20', 'MA_25', 'MA_30', 'Normalised Close', 'Return Ratio', 'Percentage Change Open', 'Percentage Change 

In [None]:
import os

file_path = "/content/Data_is_here"

if os.path.exists(file_path):
    files = [file for file in os.listdir(file_path) if file.endswith(".csv")]
    print(f"Number of companies (CSV files): {len(files)}")
else:
    print(f"Directory {file_path} does not exist.")


df = pd.read_csv("/content/Preprocessed_data/POWERGRID_data.csv")  # Replace with an actual filename
print(df.head())  # Check if "Stock_Movement_Label" is updated

Number of companies (CSV files): 444
        Close        High         Low        Open        MA_5  MA_15  MA_10  \
0  128.637039  130.686404  128.290219  130.686404    0.000000    0.0    0.0   
1  128.542450  129.772076  128.321752  128.983859    0.000000    0.0    0.0   
2  129.236084  130.339588  128.857742  129.898192    0.000000    0.0    0.0   
3  131.159348  133.366356  130.245021  130.907115    0.000000    0.0    0.0   
4  131.001709  132.199802  130.245026  131.127819  129.715326    0.0    0.0   

   MA_20  MA_25  MA_30  Return_Ratio  Percentage_Change_Open  \
0    0.0    0.0    0.0      0.000000                0.015931   
1    0.0    0.0    0.0     -0.000735                0.003434   
2    0.0    0.0    0.0      0.005396                0.005123   
3    0.0    0.0    0.0      0.014882               -0.001923   
4    0.0    0.0    0.0     -0.001202                0.000963   

   Percentage_Change_High  Percentage_Change_Low  Sector_Encoded  \
0                0.015931          

In [None]:
import pandas as pd

df = pd.read_csv("/content/Data_is_here/POWERGRID_data.csv")  # Replace with an actual filename
print(df.head())  # Check if "Stock_Movement_Label" is updated


        Price               Close                High                 Low  \
0      Ticker        POWERGRID.NS        POWERGRID.NS        POWERGRID.NS   
1        Date                 NaN                 NaN                 NaN   
2  2022-01-10   128.6370391845703  130.68640377942253  128.29021850612708   
3  2022-01-11  128.54244995117188  129.77207639968722  128.32175171550662   
4  2022-01-12    129.236083984375  130.33958803879312  128.85774230295706   

                 Open        Volume  MA_5  MA_10  MA_15  MA_20  MA_25  MA_30  \
0        POWERGRID.NS  POWERGRID.NS   NaN    NaN    NaN    NaN    NaN    NaN   
1                 NaN           NaN   NaN    NaN    NaN    NaN    NaN    NaN   
2  130.68640377942253      11085710   NaN    NaN    NaN    NaN    NaN    NaN   
3  128.98385925155756       7208789   NaN    NaN    NaN    NaN    NaN    NaN   
4  129.89819154864816      11018139   NaN    NaN    NaN    NaN    NaN    NaN   

   Normalised Close  Return Ratio  Percentage Change Ope

In [None]:
#graph

In [None]:
# import os
# import numpy as np
# import pandas as pd
# import torch

# # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# # class StockDataLoader:
#     def __init__(self, data_dir, window_size):
#         self.data_dir = data_dir
#         self.window_size = window_size
#         self.file_paths = [
#             os.path.join(data_dir, f)
#             for f in os.listdir(data_dir)
#             if f.endswith('.csv')
#         ]
#         self.data, self.stock_names, self.labels, self.returns = self._load_all_data()
#         self.num_stocks = len(self.data)

#     def _load_all_data(self):
#         """Load all stock data into memory, separating features, labels, and next-day returns."""
#         data = []
#         stock_names = []
#         labels = []  # For stock movement (y_move)
#         returns = []  # For next day's return ratio (y_return)

#         for path in self.file_paths:
#             df = pd.read_csv(path)
#             df = df.loc[:, ~df.columns.str.contains('^Unnamed')]  # Remove unnamed columns

#             # Ensure the dataset contains a 'Close' column
#             if 'Close' not in df.columns:
#                 raise ValueError(f"Dataset {path} does not contain a 'Close' column.")

#             # Separate features and labels (assuming labels are in the last column)
#             feature_columns = df.columns[:-1]  # All columns except the last one (features)
#             label_column = df.columns[-1]  # Last column is the movement label

#             stock_data = df[feature_columns].values  # Features (timesteps, features)
#             label_data = df[label_column].values   # Movement labels (timesteps, 1)

#             # Compute next day's return ratio
#             close_prices = df['Close'].values
#             return_ratio = np.zeros_like(close_prices)  # Initialize array for returns
#             return_ratio[:-1] = (close_prices[1:] / close_prices[:-1]) - 1  # (P_t+1 / P_t) - 1
#             return_ratio[-1] = 0  # No next day return for the last entry

#             stock_names.append(os.path.basename(path))  # Store the stock name (filename)
#             data.append(stock_data)
#             labels.append(label_data)  # Store movement labels
#             returns.append(return_ratio)  # Store return ratios

#         return data, stock_names, labels, returns

#     def __iter__(self):
#         """Generator that yields batches using a sliding window with batch size = number of stocks."""
#         while True:
#             batch = []
#             stock_batch_names = []
#             batch_labels = []  # Movement labels (single value for the next day)
#             batch_returns = []  # Next day's return ratio (single value)

#             for i in range(self.num_stocks):
#                 stock_data = self.data[i]
#                 stock_labels = self.labels[i]
#                 stock_returns = self.returns[i]  # Get return ratios

#                 # Randomly choose a start index for the sliding window, ensuring we don't go out of bounds
#                 start_idx = np.random.randint(0, len(stock_data) - self.window_size)

#                 window = stock_data[start_idx:start_idx + self.window_size]  # Sliding window of features

#                 # The next day's values for movement and return
#                 next_day_label = stock_labels[start_idx + self.window_size]  # Movement label (next day)
#                 next_day_return = stock_returns[start_idx + self.window_size]  # Return ratio (next day)

#                 # Append the single value of movement and return for the next day
#                 batch.append(window)
#                 batch_labels.append(next_day_label)  # Append the next day's movement label
#                 batch_returns.append(next_day_return)  # Append the next day's return ratio
#                 stock_batch_names.append(self.stock_names[i][:-9])  # Get the stock name

#             # Stack to form (num_stocks, window_size, features) and labels (num_stocks,)
#             batch_tensor = torch.tensor(np.stack(batch), dtype=torch.float32).to(device)
#             labels_tensor = torch.tensor(np.stack(batch_labels), dtype=torch.float32).to(device)  # Movement labels (num_stocks,)
#             returns_tensor = torch.tensor(np.stack(batch_returns), dtype=torch.float32).to(device)  # Return ratios (num_stocks,)

#             # Yield features, stock names, movement labels, and return ratios
#             yield batch_tensor, stock_batch_names, labels_tensor, returns_tensor

# # Example usage
# data_dir = '/content/Preprocessed_data'  # Directory containing 403 CSV files
# window_size = 30  # Number of timesteps per bdonatch for each stock

# # Initialize the data loader
# data_loader = StockDataLoader(data_dir, window_size)

# # Create an iterator
# data_iter = iter(data_loader)

# # Fetch one batch
# batch, stock_names, y_move, y_return = next(data_iter)

# print(batch.device)       # Output: torch.Size([num_stocks, 30, num_features])
# # print(stock_names)  # Number of stock names
# print(y_move.device)      # Output: torch.Size([num_stocks,]) (next day's movement)
# print(y_return.device)    # Output: torch.Size([num_stocks,]) (next day's return)

In [6]:
import os
import numpy as np
import pandas as pd
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class StockDataLoader:
    def __init__(self, data_dir, window_size):
        self.data_dir = data_dir
        self.window_size = window_size
        self.file_paths = [
            os.path.join(data_dir, f)
            for f in os.listdir(data_dir)
            if f.endswith('.csv')
        ]
        self.data, self.stock_names, self.labels, self.returns = self._load_all_data()
        self.num_stocks = len(self.data)

    def _load_all_data(self):
        """Load all stock data into memory, separating features, labels, and next-day returns."""
        data = []
        stock_names = []
        labels = []  # For stock movement (y_move)
        returns = []  # For next day's return ratio (y_return)

        for path in self.file_paths:
            df = pd.read_csv(path)
            df = df.loc[:, ~df.columns.str.contains('^Unnamed')]  # Remove unnamed columns

            # Ensure the dataset contains a 'Close' column
            if 'Close' not in df.columns:
                raise ValueError(f"Dataset {path} does not contain a 'Close' column.")

            # Separate features and labels (assuming labels are in the last column)
            feature_columns = df.columns[:-1]  # All columns except the last one (features)
            label_column = df.columns[-1]  # Last column is the movement label

            stock_data = df[feature_columns].values  # Features (timesteps, features)
            label_data = df[label_column].values   # Movement labels (timesteps, 1)

            # Compute next day's return ratio
            close_prices = df['Close'].values
            return_ratio = np.zeros_like(close_prices)  # Initialize array for returns
            return_ratio[:-1] = (close_prices[1:] / close_prices[:-1]) - 1  # (P_t+1 / P_t) - 1
            # return_ratio[-1] = 0  # No next day return for the last entry

            stock_names.append(os.path.basename(path))  # Store the stock name (filename)
            data.append(stock_data)
            labels.append(label_data)  # Store movement labels
            returns.append(return_ratio)  # Store return ratios

        return data, stock_names, labels, returns

    def __iter__(self):
        """Generator that yields batches using a sliding window with batch size = number of stocks."""
        while True:
            batch = []
            stock_batch_names = []
            batch_labels = []  # Movement labels (single value for the next day)
            batch_returns = []  # Next day's return ratio (single value)

            for i in range(self.num_stocks):
                stock_data = self.data[i]
                stock_labels = self.labels[i]
                stock_returns = self.returns[i]  # Get return ratios

                # Randomly choose a start index for the sliding window, ensuring we don't go out of bounds
                start_idx = np.random.randint(0, len(stock_data) - self.window_size)

                window = stock_data[start_idx:start_idx + self.window_size]  # Sliding window of features

                # The next day's values for movement and return
                next_day_label = stock_labels[start_idx + self.window_size]  # Movement label (next day)
                next_day_return = stock_returns[start_idx + self.window_size]  # Return ratio (next day)

                # Append the single value of movement and return for the next day
                batch.append(window)
                batch_labels.append(next_day_label)  # Append the next day's movement label
                batch_returns.append(next_day_return)  # Append the next day's return ratio
                stock_batch_names.append(self.stock_names[i][:-9])  # Get the stock name

            # Stack to form (num_stocks, window_size, features) and labels (num_stocks,)
            batch_tensor = torch.tensor(np.stack(batch), dtype=torch.float32).to(device)
            labels_tensor = torch.tensor(np.stack(batch_labels), dtype=torch.float32).to(device)  # Movement labels (num_stocks,)
            returns_tensor = torch.tensor(np.stack(batch_returns), dtype=torch.float32).to(device)  # Return ratios (num_stocks,)

            # Yield features, stock names, movement labels, and return ratios
            yield batch_tensor, stock_batch_names, labels_tensor, returns_tensor

# Example usage
data_dir = 'Preprocessed_data'  # Directory containing 403 CSV files
window_size = 30  # Number of timesteps per batch for each stock

# Initialize the data loader
data_loader = StockDataLoader(data_dir, window_size)

# Create an iterator
data_iter = iter(data_loader)

# Fetch one batch
batch, stock_names, y_move, y_return = next(data_iter)

print(batch.device)       # Output: torch.Size([num_stocks, 30, num_features])
# print(stock_names)  # Number of stock names
print(y_move.device)      # Output: torch.Size([num_stocks,]) (next day's movement)
print(y_return.device)    # Output: torch.Size([num_stocks,]) (next day's return)

cuda:0
cuda:0
cuda:0


In [7]:
import torch
import torch.nn as nn
from torch_geometric.nn import GATConv

class StockGAT(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, heads=4, dropout=0.1):
        """
        Graph Attention Network (GAT) for modeling stock relationships within a sector.

        - in_dim: Input embedding size (e.g., 64)
        - hidden_dim: Size of hidden layer
        - out_dim: Output embedding size (final node representation)
        - heads: Number of attention heads
        - dropout: Dropout rate
        """
        super(StockGAT, self).__init__()
        self.gat1 = GATConv(in_dim, hidden_dim, heads=heads, dropout=dropout)
        self.gat2 = GATConv(hidden_dim * heads, out_dim, heads=1, dropout=dropout)
        self.relu = nn.ReLU()

    def forward(self, x, edge_index):
        """
        x: Tensor of stock embeddings (num_stocks, num_weeks, hidden_dim)
        edge_index: Graph connections (2, num_edges)
        """
        # Flatten the input tensor for GAT (num_stocks * num_weeks, hidden_dim)
        x = x.view(-1, x.size(-1)).to(device)  # Ensure x is on device
        edge_index = edge_index.to(device)  # Explicitly move edge_index
        # edge_index = edge_index.to(device)
        # Apply GAT layers
        x = self.gat1(x, edge_index)
        x = self.relu(x)
        x = self.gat2(x, edge_index)

        # Reshape the output back to (num_stocks, num_weeks, hidden_dim)
        x = x.view(-1, 6, x.size(-1))  # Assuming 6 weeks per stock

        return x

def process_batch(batch_stock_names, ai_batch, model, intra_sector_stock_2_stock):
    """
    Process a batch of stock names and their respective neighbors through the StockGAT model.

    - batch_stock_names: List of stock names in the current batch.
    - ai_batch: Tensor containing the embeddings of the stocks, shape (batch_size, num_weeks, hidden_dim).
    - model: The GAT model.
    - intra_sector_stock_2_stock: Dictionary mapping a stock to its neighbors.

    Returns:
    - A tensor of shape (batch_size, num_weeks, hidden_dim) representing the output embeddings.
    """
    # Create a list to store the edge connections for the entire batch
    edge_index_list = []

    for idx, name in enumerate(batch_stock_names):
        # Find neighbors of the current stock
        neighbour_names = intra_sector_stock_2_stock.get(name, [])

        # Add edges for the stock and its neighbors
        for neighbour in neighbour_names:
            neighbour_idx = batch_stock_names.index(neighbour)
            edge_index_list.append([idx, neighbour_idx])  # Add edge from the stock to its neighbor
            edge_index_list.append([neighbour_idx, idx])  # Add edge from the neighbor to the stock

    # Convert edge_index list to a tensor
    edge_index = torch.tensor(edge_index_list, dtype=torch.long).t().contiguous()
    edge_index = edge_index.to(device)
    # Pass the embeddings through the model
    gi_batch = model(ai_batch, edge_index)

    return gi_batch

# # Example usage
# batch_stock_names = ["AAPL", "GOOG", "AMZN", "MSFT"]
# ai_batch = torch.randn(4, 6, 64)  # Simulated embeddings for 4 stocks over 6 weeks
# intra_sector_stock_2_stock = {
#     "AAPL": ["GOOG", "MSFT"],
#     "GOOG": ["AAPL", "AMZN"],
#     "AMZN": ["GOOG", "MSFT"],
#     "MSFT": ["AAPL", "AMZN"]
# }
# model = StockGAT(in_dim=64, hidden_dim=64, out_dim=64)  # Example GAT model

# gi_batch = process_batch(batch_stock_names, ai_batch, model, intra_sector_stock_2_stock)
# # print(output_embeddings.shape)  # Expected shape: (4, 6, 64)

In [8]:
pip install matplotlib



In [9]:
import torch
import torch.nn as nn

class AttentiveLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, days_per_week=5):
        super(AttentiveLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.days_per_week = days_per_week
        # Replacing GRU with LSTM
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.attention = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """
        x: Tensor of shape (batch_size, total_days, input_dim)
        """
        batch_size, total_days, _ = x.shape

        # Number of weeks
        num_weeks = total_days // self.days_per_week

        # Reshape to (batch_size, num_weeks, days_per_week, input_dim)
        x_reshaped = x.view(batch_size, num_weeks, self.days_per_week, -1)

        weekly_embeddings = []

        for i in range(num_weeks):
            week_input = x_reshaped[:, i, :, :]  # Shape: (batch_size, days_per_week, input_dim)

            # LSTM forward pass
            h_seq, (h_n, c_n) = self.lstm(week_input)  # (batch_size, days_per_week, hidden_dim)

            # Compute attention scores
            attn_weights = torch.tanh(self.attention(h_seq))  # (batch_size, days_per_week, 1)
            attn_weights = torch.softmax(attn_weights, dim=1)  # Normalize over time dimension

            # Compute weighted sum for weekly embeddings
            weekly_embedding = torch.sum(attn_weights * h_seq, dim=1)  # (batch_size, hidden_dim)

            weekly_embeddings.append(weekly_embedding)

        # Stack to get final shape (batch_size, num_weeks, hidden_dim)
        weekly_embeddings = torch.stack(weekly_embeddings, dim=1)

        return weekly_embeddings

# Example usage
# batch_size = 403
# total_days = 30  # 30 days
# input_dim = 15  # Feature dimension per day

# model = AttentiveLSTM(input_dim=input_dim, hidden_dim=64, days_per_week=5)
# dummy_input = torch.randn(batch_size, total_days, input_dim)  # Simulated daily features
# weekly_embeddings = model(dummy_input)  # Output shape: (batch_size, num_weeks, hidden_dim)

# print(weekly_embeddings.shape)  # Expected output: (403, 6, 64)


In [10]:
import torch
import torch.nn as nn

class LongTermAttentionBlock(nn.Module):
    def __init__(self, input_dim, hidden_dim, t=5):
        """
        A block that processes `ai_batch` (stock-specific embeddings) and `gi_batch` (sector-level embeddings)
        to compute the long-term trends using an attention mechanism.

        - input_dim: The dimension of the input embeddings (e.g., 64).
        - hidden_dim: The size of the hidden layer.
        - t: The look-back period (i.e., number of past weeks to consider for attention).
        """
        super(LongTermAttentionBlock, self).__init__()
        self.t = t
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        # Learnable parameters W and V for the attention mechanism
        self.W = nn.Parameter(torch.randn(input_dim, hidden_dim))
        self.V = nn.Parameter(torch.randn(hidden_dim, 1))

    def forward(self, ai_batch, gi_batch):
        """
        ai_batch: Tensor of shape (batch_size, t, input_dim) representing stock-specific embeddings.
        gi_batch: Tensor of shape (batch_size, t, input_dim) representing sector-level embeddings.

        Returns:
        - long_term_ai: Long-term embedding for stock-level trends (shape: (batch_size, hidden_dim)).
        - long_term_gi: Long-term embedding for sector-level trends (shape: (batch_size, hidden_dim)).
        """
        # Process the ai_batch (stock-specific embeddings)
        ai_attention_weights = self.compute_attention(ai_batch)  # (batch_size, t)

        # Expand the attention weights to shape (batch_size, t, 1) for broadcasting
        ai_attention_weights = ai_attention_weights.unsqueeze(-1)  # (batch_size, t, 1)

        # Compute weighted sum for ai_batch long-term embedding
        long_term_ai = torch.sum(ai_attention_weights * ai_batch, dim=1)  # (batch_size, input_dim)

        # Process the gi_batch (sector-level embeddings)
        gi_attention_weights = self.compute_attention(gi_batch)  # (batch_size, t)

        # Expand the attention weights to shape (batch_size, t, 1) for broadcasting
        gi_attention_weights = gi_attention_weights.unsqueeze(-1)  # (batch_size, t, 1)

        # Compute weighted sum for gi_batch long-term embedding
        long_term_gi = torch.sum(gi_attention_weights * gi_batch, dim=1)  # (batch_size, input_dim)

        return long_term_ai, long_term_gi

    def compute_attention(self, u_batch):
        """
        Compute attention weights using the formula:

        αj = exp(W^T * tanh(V * uj)) / sum(exp(W^T * tanh(V * uk))) for k in (i-t, ..., i-1)

        u_batch: Tensor of shape (batch_size, t, input_dim)
        """
        batch_size, t, input_dim = u_batch.shape

        # Apply the attention mechanism
        u_flat = u_batch.view(batch_size * t, input_dim)  # Flatten to shape (batch_size * t, input_dim)
        u_tanh = torch.tanh(torch.matmul(u_flat, self.W))  # (batch_size * t, hidden_dim)
        attention_scores = torch.matmul(u_tanh, self.V)  # (batch_size * t, 1)

        # Reshape attention_scores back to (batch_size, t)
        attention_scores = attention_scores.view(batch_size, t)  # (batch_size, t)

        # Apply softmax to get attention weights
        attention_weights = torch.softmax(attention_scores, dim=1)  # (batch_size, t)

        return attention_weights

# # Example usage
# batch_size = 407
# t = 6  # Look-back period for long-term embedding
# input_dim = 64
# hidden_dim = 64

# # Simulate the ai_batch (stock-specific) and gi_batch (sector-level) embeddings
# ai_batch = torch.randn(batch_size, t, input_dim)
# gi_batch = torch.randn(batch_size, t, input_dim)

# # Initialize the long-term attention block
# long_term_block = LongTermAttentionBlock(input_dim=input_dim, hidden_dim=hidden_dim, t=t)

# # Get the long-term embeddings for both stock-level and sector-level trends
# long_term_ai, long_term_gi = long_term_block(ai_batch, gi_batch)

# print("Long-term stock-level trend:", long_term_ai.shape)  # (batch_size, hidden_dim)
# print("Long-term sector-level trend:", long_term_gi.shape)  # (batch_size, hidden_dim)

In [11]:
import torch
import torch.nn as nn

class SectorEmbeddingBlock(nn.Module):
    def __init__(self, input_dim, num_sectors, t=5):
        """
        A block that processes `long_term_gi_batch` (sector-level embeddings) and computes the sector-level embeddings
        using max pooling.

        - input_dim: The dimension of the input embeddings (e.g., 64).
        - num_sectors: Number of sectors.
        - t: The look-back period (i.e., number of past weeks to consider for attention).
        """
        super(SectorEmbeddingBlock, self).__init__()
        self.t = t
        self.input_dim = input_dim
        self.num_sectors = num_sectors

    def forward(self, long_term_gi_batch, batch_names, sector_dict):
        """
        long_term_gi_batch: Tensor of shape (batch_size, input_dim) representing long-term sector embeddings.
        batch_names: List of stock names corresponding to each embedding in long_term_gi_batch.
        sector_dict: Dictionary mapping sector names to a list of stock names in that sector.

        Returns:
        - sector_embeddings: Tensor of shape (num_sectors, input_dim) representing the pooled
          embeddings for each sector.
        """
        # Initialize a tensor to store sector embeddings
        sector_embeddings = torch.zeros(self.num_sectors, self.input_dim).to(long_term_gi_batch.device)

        # Step 1: Max pooling across stocks within each sector
        for sector_idx, sector in enumerate(sector_dict.keys()):
            # Get the list of stock names in the current sector
            sector_stocks = sector_dict[sector]

            # Get the indices of stocks that belong to the current sector
            sector_indices = [i for i, name in enumerate(batch_names) if name in sector_stocks]

            # Extract the embeddings for the stocks in this sector
            sector_embeddings_tensor = long_term_gi_batch[sector_indices]  # (num_stocks_in_sector, input_dim)

            # Max pooling across the time dimension (axis=0)
            # Check if sector_embeddings_tensor is empty
            if sector_embeddings_tensor.shape[0] > 0:
                sector_embeddings[sector_idx] = torch.max(sector_embeddings_tensor, dim=0)[0]
            else:
                print(f"Empty tensor encountered for sector {sector_idx}")
                # Handle empty tensor case (e.g., skip or use a default embedding)
        return sector_embeddings

# # Example usage
# batch_size = 403
# t = 6  # Look-back period for long-term embedding
# input_dim = 64
# num_sectors = 5  # Number of sectors

# # Simulate the long_term_gi_batch (sector-level) embeddings
# long_term_gi_batch = torch.randn(batch_size, input_dim)
# # Simulate the stock names for each embedding
# batch_names = [f"stock_{i}" for i in range(batch_size)]

# # Simulate a sector dictionary with some dummy data
# sector_dict = {
#     "sector_1": [f"stock_{i}" for i in range(0, 100)],
#     "sector_2": [f"stock_{i}" for i in range(100, 200)],
#     "sector_3": [f"stock_{i}" for i in range(200, 300)],
#     "sector_4": [f"stock_{i}" for i in range(300, 400)],
#     "sector_5": [f"stock_{i}" for i in range(400, 403)]
# }

# # Initialize the sector embedding block
# sector_embedding_block = SectorEmbeddingBlock(input_dim=input_dim, num_sectors=num_sectors, t=t)

# # Get the sector-level embeddings
# sector_embeddings = sector_embedding_block(long_term_gi_batch, batch_names, sector_dict)

# print("Sector embeddings:", sector_embeddings.shape)  # (num_sectors, input_dim)

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.data import Data

class GATEmbeddingBlock(nn.Module):
    def __init__(self, input_dim, num_sectors, num_heads=1):
        """
        A block that takes sector embeddings and applies Graph Attention Network (GAT)

        - input_dim: The dimension of the input embeddings (e.g., 64).
        - num_sectors: Number of sectors.
        - num_heads: The number of attention heads for GAT.
        """
        super(GATEmbeddingBlock, self).__init__()

        self.num_heads = num_heads

        # GAT layer to process the sector embeddings
        self.gat = GATConv(input_dim, input_dim, heads=num_heads, concat=False)

    def forward(self, sector_embeddings, edge_index):
        """
        sector_embeddings: Tensor of shape (num_sectors, input_dim) representing the sector embeddings.
        edge_index: The graph structure (edge indices) for GAT.

        Returns:
        - learned_sector_embeddings: Tensor of shape (num_sectors, input_dim) representing the updated sector embeddings.
        """
        # Apply GAT
        learned_sector_embeddings = self.gat(sector_embeddings, edge_index)

        return learned_sector_embeddings

# # Example usage
# batch_size = 403
# t = 6  # Look-back period for long-term embedding
# input_dim = 64
# num_sectors = 5  # Number of sectors

# # Simulate the gi_batch (sector-level) embeddings
# gi_batch = torch.randn(batch_size, input_dim)
# # Simulate the stock names for each embedding
# batch_names = [f"stock_{i}" for i in range(batch_size)]

# # Simulate a sector dictionary with some dummy data
# sector_dict = {
#     "sector_1": [f"stock_{i}" for i in range(0, 100)],
#     "sector_2": [f"stock_{i}" for i in range(100, 200)],
#     "sector_3": [f"stock_{i}" for i in range(200, 300)],
#     "sector_4": [f"stock_{i}" for i in range(300, 400)],
#     "sector_5": [f"stock_{i}" for i in range(400, 403)]
# }

# # Initialize the sector embedding block
# sector_embedding_block = SectorEmbeddingBlock(input_dim=input_dim, num_sectors=num_sectors, t=t)

# # Get the sector-level embeddings
# sector_embeddings = sector_embedding_block(gi_batch, batch_names, sector_dict)

# # Construct a fully connected graph for sectors
# # Fully connected graph means every sector is connected to every other sector
# edge_index = torch.combinations(torch.arange(num_sectors), r=2).T  # (2, num_edges)

# # Initialize the GAT embedding block
# gat_embedding_block = GATEmbeddingBlock(input_dim=input_dim, num_sectors=num_sectors, num_heads=1)

# # Get the learned sector embeddings after applying GAT
# learned_sector_embeddings = gat_embedding_block(sector_embeddings, edge_index)

# print("Learned sector embeddings:", learned_sector_embeddings.shape)  # (num_sectors, input_dim)

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.data import Data

# Function to map stocks to sector embeddings and concatenate them
def map_stocks_to_sector_embeddings(batch_names, sector_dict, sector_embeddings, input_dim, stock_embeddings):
    """
    Map stocks in `batch_names` to their sector embeddings based on `sector_dict`.
    Then, concatenate stock-level, sector-level, and inter-sector embeddings.

    - batch_names: List of stock names in the order they appear in the embeddings.
    - sector_dict: Dictionary mapping sector names to a list of stock names in that sector.
    - sector_embeddings: Tensor of sector embeddings (num_sectors, input_dim).
    - input_dim: The embedding size (dimensionality of each embedding vector).

    Returns:
    - final_embeddings: Tensor containing the concatenated embeddings for each stock.
    """
    batch_size = len(batch_names)
    #print("batch size is", batch_size)
    sector_map = {name: sector for sector, names in sector_dict.items() for name in names}
    # Create a tensor to hold the mapped inter-sector embeddings for each stock
    mapped_inter_sector_embeddings = torch.ones(batch_size, input_dim, device=stock_embeddings.device)
    # Iterate over batch_names and assign sector embeddings
    for i, stock in enumerate(batch_names):
        if stock not in sector_map.keys():
            #print(stock," not in sector_map")
            continue
        sector_name = sector_map[stock]  # Find the sector of stock i
        sector_id = list(sector_dict.keys()).index(sector_name)  # Get sector index
        mapped_inter_sector_embeddings[i] = sector_embeddings[sector_id]  # Map sector embedding
    # print("Stcok embeddgins", stock_embeddings.shape)
    # print("mapped_inter_sector_embeddings shape",mapped_inter_sector_embeddings.shape)
    # Concatenate stock-level, sector-level, and inter-sector embeddings
    return mapped_inter_sector_embeddings

# Define the GAT block for sector embeddings processing
class GATEmbeddingBlock(nn.Module):
    def __init__(self, input_dim, num_sectors, num_heads=1):
        super(GATEmbeddingBlock, self).__init__()
        self.num_heads = num_heads
        self.gat = GATConv(input_dim, input_dim, heads=num_heads, concat=False)

    def forward(self, sector_embeddings, edge_index):
        learned_sector_embeddings = self.gat(sector_embeddings, edge_index)
        return learned_sector_embeddings

# Example usage
# batch_size = 407
# input_dim = 64
# num_sectors = 5  # Number of sectors
# t = 6  # Look-back period for long-term embedding

# # Simulate stock-level embeddings (gi_batch)
# stock_embeddings = torch.randn(batch_size, input_dim)  # (403, 64) for stock-specific
# # Simulate sector-level embeddings (sector_embeddings)
# sector_embeddings = torch.randn(num_sectors, input_dim)  # (5, 64) for sector-level embeddings

# # Simulate batch_names (list of stock names)
# batch_names = [f"stock_{i}" for i in range(batch_size)]

# # Simulate sector dictionary (mapping sectors to stock names)
# sector_dict = {
#     "sector_1": [f"stock_{i}" for i in range(0, 100)],
#     "sector_2": [f"stock_{i}" for i in range(100, 200)],
#     "sector_3": [f"stock_{i}" for i in range(200, 300)],
#     "sector_4": [f"stock_{i}" for i in range(300, 400)],
#     "sector_5": [f"stock_{i}" for i in range(400, 403)],
# }

# # Call the function to map stocks to sector embeddings and concatenate them
# final_embeddings = map_stocks_to_sector_embeddings(batch_names, sector_dict, sector_embeddings, input_dim)

# # Construct a fully connected graph for sectors
# edge_index = torch.combinations(torch.arange(num_sectors), r=2).T  # (2, num_edges)

# # Initialize the GAT embedding block
# gat_embedding_block = GATEmbeddingBlock(input_dim=input_dim, num_sectors=num_sectors, num_heads=1)

# # Get the learned sector embeddings after applying GAT
# learned_sector_embeddings = gat_embedding_block(sector_embeddings, edge_index)

# print("Final concatenated embeddings shape:", final_embeddings.shape)  # Should be (403, 128)
# print("Learned sector embeddings shape:", learned_sector_embeddings.shape)  # (num_sectors, input_dim)

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class EmbeddingFusionLayer(nn.Module):
    def __init__(self, input_dim):
        """
        The Embedding Fusion Layer that combines short-term, intra-sector, and inter-sector embeddings.

        - input_dim: The dimensionality of the individual embeddings (e.g., 64).
        """
        super(EmbeddingFusionLayer, self).__init__()

        # Learnable weight matrix for fusion
        self.Wf = nn.Linear(input_dim * 3, input_dim)  # Input is concatenated, so 3 * input_dim

    def forward(self, short_term_embeddings, intra_sector_embeddings, inter_sector_embeddings_mapped):
        """
        Forward pass to combine the embeddings.

        - short_term_embeddings: Tensor of shape (batch_size, input_dim)
        - intra_sector_embeddings: Tensor of shape (batch_size, input_dim)
        - inter_sector_embeddings: Tensor of shape (batch_size, input_dim)

        Returns:
        - final_embeddings: Tensor of shape (batch_size, input_dim)
        """
        # Concatenate the embeddings along the feature dimension (axis 1)
        concatenated_embeddings = torch.cat(
            (short_term_embeddings, intra_sector_embeddings, inter_sector_embeddings_mapped), dim=1
        )  # Shape: (batch_size, input_dim*3)

        # Apply the fusion weight matrix (Wf) and ReLU activation
        final_embeddings = F.relu(self.Wf(concatenated_embeddings))

        return final_embeddings

# # Example usage
# batch_size = 403
# input_dim = 64

# # Simulate the individual embeddings (for simplicity)
# short_term_embeddings = torch.randn(batch_size, input_dim)  # (batch_size, input_dim)
# intra_sector_embeddings = torch.randn(batch_size, input_dim)  # (batch_size, input_dim)
# inter_sector_embeddings_mapped = torch.randn(batch_size, input_dim)  # (batch_size, input_dim)

# # Initialize the embedding fusion layer
# embedding_fusion_layer = EmbeddingFusionLayer(input_dim=input_dim)

# # Get the final fused embeddings
# final_embeddings = embedding_fusion_layer(short_term_embeddings, intra_sector_embeddings, inter_sector_embeddings_mapped)

# print("Final fused embeddings shape:", final_embeddings.shape)  # Should be (batch_size, input_dim)

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class LossFunction(nn.Module):
    def __init__(self, input_dim, return_dim=1, move_dim=1, lambda_reg=0.01):
        """
        Model to predict return ratio and stock movement.

        - input_dim: The input dimension (e.g., size of the final fused embedding).
        - return_dim: Output dimension for return ratio (typically 1).
        - move_dim: Output dimension for movement (typically 1, binary classification).
        - lambda_reg: Regularization parameter for L2 regularization.
        """
        super(LossFunction, self).__init__()

        # Task-specific parameters
        self.e1 = nn.Parameter(torch.randn(input_dim, device=device))  # Hidden vector for return ratio
        self.b1 = nn.Parameter(torch.zeros(1, device=device))  # Bias for return ratio

        self.e2 = nn.Parameter(torch.randn(input_dim, device=device))  # Hidden vector for stock movement
        self.b2 = nn.Parameter(torch.zeros(1, device=device))  # Bias for stock movement

        self.lambda_reg = lambda_reg  # Regularization parameter

    def get_ys(self, tau_F):
        """
        Forward pass for return and movement prediction.

        - tau_F: The final fused embeddings of shape (batch_size, input_dim).

        Returns:
        - y_return: Predicted return ratio.
        - y_move: Predicted stock movement.
        """
        # Calculate return prediction
        y_return = torch.matmul(tau_F, self.e1) + self.b1

        # Calculate movement prediction (sigmoid for binary classification)
        y_move = torch.sigmoid(torch.matmul(tau_F, self.e2) + self.b2)

        return y_return, y_move

    def compute_rank_loss(self, y_return, y_true_return):
        """
        Compute the pairwise ranking loss (Lrank).

        - y_return: Predicted return ratio for each stock.
        - y_true_return: Ground truth return ratio for each stock.

        Returns:
        - ranking_loss: The ranking loss for all pairs of stocks.
        """
        # Vectorized pairwise ranking loss computation
        y_diff_pred = y_return.unsqueeze(1) - y_return.unsqueeze(0)  # (N, N) matrix
        y_diff_true = y_true_return.unsqueeze(1) - y_true_return.unsqueeze(0)  # (N, N) matrix

        mask = (y_diff_true > 0).float()  # Only consider positive ranking differences
        Lrank = torch.sum(F.relu(-y_diff_pred * y_diff_true) * mask) / (torch.sum(mask) + 1e-8)  # Avoid division by zero

        return Lrank

    def compute_move_loss(self, y_move, y_true_move):
        """
        Compute the cross-entropy loss for stock movement prediction (Lmove).

        - y_move: Predicted movement (binary classification).
        - y_true_move: Ground truth movement (0 or 1).

        Returns:
        - movement_loss: The cross-entropy loss for stock movement.
        """
        return F.binary_cross_entropy(y_move, y_true_move)

    def forward(self, tau_F, y_true_return, y_true_move, delta=0.5):
        """
        Compute the total loss combining ranking loss, movement loss, and L2 regularization.

        - tau_F: Final fused embeddings (used in return and movement prediction).
        - y_true_return: Ground truth return ratios.
        - y_true_move: Ground truth stock movements (binary).
        - delta: Weight for ranking vs. movement loss.

        Returns:
        - total_loss: The final loss value (weighted sum of ranking and movement losses).
        """
        # Predict return and movement
        y_return, y_move = self.get_ys(tau_F)

        # Compute ranking loss
        Lrank = self.compute_rank_loss(y_return, y_true_return)

        # Compute movement loss
        Lmove = self.compute_move_loss(y_move, y_true_move)

        # L2 regularization term
        L2_reg = torch.norm(self.e1) ** 2 + torch.norm(self.e2) ** 2

        # Total loss
        total_loss = (1 - delta) * Lrank + delta * Lmove + self.lambda_reg * L2_reg

        return y_return, y_move, total_loss

# Example usage
batch_size = 403
input_dim = 64  # Size of final fused embedding

# Simulated final fused embeddings from the previous steps
tau_F = torch.randn(batch_size, input_dim, device=device)

# Simulated ground truth data for return and movement
y_true_return = torch.randn(batch_size, device=device)  # Ground truth return ratios
y_true_move = torch.randint(0, 2, (batch_size,), dtype=torch.float32, device=device)  # Ground truth movement (binary)

# Initialize the model
model = LossFunction(input_dim=input_dim).to(device)

# # Compute the total loss
# total_loss = model(tau_F, y_true_return, y_true_move)

# print("Total loss:", total_loss.item())

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv

class CompleteStockModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_sectors, t=6, heads=4, dropout=0.05):
        super(CompleteStockModel, self).__init__()

        # Define each block from the given code
        self.hidden_dim = hidden_dim
        self.attentive_gru = AttentiveLSTM(input_dim, hidden_dim)
        self.stock_gat = StockGAT(hidden_dim, hidden_dim, hidden_dim, heads, dropout)
        self.long_term_attention_block = LongTermAttentionBlock(hidden_dim, hidden_dim, t)
        self.sector_embedding_block = SectorEmbeddingBlock(hidden_dim, num_sectors, t)
        self.gat_embedding_block = GATEmbeddingBlock(hidden_dim, num_sectors, heads)
        self.embedding_fusion_layer = EmbeddingFusionLayer(hidden_dim)

    def forward(self, x, batch_stock_names, intra_sector_stock_2_stock, sector_dict):
        # Step 1: Compute weekly embeddings using AttentiveGRU
        weekly_embeddings = self.attentive_gru(x)  # Shape: (batch_size, num_weeks, hidden_dim)
        weekly_embeddings.to(device)
        # Step 2: Process the embeddings through the StockGAT model to get sector-level embeddings
        gi_batch = process_batch(batch_stock_names, weekly_embeddings, self.stock_gat, intra_sector_stock_2_stock)

        # Step 3: Compute long-term trends using LongTermAttentionBlock
        long_term_ai, long_term_gi = self.long_term_attention_block(weekly_embeddings, gi_batch)
        # print("ltai shape", long_term_ai.shape)
        # print("ltgi shape", long_term_gi.shape)

        # Step 4: Calculate sector embeddings using SectorEmbeddingBlock
        sector_embeddings = self.sector_embedding_block(long_term_gi, batch_stock_names, sector_dict)
        # print("sector embeddings shape", sector_embeddings.shape)

        # Step 5: Apply GAT on sector embeddings to learn inter-sector relationships
        learned_sector_embeddings = self.gat_embedding_block(sector_embeddings, self.create_edge_index(sector_dict))
        # print("learned embeddings shape", learned_sector_embeddings.shape)

        # Step 6: Map stocks to their corresponding sector embeddings
        final_stock_embeddings = map_stocks_to_sector_embeddings(batch_stock_names, sector_dict, learned_sector_embeddings, self.hidden_dim, long_term_gi)
        # print("final stock embeddings shape",final_stock_embeddings.shape)

        # Step 7: Fuse the embeddings using the EmbeddingFusionLayer
        final_embeddings = self.embedding_fusion_layer(long_term_ai, long_term_gi, final_stock_embeddings)
        # print("final embeddings shape",final_embeddings.shape)
        weekly_embeddings = weekly_embeddings.to(device)
        gi_batch = gi_batch.to(device)
        long_term_ai = long_term_ai.to(device)
        long_term_gi = long_term_gi.to(device)
        sector_embeddings = sector_embeddings.to(device)
        learned_sector_embeddings = learned_sector_embeddings.to(device)
        final_stock_embeddings = final_stock_embeddings.to(device)
        final_embeddings = final_embeddings.to(device)

        return final_embeddings

    def create_edge_index(self, sector_dict):
        """
        Creates an edge index for the GAT embedding block based on the sectors.
        """
        edge_index_list = []
        sector_names = list(sector_dict.keys())

        for i, sector in enumerate(sector_names):
            for j in range(i + 1, len(sector_names)):  # Connect all sectors to each other
                edge_index_list.append([i, j])
                edge_index_list.append([j, i])

        edge_index = torch.tensor(edge_index_list, dtype=torch.long).t().contiguous().to(device)
        return edge_index

In [18]:
import os
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Dictionaries to store stock relationships
intra_sector_stock_2_stock = {}
sector_stocks = {}

file_path = 'Preprocessed_data'
for file in os.listdir(file_path):
    df = pd.read_csv(file_path + "/" + file)
    sector = df['Sector_Encoded'][0]
    if sector in sector_stocks:
        sector_stocks[sector].append(file[:-9])
    else:
        sector_stocks[sector] = [file[:-9]]
sector_stocks = {k: v for k, v in sector_stocks.items() if len(v) > 1}

name_matrix = {}
for same_sector_stocks in sector_stocks.values():
    for stock_key in same_sector_stocks:
        name_matrix.setdefault(stock_key, []).extend(
            [stock_value for stock_value in same_sector_stocks if stock_value != stock_key]
        )

intra_sector_stock_2_stock = name_matrix

inter_sector_matrix = {sector: [other_sector for other_sector in sector_stocks.keys() if sector != other_sector] for sector in sector_stocks.keys()}

# # Create a graph
# G = nx.Graph()

# # Add intra-sector stock relationships
# for stock, related_stocks in intra_sector_stock_2_stock.items():
#     for related_stock in related_stocks:
#         G.add_edge(stock, related_stock, color='blue')

# # Add inter-sector relationships
# for sector, related_sectors in inter_sector_matrix.items():
#     for related_sector in related_sectors:
#         G.add_edge(sector, related_sector, color='red')

# # Draw the graph with better aesthetics
# plt.figure(figsize=(14, 10))
# edges = G.edges()
# colors = [G[u][v]['color'] for u, v in edges]
# pos = nx.spring_layout(G, k=0.3)  # Adjust spacing for clarity
# nx.draw(G, pos, with_labels=False, edge_color=colors, node_size=400, node_color='lightblue', edgecolors='black')
# plt.title("Stock Intra-sector and Inter-sector Relationships", fontsize=14, fontweight='bold')
# plt.show()*/


In [20]:
model = CompleteStockModel(input_dim=15, hidden_dim=64, num_sectors=23).to(device)

data_dir = '/content/Preprocessed_data'  # Directory containing 403 CSV files
window_size = 30  # Number of timesteps per batch for each stock

# Initialize the data loader
data_loader = StockDataLoader(data_dir, window_size)

# Create an iterator
data_iter = iter(data_loader)

x, stock_names, y1, y2 = next(data_iter)

# Move tensors to the device
x = x.to(device)
y1, y2 = y1.to(device), y2.to(device)

# If intra_sector_stock_2_stock and sector_stocks are tensors, move them too
#intra_sector_stock_2_stock = intra_sector_stock_2_stock.to(device)
#sector_stocks = {key: val.to(device) for key, val in sector_stocks.items()}  # If sector_stocks is a dictionary of tensors
#print(sector_stocks)
# Pass everything to the model
output = model(x, stock_names, intra_sector_stock_2_stock, sector_stocks)
print("output shape", output.device)

output shape cuda:0


In [21]:
import torch.optim as optim

loss_fn = LossFunction(64)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [30]:
import torch
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from torch.utils.data import DataLoader, random_split

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Constants and clipping bounds
RETURN_CLIP_MIN = -0.1
RETURN_CLIP_MAX = 0.1
λ = 0.5  # Weight for movement prediction loss

# Hyperparameters
epoch_list = [10, 20, 30]
batch_list = [4, 8, 16]
top_k_values = [5, 10, 20]

# Best tracking
best_predicted = {}
best_actual = {}
max_mrr = 0
best_mse = best_mae = best_movement_accuracy = 0
maxbatch = 0
max_epochs = 0

# Loss functions
loss_fn_return = torch.nn.SmoothL1Loss()
loss_fn_move = torch.nn.BCEWithLogitsLoss()

# Assuming 'dataset' is your complete dataset
total_size = len(x)
train_size = int(0.8 * total_size)
val_size = total_size - train_size
train_dataset, val_dataset = random_split(x, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Initialize model, optimizer, scheduler
model = CompleteStockModel(input_dim=15, hidden_dim=64, num_sectors=23).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)


In [31]:
class RLReranker(torch.nn.Module):
    def __init__(self, input_dim):
        super(RLReranker, self).__init__()
        self.linear = torch.nn.Linear(input_dim, 1)

    def forward(self, x):
        return self.linear(x)

# Initialize RL reranker
rl_reranker = RLReranker(input_dim=1).to(device)
rl_optimizer = torch.optim.Adam(rl_reranker.parameters(), lr=1e-4)


In [40]:
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error

# --- Hyperparameters and constants ---
RETURN_CLIP_MIN = -0.1
RETURN_CLIP_MAX = 0.1
λ = 0.5  # Weight for movement loss

epoch_list = [10, 20, 30]
batch_list = [4, 8, 16]
top_k_values = [5, 10, 20]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss functions and optimizer
loss_fn_return = torch.nn.SmoothL1Loss()
loss_fn_move = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# --- Best tracking ---
max_mrr = 0
best_predicted = {}
best_actual = {}
best_mse = best_mae = best_movement_accuracy = 0
maxbatch = 0
max_epochs = 0

for num_epochs in epoch_list:
    for max_batches in batch_list:
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            batch_count = 0
            stock_return_dict = {}
            stock_true_return_dict = {}

            data_loader = StockDataLoader(data_dir, window_size)
            data_iter = iter(data_loader)

            for batch_idx in range(max_batches):
                try:
                    x, stock_names, y_true_move, y_true_return = next(data_iter)
                except StopIteration:
                    print("End of data reached.")
                    break

                x = x.to(device)
                y_true_move = y_true_move.to(device).view(-1)
                y_true_return = y_true_return.to(device).view(-1)
                y_true_return = torch.clamp(y_true_return, min=RETURN_CLIP_MIN, max=RETURN_CLIP_MAX)

                optimizer.zero_grad()

                # --- Model forward ---
                y_return_pred, y_move_pred = model(x, stock_names, intra_sector_stock_2_stock, sector_stocks)[:2]
                y_return_pred = torch.clamp(y_return_pred.view(-1), min=RETURN_CLIP_MIN, max=RETURN_CLIP_MAX)
                y_move_pred = y_move_pred.view(-1)

                # Ensure sizes match
                min_len = min(y_return_pred.shape[0], y_true_return.shape[0])
                y_return_pred = y_return_pred[:min_len]
                y_true_return = y_true_return[:min_len]

                min_len_move = min(y_move_pred.shape[0], y_true_move.shape[0])
                y_move_pred = y_move_pred[:min_len_move]
                y_true_move = y_true_move[:min_len_move]

                # --- Loss ---
                loss_return = loss_fn_return(y_return_pred, y_true_return)
                loss_move = loss_fn_move(y_move_pred, y_true_move.float())
                loss = loss_return + λ * loss_move

                if torch.isnan(loss):
                    print(f"NaN in loss at Epoch {epoch + 1}, Batch {batch_idx + 1}")
                    break

                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()

                running_loss += loss.detach().cpu().item()
                batch_count += 1

                for i, stock in enumerate(stock_names[:min_len]):
                    stock_return_dict[stock] = y_return_pred[i].detach().cpu().item()
                    stock_true_return_dict[stock] = y_true_return[i].detach().cpu().item()

            if batch_count == 0:
                print("No batches processed, exiting training.")
                exit()

            avg_loss = running_loss / batch_count
            scheduler.step()
            print(f"Epoch [{epoch + 1}/{num_epochs}], Avg Loss: {avg_loss:.6f}")

            # --- Evaluation ---
            sorted_predicted = sorted(stock_return_dict.items(), key=lambda item: item[1], reverse=True)
            sorted_actual = sorted(stock_true_return_dict.items(), key=lambda item: item[1], reverse=True)
            predicted_ranks = {stock: rank + 1 for rank, (stock, _) in enumerate(sorted_predicted)}
            actual_ranks = {stock: rank + 1 for rank, (stock, _) in enumerate(sorted_actual)}

            sum_k = 0
            for k in top_k_values:
                top_k_predicted_stocks = sorted_predicted[:k]
                mrr_top_k = sum(1.0 / actual_ranks[stock] for stock, _ in top_k_predicted_stocks)
                sum_k += mrr_top_k / k

            predicted_returns = list(stock_return_dict.values())
            true_returns = list(stock_true_return_dict.values())
            predicted_moves = [int(p > 0) for p in predicted_returns]
            true_moves = [int(t > 0) for t in true_returns]

            mae = mean_absolute_error(true_returns, predicted_returns)
            mse = mean_squared_error(true_returns, predicted_returns)
            movement_accuracy = sum(p == t for p, t in zip(predicted_moves, true_moves)) / len(true_moves)

            print(f"MAE: {mae:.6f}, MSE: {mse:.6f}, Move Acc: {movement_accuracy:.4f}, MRR sum_k: {sum_k:.4f}")

            if sum_k > max_mrr:
                max_mrr = sum_k
                maxbatch = max_batches
                max_epochs = num_epochs
                best_predicted = sorted_predicted
                best_actual = sorted_actual
                best_mse = mse
                best_mae = mae
                best_movement_accuracy = movement_accuracy

print("\n\U0001F50D Best Results:")
print(f"Best MAE: {best_mae:.6f}")
print(f"Best MSE: {best_mse:.6f}")
print(f"Best Movement Accuracy: {best_movement_accuracy:.4f}")
print(f"Best Epoch Count: {max_epochs}, Best Batch Count: {maxbatch}")


Epoch [1/10], Avg Loss: 0.351203
MAE: 0.041035, MSE: 0.003172, Move Acc: 0.5781, MRR sum_k: 0.4165
Epoch [2/10], Avg Loss: 0.348880
MAE: 0.038623, MSE: 0.003229, Move Acc: 0.5469, MRR sum_k: 0.2491
Epoch [3/10], Avg Loss: 0.351462
MAE: 0.036923, MSE: 0.003196, Move Acc: 0.5000, MRR sum_k: 0.1802
Epoch [4/10], Avg Loss: 0.342745
MAE: 0.032966, MSE: 0.002443, Move Acc: 0.5625, MRR sum_k: 0.2238
Epoch [5/10], Avg Loss: 0.349290
MAE: 0.028578, MSE: 0.002109, Move Acc: 0.5312, MRR sum_k: 0.1426
Epoch [6/10], Avg Loss: 0.348544
MAE: 0.030740, MSE: 0.002106, Move Acc: 0.5625, MRR sum_k: 0.1963
Epoch [7/10], Avg Loss: 0.351724
MAE: 0.032372, MSE: 0.002264, Move Acc: 0.4375, MRR sum_k: 0.1153
Epoch [8/10], Avg Loss: 0.349334
MAE: 0.031834, MSE: 0.002372, Move Acc: 0.4375, MRR sum_k: 0.1305
Epoch [9/10], Avg Loss: 0.345582
MAE: 0.028759, MSE: 0.001819, Move Acc: 0.5469, MRR sum_k: 0.2959
Epoch [10/10], Avg Loss: 0.344691
MAE: 0.033403, MSE: 0.002335, Move Acc: 0.5469, MRR sum_k: 0.1029
Epoch [1/

In [41]:
model_path = "trained_stock_model.pth"
torch.save(model.state_dict(), model_path)
print(f"✅ Model saved to {model_path}")

✅ Model saved to trained_stock_model.pth


In [42]:
from google.colab import files
files.download("trained_stock_model.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [43]:

# Calculate MRR
def calculate_mrr(predicted_ranking, actual_returns,k):
    """
    Calculate Mean Reciprocal Rank (MRR)

    Args:
    predicted_ranking: List of tuples (stock_name, predicted_return) sorted by predicted_return
    actual_returns: List of tuples (stock_name, actual_return)
    """
    # Create a dictionary of actual returns for easy lookup
    actual_dict = {name: ret for name, ret in actual_returns}

    # Sort the actual returns to find the truly best performing stocks
    actual_sorted = sorted(actual_returns, key=lambda x: x[1], reverse=True)
    best_stocks = [name for name, ret in actual_sorted[:k]]  # Top 10 actual performers

    reciprocal_ranks = []

    for best_stock in best_stocks[:k]:
        # Find the rank of this best stock in our predicted ranking
        found = False
        for rank, (stock_name, _) in enumerate(predicted_ranking, 1):  # Start from rank 1
            if stock_name == best_stock:
                reciprocal_ranks.append(1.0 / rank)
                found = True
                break

        if not found:
            reciprocal_ranks.append(0.0)

    mrr = np.mean(reciprocal_ranks) if reciprocal_ranks else 0.0
    return mrr


def calculate_precision_at_k(predicted_ranking, actual_returns, k):
    """
    Calculate Precision@K: Fraction of top-K predicted stocks that are actually among the top-K real performers.
    """
    top_k_predicted = {name for name, _ in predicted_ranking[:k]}
    top_k_actual = {name for name, _ in sorted(actual_returns, key=lambda x: x[1], reverse=True)[:k]}

    correct_predictions = top_k_predicted.intersection(top_k_actual)
    precision = len(correct_predictions) / k if k > 0 else 0.0
    return precision


def calculate_movement_accuracy(y_true, y_pred):
    """
    Calculates the accuracy of predicted movement directions (up/down).
    """
    y_true = np.array(y_true).flatten()
    y_pred = np.array(y_pred).flatten()

    # Determine direction of movements (1 for up, 0 for down)
    true_direction = (y_true > 0).astype(int)
    pred_direction = (y_pred > 0).astype(int)

    accuracy = np.mean(true_direction == pred_direction)
    return accuracy


def irr_at_k(true_returns_list, predicted_returns_list, k=5):
    """
    Compute IRR@K from two lists of (stock, return) pairs.

    Parameters:
        true_returns_list (list): List of (stock, actual_return) tuples
        predicted_returns_list (list): List of (stock, predicted_return) tuples
        k (int): Top-K value for IRR@K

    Returns:
        irr (float): IRR@K value
    """
    # Sort both lists by return descending
    true_top_k = sorted(true_returns_list, key=lambda x: x[1], reverse=True)[:k]
    pred_top_k = sorted(predicted_returns_list, key=lambda x: x[1], reverse=True)[:k]

    # Create dict for quick lookup
    true_dict = dict(true_returns_list)

    # Compute sums
    sum_true_top_k = sum([ret for _, ret in true_top_k])
    sum_pred_top_k_actual_returns = sum([true_dict.get(stock, 0) for stock, _ in pred_top_k])

    irr = sum_true_top_k - sum_pred_top_k_actual_returns
    return irr


In [44]:
top_k_values = [5, 10, 20]

for k in top_k_values:
    top_k_predicted_stocks = sorted(best_predicted)[:k]
    print(f"\nTop {k} Predicted Stocks:")
    for stock, _ in top_k_predicted_stocks:
        print(f"Stock: {stock}, Predicted Rank: {predicted_ranks[stock]}, Actual Rank: {actual_ranks[stock]}")



Top 5 Predicted Stocks:
Stock: AAVAS, Predicted Rank: 64, Actual Rank: 55
Stock: ABB, Predicted Rank: 57, Actual Rank: 35
Stock: ABSLAMC, Predicted Rank: 7, Actual Rank: 48
Stock: ADANIGREEN, Predicted Rank: 20, Actual Rank: 1
Stock: AIAENG, Predicted Rank: 50, Actual Rank: 37

Top 10 Predicted Stocks:
Stock: AAVAS, Predicted Rank: 64, Actual Rank: 55
Stock: ABB, Predicted Rank: 57, Actual Rank: 35
Stock: ABSLAMC, Predicted Rank: 7, Actual Rank: 48
Stock: ADANIGREEN, Predicted Rank: 20, Actual Rank: 1
Stock: AIAENG, Predicted Rank: 50, Actual Rank: 37
Stock: ASTRAZEN, Predicted Rank: 39, Actual Rank: 22
Stock: AUROPHARMA, Predicted Rank: 53, Actual Rank: 8
Stock: BAJAJHLDNG, Predicted Rank: 18, Actual Rank: 39
Stock: BANDHANBNK, Predicted Rank: 28, Actual Rank: 10
Stock: BANKINDIA, Predicted Rank: 63, Actual Rank: 61

Top 20 Predicted Stocks:
Stock: AAVAS, Predicted Rank: 64, Actual Rank: 55
Stock: ABB, Predicted Rank: 57, Actual Rank: 35
Stock: ABSLAMC, Predicted Rank: 7, Actual Rank

In [45]:
import numpy as np
# Sort predictions by predicted return descending
predicted_ranking = sorted(best_predicted, key=lambda x: x[1], reverse=True)
actual_returns_list = list(best_actual)
# Define K values
K = [5, 10, 20]

# Calculate evaluation metrics
mrr_scores = [calculate_mrr(predicted_ranking, actual_returns_list, k) for k in K]
precision_scores = [calculate_precision_at_k(predicted_ranking, actual_returns_list, k) for k in K]
irr_scores = [irr_at_k(actual_returns_list, predicted_ranking, k) for k in K]

# Display results
print("Evaluation Metrics:")
print(f'MSE ={best_mse} | MAE = {best_mae} | Movement accuracy = {best_movement_accuracy}')
for idx, k in enumerate(K):
    print(f"\nk = {k}:",end = '')
    print(f" MRR = {mrr_scores[idx]:.4f} |  Precision@{k} = {precision_scores[idx]:.4f} | IRR@{k} = {irr_scores[idx]:.4f}")


Evaluation Metrics:
MSE =0.0009090224241570248 | MAE = 0.01943349009843587 | Movement accuracy = 0.515625

k = 5: MRR = 0.0866 |  Precision@5 = 0.2000 | IRR@5 = 0.1536

k = 10: MRR = 0.0671 |  Precision@10 = 0.3000 | IRR@10 = 0.2188

k = 20: MRR = 0.0594 |  Precision@20 = 0.2500 | IRR@20 = 0.3973


In [None]:
# import torch
# import torch.nn as nn
# from torch.utils.data import Dataset, DataLoader
# from torch.optim import AdamW
# from torch.optim.lr_scheduler import OneCycleLR
# from torch.cuda.amp import GradScaler, autocast

# # Assuming model, loss_fn, intra_sector_stock_2_stock, sector_stocks are defined

# # Custom Dataset Class
# class StockDataset(Dataset):
#     def __init__(self, data):
#         self.data = data  # Replace with your actual data loading logic

#     def __len__(self):
#         return len(self.data)  # Implement this based on your data

#     def __getitem__(self, idx):
#         # Return your data sample here
#         # For example:
#         x, stock_names, y_true_move, y_true_return = self.data[idx]
#         return x, stock_names, y_true_move, y_true_return

# # Initialize dataset and data loader
# dataset = StockDataset(your_data_source)  # Replace with actual data source
# data_loader = DataLoader(dataset,
#                         batch_size=32,  # Default batch size
#                         shuffle=True,
#                         num_workers=4)

# # Optimizer and Scheduler
# optimizer = AdamW(model.parameters(), lr=3e-4, weight_decay=1e-5)
# scheduler = OneCycleLR(optimizer,
#                       max_lr=1e-3,
#                       steps_per_epoch=len(data_loader),
#                       epochs=max([10, 20, 30]))

# # Mixed Precision Training
# scaler = GradScaler()

# # Hyperparameters
# maxlist = 0
# epoch_list = [10,20,30]
# batch_list = [4,8,16]
# top_k = 5  # Number of top stocks to retrieve
# top_k_values = [5, 10, 20]

# # Training Loop
# for num_epochs in epoch_list:
#     for max_batches in batch_list:
#         for epoch in range(num_epochs):
#             model.train()
#             running_loss = 0.0
#             batch_count = 0
#             stock_return_dict = {}
#             stock_true_return_dict = {}

#             for batch_idx, batch in enumerate(data_loader):
#                 if batch_idx >= max_batches:
#                     print("Reached max batch limit, stopping loop")
#                     break

#                 batch_count += 1
#                 x, stock_names, y_true_move, y_true_return = batch
#                 x, y_true_move, y_true_return = x.to(device), y_true_move.to(device), y_true_return.to(device)

#                 # Zero gradients
#                 optimizer.zero_grad()

#                 # Forward pass with mixed precision
#                 with autocast():
#                     output = model(x, stock_names, intra_sector_stock_2_stock, sector_stocks)
#                     y_return, y_move, loss = loss_fn(output, y_true_return, y_true_move)

#                 if torch.isnan(loss):
#                     print(f"NaN detected in loss at Epoch {epoch + 1}, Batch {batch_idx + 1}. Stopping training.")
#                     break

#                 # Backward pass
#                 scaler.scale(loss).backward()

#                 # Gradient clipping
#                 scaler.unscale_(optimizer)
#                 torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

#                 # Update model parameters
#                 scaler.step(optimizer)
#                 scaler.update()

#                 # Update running loss
#                 running_loss += loss.detach().cpu().item()

#                 # Store y_return and y_true_return for each stock
#                 for i, stock in enumerate(stock_names):
#                     stock_return_dict[stock] = y_return[i].detach().cpu().numpy()
#                     stock_true_return_dict[stock] = y_true_return[i].detach().cpu().numpy()

#             if batch_count == 0:
#                 print("No batches processed, exiting training.")
#                 exit()

#             avg_loss = running_loss / batch_count

#             print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

#             # Rank stocks based on predicted and true returns
#             sorted_predicted = sorted(stock_return_dict.items(), key=lambda item: item[1], reverse=True)
#             sorted_actual = sorted(stock_true_return_dict.items(), key=lambda item: item[1], reverse=True)

#             predicted_ranks = {stock: rank + 1 for rank, (stock, _) in enumerate(sorted_predicted)}
#             actual_ranks = {stock: rank + 1 for rank, (stock, _) in enumerate(sorted_actual)}

#             # Calculate MRR for top k stocks
#             sum_k = 0
#             for k in top_k_values:
#                 top_k_predicted_stocks = sorted_predicted[:k]
#                 mrr_top_k = sum(predicted_ranks[stock] / actual_ranks[stock] for stock, _ in top_k_predicted_stocks)
#                 mrr_top_k /= k
#                 sum_k += mrr_top_k

#             if sum_k > maxlist:
#                 maxlist = sum_k
#                 maxbatch = max_batches
#                 max_epochs = num_epochs

#             # Validation phase every 5 epochs
#             if (epoch + 1) % 5 == 0:
#                 model.eval()
#                 with torch.no_grad():
#                     # Calculate validation metrics
#                     validation_loss = 0
#                     validation_mrr = 0
#                     for batch_idx, batch in enumerate(data_loader):
#                         x, stock_names, y_true_move, y_true_return = batch
#                         x, y_true_move, y_true_return = x.to(device), y_true_move.to(device), y_true_return.to(device)
#                         output = model(x, stock_names, intra_sector_stock_2_stock, sector_stocks)
#                         y_return, y_move, loss = loss_fn(output, y_true_return, y_true_move)
#                         validation_loss += loss.detach().cpu().item()

#                         # Store y_return and y_true_return for each stock
#                         stock_return_dict_val = {}
#                         stock_true_return_dict_val = {}
#                         for i, stock in enumerate(stock_names):
#                             stock_return_dict_val[stock] = y_return[i].detach().cpu().numpy()
#                             stock_true_return_dict_val[stock] = y_true_return[i].detach().cpu().numpy()

#                         # Rank stocks based on predicted and true returns
#                         sorted_predicted_val = sorted(stock_return_dict_val.items(), key=lambda item: item[1], reverse=True)
#                         sorted_actual_val = sorted(stock_true_return_dict_val.items(), key=lambda item: item[1], reverse=True)

#                         predicted_ranks_val = {stock: rank + 1 for rank, (stock, _) in enumerate(sorted_predicted_val)}
#                         actual_ranks_val = {stock: rank + 1 for rank, (stock, _) in enumerate(sorted_actual_val)}

#                         # Calculate MRR for top k stocks
#                         top_k_predicted_stocks_val = sorted_predicted_val[:top_k]
#                         mrr_top_k_val = sum(predicted_ranks_val[stock] / actual_ranks_val[stock] for stock, _ in top_k_predicted_stocks_val)
#                         mrr_top_k_val /= top_k
#                         validation_mrr += mrr_top_k_val

#                     avg_validation_loss = validation_loss / len(data_loader)
#                     avg_validation_mrr = validation_mrr / len(data_loader)
#                     print(f"Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {avg_validation_loss:.4f}, Validation MRR: {avg_validation_mrr:.4f}")

#                 model.train()

#             # Update scheduler
#             scheduler.step()
