In [1]:
import os
import pandas as pd
from darts import TimeSeries
from darts.models import TransformerModel
from sklearn.preprocessing import MinMaxScaler
from pytorch_lightning.callbacks import Callback
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
class EpochEndCallback(Callback):
    """Lightning callback that records the training loss of every epoch in a CSV log.

    Constructing an instance (re)creates ``log_file`` so that it contains only
    the header ``Epoch,Train_Loss,Window``. After each training epoch one row
    is appended with the epoch index, the logged ``train_loss`` value and the
    window number most recently set through :meth:`set_window_number`.

    Args:
        log_file: Path of the CSV log. Any existing file is truncated on
            construction.
    """

    def __init__(self, log_file='epoch_loss_log_window_transformer_model.txt'):
        # Let the base class (and any cooperating mixins) initialise itself.
        super().__init__()
        self.log_file = log_file
        self.window_number = 0  # Updated by the caller before each window is trained
        with open(self.log_file, 'w') as f:
            f.write('Epoch,Train_Loss,Window\n')

    def on_train_epoch_end(self, trainer, pl_module):
        """Append ``epoch,loss,window`` to the log and echo the loss to stdout.

        Does nothing when no ``train_loss`` metric has been logged.
        """
        train_loss = trainer.logged_metrics.get("train_loss")
        if train_loss is None:
            return

        # Convert to a Python scalar once; fall back to float() in case the
        # metric is not a tensor-like object exposing .item().
        loss_value = train_loss.item() if hasattr(train_loss, "item") else float(train_loss)
        current_epoch = trainer.current_epoch

        with open(self.log_file, 'a') as f:
            f.write(f'{current_epoch},{loss_value},{self.window_number}\n')
        print(f"Epoch {current_epoch} ended with training loss: {loss_value}")

    def set_window_number(self, window_number):
        """Set the window number written alongside subsequent epoch rows."""
        self.window_number = window_number

# Create the loss-logging callback (no early-stopping callback is created here; testing setup).
# Note: constructing it truncates the log file and rewrites the CSV header.
epoch_end_callback = EpochEndCallback()

In [None]:
# Load and preprocess the data
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
csv_file_path = '/home/raj/Rajarshi/Term Project/notebook_files/data/RELIANCE.NS_day.csv'

# NOTE(review): the frame is called `sbi_data` although it is read from the
# RELIANCE file; the name is kept because later cells reference it.
sbi_data = (
    pd.read_csv(csv_file_path, parse_dates=['Date'])
    .dropna()
    .set_index('Date')
    .asfreq('B', method='pad')  # Reindex to business days, forward-filling gaps
)

# Must be computed from the raw prices, i.e. before 'Close' is scaled below.
sbi_data['Open_Close_Diff'] = sbi_data['Open'] - sbi_data['Close']

# Initialize variables
# The final two months are held out: the first training window ends here and
# the rolling loop forecasts forward from this date.
training_end_date = sbi_data.index.max() - pd.DateOffset(months=2)
final_predictions = []

# Create scalers
# Fit on the initial training span only. Fitting on the full series would leak
# the min/max of the held-out period into the features the model is trained on.
# As a consequence, scaled values after `training_end_date` may fall outside [0, 1].
covariate_columns = ['RSI', 'Volume', 'Open_Close_Diff']
initial_train_data = sbi_data.loc[:training_end_date]

scaler_close = MinMaxScaler()
scaler_covariates = MinMaxScaler()
scaler_close.fit(initial_train_data[['Close']])
scaler_covariates.fit(initial_train_data[covariate_columns])

sbi_data['Close'] = scaler_close.transform(sbi_data[['Close']]).ravel()
sbi_data[covariate_columns] = scaler_covariates.transform(sbi_data[covariate_columns])


In [None]:

# Keyword arguments passed to the model as `pl_trainer_kwargs`:
# the loss-logging callback, running on a single CPU device.
pl_trainer_kwargs = dict(
    callbacks=[epoch_end_callback],
    accelerator="cpu",
    devices=1,
)

# Define the window management and model training class
class ModelTrainer:
    """Train a fresh darts ``TransformerModel`` on one window and forecast beyond it.

    Args:
        data: Full (scaled) data frame; stored on the instance but not read by
            ``train_model``.
        scaler_close: Fitted scaler used to map predicted 'Close' values back
            to the original price scale.
        epoch_end_callback: Callback exposing ``set_window_number``; told which
            window is being trained before each fit.
        pl_trainer_kwargs: Passed through to the model as ``pl_trainer_kwargs``.
        input_chunk_length: Number of past time steps fed to the model.
        forecast_horizon: Number of steps forecast per window. Used both as the
            model's ``output_chunk_length`` and as ``n`` in ``predict`` so the
            two can never drift apart.

    Raises:
        ValueError: If ``input_chunk_length`` or ``forecast_horizon`` is < 1.
    """

    # Past covariates supplied to the model next to the 'Close' target.
    COVARIATE_COLUMNS = ('RSI', 'Volume', 'Open_Close_Diff')

    def __init__(self, data, scaler_close, epoch_end_callback, pl_trainer_kwargs,
                 input_chunk_length=150, forecast_horizon=15):
        if input_chunk_length < 1 or forecast_horizon < 1:
            raise ValueError("input_chunk_length and forecast_horizon must be >= 1")
        self.data = data
        self.scaler_close = scaler_close
        self.epoch_end_callback = epoch_end_callback
        self.pl_trainer_kwargs = pl_trainer_kwargs
        self.input_chunk_length = input_chunk_length
        self.forecast_horizon = forecast_horizon

    def train_model(self, train_data, window_number):
        """Fit a new model on ``train_data`` and forecast ``forecast_horizon`` steps.

        Args:
            train_data: Data frame with a 'Close' column and the columns listed
                in ``COVARIATE_COLUMNS``.
            window_number: Identifier of the current window; forwarded to the
                logging callback.

        Returns:
            ``(dates, pred_values)``: the time index of the forecast and the
            forecast after ``scaler_close.inverse_transform``.

        Raises:
            ValueError: If ``train_data`` has fewer rows than
                ``input_chunk_length + forecast_horizon``, i.e. too few to
                build a single training sample.
        """
        # Set the window number for the callback
        self.epoch_end_callback.set_window_number(window_number)

        # Fail early with a clear message instead of deep inside the library.
        min_rows = self.input_chunk_length + self.forecast_horizon
        if len(train_data) < min_rows:
            raise ValueError(
                f"Training window {window_number} has {len(train_data)} rows; "
                f"at least {min_rows} are required "
                f"(input_chunk_length + forecast_horizon)."
            )

        # Prepare the training data
        target_train = TimeSeries.from_series(train_data['Close'])
        past_cov_train = TimeSeries.from_dataframe(train_data[list(self.COVARIATE_COLUMNS)])

        # Initialize and train the model
        model = TransformerModel(
            input_chunk_length=self.input_chunk_length,
            output_chunk_length=self.forecast_horizon,
            d_model=128,
            nhead=8,
            num_encoder_layers=4,
            num_decoder_layers=4,
            dim_feedforward=1024,
            dropout=0.2,
            n_epochs=50,
            optimizer_cls=torch.optim.Adam,
            optimizer_kwargs={'lr': 0.0005},
            lr_scheduler_cls=torch.optim.lr_scheduler.ReduceLROnPlateau,
            # The scheduler watches the training loss (no validation set is used).
            lr_scheduler_kwargs={'patience': 5, 'factor': 0.2, 'verbose': True, 'monitor': 'train_loss'},
            pl_trainer_kwargs=self.pl_trainer_kwargs
        )

        model.fit(target_train, past_covariates=past_cov_train)

        # Predict and inverse transform
        pred = model.predict(n=self.forecast_horizon, past_covariates=past_cov_train)
        pred_values = self.scaler_close.inverse_transform(pred.values())
        dates = pred.time_index

        return dates, pred_values

# Initialize the trainer: one ModelTrainer reused for every window, sharing the
# close-price scaler and the logging callback.
# Note: `trainer` is a ModelTrainer instance, not a Lightning Trainer.
trainer = ModelTrainer(sbi_data, scaler_close, epoch_end_callback, pl_trainer_kwargs)

In [None]:
# Training and prediction loop
# Expanding-window forecast: train on everything up to `window_end`, forecast
# the next steps, then extend the window to the last forecast date and repeat
# until the forecasts reach the end of the available data.
#
# Everything the loop mutates is (re)initialised here so the cell can be re-run
# on its own: results are collected into a fresh list and the window boundary
# is tracked in a local cursor instead of overwriting `training_end_date`.
final_predictions = []
window_number = 1
window_end = training_end_date
last_available_date = sbi_data.index.max()

while True:
    train_data = sbi_data.loc[:window_end]
    window_start_date = train_data.index.min()
    window_end_date = train_data.index.max()
    print(f"Training window {window_number}: from {window_start_date} to {window_end_date}")

    # Train the model and get predictions
    dates, pred_values = trainer.train_model(train_data, window_number)

    # Store the predictions (the window bounds are broadcast to every row)
    final_predictions.append(pd.DataFrame({
        'Date': dates,
        'Predicted Value': pred_values.flatten(),
        'Window Start': window_start_date,
        'Window End': window_end_date
    }))

    # The next window ends at the last date that was just forecast
    window_end = dates[-1]

    # Break if we reach the end of the data
    if window_end >= last_available_date:
        break

    window_number += 1


In [None]:
# Stack the per-window forecast frames into one table and write it to CSV.
output_csv_file = 'prediction_using_window_method_transformer_model_rel.csv'

all_predictions_df = pd.concat(final_predictions, ignore_index=True)
all_predictions_df.to_csv(output_csv_file, index=False)
print(f"Predictions saved to {output_csv_file}")