In [None]:
import os
import pandas as pd
from darts import TimeSeries
from darts.models import TransformerModel
from sklearn.preprocessing import MinMaxScaler
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
import pytorch_lightning as pl
from pytorch_lightning.callbacks import Callback
import torch

In [None]:
# Hide all CUDA devices from this process by clearing CUDA_VISIBLE_DEVICES.
# NOTE(review): torch is already imported when this line runs; this presumably
# still takes effect as long as CUDA has not been initialised yet — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# NOTE(review): this expression only constructs a device object (it becomes the
# cell's output); the result is neither assigned nor passed anywhere, so on its
# own it does not change where tensors or models are placed. CPU execution is
# requested separately through "accelerator": "cpu" in pl_trainer_kwargs.
torch.device('cpu')

In [None]:
class EpochEndCallback(Callback):
    """Lightning callback that records per-epoch losses in a CSV file and on stdout.

    One row ``epoch,train_loss,val_loss`` is appended at the end of every
    training epoch. The ``val_loss`` field is left empty when no validation
    loss was captured during that epoch (e.g. training without a validation
    set).
    """

    def __init__(self, log_file='epoch_loss_log_100ep_RSI_Vol.txt'):
        """Create (or truncate) the log file and write the CSV header.

        Args:
            log_file: Path of the text file that receives one row per epoch.
        """
        super().__init__()
        self.log_file = log_file
        # Validation loss seen during the current epoch, waiting to be written
        # together with the training loss in on_train_epoch_end.
        self._pending_val_loss = None
        with open(self.log_file, 'w') as f:
            # Three columns, matching the three fields written per row.
            f.write('Epoch,Train_Loss,Val_Loss\n')

    def on_train_epoch_end(self, trainer, pl_module):
        """Append the finished epoch's row to the log file and print the training loss.

        Args:
            trainer: The running Lightning trainer; supplies ``callback_metrics``
                and ``current_epoch``.
            pl_module: The module being trained (unused).
        """
        train_loss = trainer.callback_metrics.get("train_loss")
        current_epoch = trainer.current_epoch

        # Consume the buffered validation loss so it cannot leak into a later epoch.
        val_loss = self._pending_val_loss
        self._pending_val_loss = None
        val_field = '' if val_loss is None else val_loss

        with open(self.log_file, 'a') as f:
            f.write(f'{current_epoch},{train_loss},{val_field}\n')
        print(f"Epoch {current_epoch} ended with training loss: {train_loss}")

    def on_validation_epoch_end(self, trainer, pl_module):
        """Buffer the validation loss for the current epoch and print it.

        Nothing is written to the file here: Lightning calls this hook before
        ``on_train_epoch_end``, and a file opened in append mode always writes
        at end-of-file, so patching a previously written row is not possible.
        The value is stored and written as part of the epoch's row instead.

        Args:
            trainer: The running Lightning trainer; supplies ``callback_metrics``
                and ``current_epoch``.
            pl_module: The module being validated (unused).
        """
        # The pre-training sanity check also triggers this hook; its loss does
        # not belong to any training epoch, so it is ignored.
        if getattr(trainer, "sanity_checking", False):
            return
        val_loss = trainer.callback_metrics.get("val_loss")
        current_epoch = trainer.current_epoch
        self._pending_val_loss = val_loss
        print(f"Epoch {current_epoch} ended with validation loss: {val_loss}")

# Create an instance of the callback (this creates/truncates the log file)
epoch_end_callback = EpochEndCallback()

In [None]:

# Early stopping: watch 'train_loss' (lower is better) and stop once it has
# gone 10 epochs without improving; verbose=True prints a message on trigger.
early_stopping = EarlyStopping(
    monitor='train_loss',
    mode='min',
    patience=10,
    verbose=True,
)

# Trainer options handed to the model via pl_trainer_kwargs:
# both callbacks, executed on a single CPU device.
pl_trainer_kwargs = dict(
    callbacks=[epoch_end_callback, early_stopping],
    accelerator="cpu",
    devices=1,
)


In [None]:
# Step 1: read the CSV (parsing 'Date' as datetimes), discard every row that
# contains a NaN, and use 'Date' as the index.
csv_file_path = '/home/raj/Rajarshi/Term Project/Sir_code_dart/data/SBIN.NS_day_2022.csv'
sbi_data = (
    pd.read_csv(csv_file_path, parse_dates=['Date'])
    .dropna()
    .set_index('Date')
)

# Step 2: report the business days between the first and last date that have no row.
full_range = pd.date_range(sbi_data.index.min(), sbi_data.index.max(), freq='B')
missing_dates = full_range.difference(sbi_data.index)
print(f"Number of missing dates: {len(missing_dates)}")
if not missing_dates.empty:
    print(f"Missing dates filled: {missing_dates}")

# Step 3: conform to business-day frequency; gaps take the previous row's values.
sbi_data = sbi_data.asfreq('B', method='pad')

# The target (Close) and the past covariates get independent scalers, so the
# target can later be inverse-transformed on its own.
scaler_close = MinMaxScaler()
scaler_covariates = MinMaxScaler()

covariate_columns = [
    'RSI',
    'MACD',
    'Volume',
    'Upper_Bollinger_Band',
    'Middle_Bollinger_Band',
    'Lower_Bollinger_Band',
]

# Fit each scaler on its columns and overwrite those columns with the scaled values.
sbi_data['Close'] = scaler_close.fit_transform(sbi_data[['Close']])
sbi_data[covariate_columns] = scaler_covariates.fit_transform(sbi_data[covariate_columns])


In [None]:
# Target series: the 'Close' column of sbi_data.
close_column = sbi_data['Close']
target = TimeSeries.from_series(close_column)

# Past covariates: the six indicator columns, kept together as one multivariate series.
covariate_frame = sbi_data[[
    'RSI',
    'MACD',
    'Volume',
    'Upper_Bollinger_Band',
    'Middle_Bollinger_Band',
    'Lower_Bollinger_Band',
]]
past_cov = TimeSeries.from_dataframe(covariate_frame)

In [None]:

# Number of steps the model emits per forward pass. It is also the longest
# horizon that can be forecast here, see the prediction step below.
output_chunk_length = 7

# Configure and train the model
model = TransformerModel(
    input_chunk_length=60,
    output_chunk_length=output_chunk_length,
    d_model=128,  # Increased from 64
    nhead=8,  # Increased from 4
    num_encoder_layers=4,  # Increased from 3
    num_decoder_layers=4,  # Increased from 3
    dim_feedforward=1024,  # Increased from 512
    dropout=0.2,  # Increased regularization
    n_epochs=100,  # More epochs for better convergence
    optimizer_cls=torch.optim.Adam,
    optimizer_kwargs={'lr': 0.0005},
    lr_scheduler_cls=torch.optim.lr_scheduler.ReduceLROnPlateau,
    lr_scheduler_kwargs={
        'patience': 5,
        'factor': 0.2,
        'verbose': True,
        'monitor': 'train_loss'  # Use train_loss if no validation set
    },
    pl_trainer_kwargs=pl_trainer_kwargs,
)

# Fit the model with the target series and past covariates
model.fit(target, past_covariates=past_cov)

# Predict the steps that follow the last date in the dataset.
# past_cov ends on the same date as target (both are built from sbi_data).
# For n > output_chunk_length Darts forecasts auto-regressively and needs past
# covariates reaching n - output_chunk_length steps beyond the end of the
# target; those values do not exist here, so asking for 10 steps would raise.
# The horizon is therefore capped at output_chunk_length.
forecast_horizon = min(10, output_chunk_length)
pred = model.predict(n=forecast_horizon, past_covariates=past_cov)

# Denormalize the predictions using the close scaler
pred_values = scaler_close.inverse_transform(pred.values())

In [None]:
# Pair every forecast timestamp with its de-normalised predicted value.
dates = pred.time_index
predictions_df = pd.DataFrame(
    {'Date': dates, 'Predicted Value': pred_values.flatten()}
)

# Write the table to disk (no index column) and report where it went.
output_csv_file = 'predicted_values_1_100ep_RSI_MACD_Vol_Bollinger.csv'
predictions_df.to_csv(output_csv_file, index=False)
print(f"Predicted values saved to {output_csv_file}")