In [13]:
import math
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)

import yfinance as yf

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler


In [14]:
from tqdm import tqdm

In [15]:
from transformer_model import TransformerModel
from dataset import Dataset

In [16]:
def calculate_bollinger_bands(data, window=10, num_of_std=2):
    """Calculate Bollinger Bands"""
    rolling_mean = data.rolling(window=window).mean()
    rolling_std = data.rolling(window=window).std()
    upper_band = rolling_mean + (rolling_std * num_of_std)
    lower_band = rolling_mean - (rolling_std * num_of_std)
    return upper_band, lower_band

def calculate_rsi(data, window=10):
    """Calculate Relative Strength Index"""
    delta = data.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_roc(data, periods=10):
    """Calculate Rate of Change."""
    roc = ((data - data.shift(periods)) / data.shift(periods)) * 100
    return roc

In [17]:
tickers = ['TCS', 'WIPRO', 'HCLTECH', 'INFY', 'LTIM', 'TECHM']

In [18]:
ticker_data_frames = []
stats = {}
for ticker in tickers:
    ticker = ticker + '.NS'
    # Download historical data for the ticker
    data = yf.download(ticker, period="5y", interval="1d")

    # Calculate the daily percentage change
    close = data['Close']
    upper, lower = calculate_bollinger_bands(close, window=14, num_of_std=2)
    width = upper - lower
    rsi = calculate_rsi(close, window=14)
    roc = calculate_roc(close, periods=14)
    volume = data['Volume']
    diff = data['Close'].diff(1)
    percent_change_close = data['Close'].pct_change() * 100

    # Create a DataFrame for the current ticker and append it to the list
    ticker_df = pd.DataFrame({
        ticker+'_close': close,
        ticker+'_width': width,
        ticker+'_rsi': rsi,
        ticker+'_roc': roc,
        ticker+'_volume': volume,
        ticker+'_diff': diff,
        ticker+'_percent_change_close': percent_change_close,
    })
    
    MEAN = ticker_df.mean()
    STD = ticker_df.std()

    # Keep track of mean and std
    for column in MEAN.index:
      stats[f"{column}_mean"] = MEAN[column]
      stats[f"{column}_std"] = STD[column]
    
    # # Normalize the training features
    # ticker_df = (ticker_df - MEAN) / STD

    ticker_data_frames.append(ticker_df)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [19]:
# Convert the dictionary containing feature statistics to a DataFrame for easier access
stats = pd.DataFrame([stats], index=[0])

# Display the DataFrame to verify its structure
stats.head()

Unnamed: 0,TCS.NS_close_mean,TCS.NS_close_std,TCS.NS_width_mean,TCS.NS_width_std,TCS.NS_rsi_mean,TCS.NS_rsi_std,TCS.NS_roc_mean,TCS.NS_roc_std,TCS.NS_volume_mean,TCS.NS_volume_std,...,TECHM.NS_rsi_mean,TECHM.NS_rsi_std,TECHM.NS_roc_mean,TECHM.NS_roc_std,TECHM.NS_volume_mean,TECHM.NS_volume_std,TECHM.NS_diff_mean,TECHM.NS_diff_std,TECHM.NS_percent_change_close_mean,TECHM.NS_percent_change_close_std
0,3103.656071,635.026142,246.54054,116.087634,53.164785,16.328442,0.86858,5.265469,2775387.0,1723884.0,...,53.443288,16.745527,1.206234,7.096912,3233784.0,2216003.0,0.661548,20.249652,0.083426,1.95063


In [20]:
df = pd.concat(ticker_data_frames, axis=1)
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)
df.head(2)

Unnamed: 0_level_0,TCS.NS_close,TCS.NS_width,TCS.NS_rsi,TCS.NS_roc,TCS.NS_volume,TCS.NS_diff,TCS.NS_percent_change_close,WIPRO.NS_close,WIPRO.NS_width,WIPRO.NS_rsi,...,LTIM.NS_volume,LTIM.NS_diff,LTIM.NS_percent_change_close,TECHM.NS_close,TECHM.NS_width,TECHM.NS_rsi,TECHM.NS_roc,TECHM.NS_volume,TECHM.NS_diff,TECHM.NS_percent_change_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-08-08,2258.100098,221.628052,74.364528,8.721931,2073298,44.650146,2.01722,265.75,10.331389,51.653524,...,49320,22.950073,1.434066,680.200012,69.044901,52.350538,0.76291,2708834,5.75,0.852547
2019-08-09,2246.25,223.433874,69.442483,6.462397,1744550,-11.850098,-0.524782,263.5,10.263133,48.082615,...,156454,12.25,0.754636,663.349976,66.887992,46.512556,-1.279863,3743407,-16.850037,-2.477218


* add date end
* add time to sequence

In [21]:
SEQUENCE_LEN = 15  # 15 days of data
BATCH_SIZE = 32
dataset = Dataset(df,SEQUENCE_LEN)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, test_size])

train_dataloader = torch.utils.data.DataLoader(
            train_set,
            batch_size=BATCH_SIZE,
            shuffle=True,num_workers=4,drop_last=True
)
val_dataloader = torch.utils.data.DataLoader(
            val_set,
            batch_size=BATCH_SIZE,
            shuffle=True,num_workers=4,drop_last=True
)

In [22]:
def dir_acc(seq,y_true, y_pred):
    y_true_prev = seq[:,-1,0]
    # print(y_true.shape,y_true_prev.shape,y_pred.shape)
    true_change = y_true - y_true_prev  # Calculate true change
    pred_change = y_pred.squeeze(1) - y_true_prev  # Calculate predicted change
    # print(torch.sign(true_change), torch.sign(pred_change))
    correct_direction = torch.eq(torch.sign(true_change), torch.sign(pred_change))  # Check if the signs match
    # print(correct_direction)
    return torch.mean(torch.tensor(correct_direction).float())  # Return the mean of correct directions

In [23]:
# Hyperparameters
sequence_length = 12
input_size = 42
output_size = 1
num_layers = 2
d_model = 64
nhead = 4
num_epochs = 100
learning_rate = 0.001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the model, loss function and optimizer
model = TransformerModel(input_size, output_size, d_model, nhead, num_layers)
model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [24]:
train_loss_avg = 100
loss_test = 100
avg_dir_accuracy = 0 
# Training loop
for epoch in range(num_epochs):
    
    model.train()
    train_loss_avg = 0
    for seq,target,mean,std in tqdm(train_dataloader):
        optimizer.zero_grad()
        seq = seq.cuda()
        predictions = model(seq)
        loss = criterion(predictions, target.cuda())
        loss.backward()
        optimizer.step()
        train_loss_avg += loss / len(train_dataloader)

    if (epoch + 1) % 1 == 0:
        with torch.no_grad():
            loss_test = 0
            avg_dir_accuracy = 0
            for data in val_dataloader:
                seq,target,mean,std = data
                predictions = model(seq.cuda())
                # validation loss
                batch_loss = criterion(predictions, target.cuda())
                loss_test += batch_loss
                tst = dir_acc(seq.cuda(),target.cuda(),predictions)
                # print('tst',tst)
                avg_dir_accuracy += dir_acc(seq.cuda(),target.cuda(),predictions)
            loss_test /= len(val_dataloader)
            avg_dir_accuracy /= len(val_dataloader)
    print ('-'*15 + f'Epoch:{epoch} Train_loss:{train_loss_avg} Val_loss:{loss_test} Dir Accuracy:f{avg_dir_accuracy}')

  0%|          | 0/60 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 60/60 [00:05<00:00, 10.29it/s]
  return torch.mean(torch.tensor(correct_direction).float())  # Return the mean of correct directions


---------------Epoch:0 Train_loss:3.4817123413085938 Val_loss:2.5427749156951904 Dir Accuracy:f0.5166667103767395


100%|██████████| 60/60 [00:06<00:00,  9.74it/s]


---------------Epoch:1 Train_loss:3.365734338760376 Val_loss:2.5194661617279053 Dir Accuracy:f0.5083333849906921


100%|██████████| 60/60 [00:06<00:00,  9.80it/s]


---------------Epoch:2 Train_loss:3.3323707580566406 Val_loss:2.6366095542907715 Dir Accuracy:f0.5208333730697632


100%|██████████| 60/60 [00:06<00:00,  9.82it/s]


---------------Epoch:3 Train_loss:3.3230044841766357 Val_loss:2.5268192291259766 Dir Accuracy:f0.5


100%|██████████| 60/60 [00:05<00:00, 10.12it/s]


---------------Epoch:4 Train_loss:3.321408748626709 Val_loss:2.491849422454834 Dir Accuracy:f0.5208333730697632


100%|██████████| 60/60 [00:06<00:00,  9.95it/s]


---------------Epoch:5 Train_loss:3.3293285369873047 Val_loss:2.7257843017578125 Dir Accuracy:f0.5166667103767395


100%|██████████| 60/60 [00:06<00:00,  9.89it/s]


---------------Epoch:6 Train_loss:3.3395516872406006 Val_loss:2.6564674377441406 Dir Accuracy:f0.5125000476837158


100%|██████████| 60/60 [00:05<00:00, 10.09it/s]


---------------Epoch:7 Train_loss:3.319493532180786 Val_loss:2.5761024951934814 Dir Accuracy:f0.5125000476837158


100%|██████████| 60/60 [00:06<00:00,  9.60it/s]


---------------Epoch:8 Train_loss:3.3181588649749756 Val_loss:2.4835238456726074 Dir Accuracy:f0.5166667103767395


100%|██████████| 60/60 [00:06<00:00,  9.44it/s]


---------------Epoch:9 Train_loss:3.3212730884552 Val_loss:2.5131781101226807 Dir Accuracy:f0.5041667222976685


100%|██████████| 60/60 [00:06<00:00,  9.93it/s]


---------------Epoch:10 Train_loss:3.3368849754333496 Val_loss:2.5333151817321777 Dir Accuracy:f0.5


100%|██████████| 60/60 [00:06<00:00,  9.90it/s]


---------------Epoch:11 Train_loss:3.313323736190796 Val_loss:2.5141797065734863 Dir Accuracy:f0.5166667103767395


100%|██████████| 60/60 [00:05<00:00, 10.00it/s]


---------------Epoch:12 Train_loss:3.3319783210754395 Val_loss:2.546980619430542 Dir Accuracy:f0.5125000476837158


100%|██████████| 60/60 [00:06<00:00,  9.84it/s]


---------------Epoch:13 Train_loss:3.2987492084503174 Val_loss:2.509134292602539 Dir Accuracy:f0.5


100%|██████████| 60/60 [00:05<00:00, 10.33it/s]


---------------Epoch:14 Train_loss:3.3148839473724365 Val_loss:2.561096668243408 Dir Accuracy:f0.5166667103767395


100%|██████████| 60/60 [00:06<00:00,  9.98it/s]


---------------Epoch:15 Train_loss:3.31581974029541 Val_loss:2.4979751110076904 Dir Accuracy:f0.5291666984558105


100%|██████████| 60/60 [00:05<00:00, 10.20it/s]


---------------Epoch:16 Train_loss:3.31976580619812 Val_loss:2.522981882095337 Dir Accuracy:f0.5


100%|██████████| 60/60 [00:06<00:00,  9.53it/s]


---------------Epoch:17 Train_loss:3.3082096576690674 Val_loss:2.5016725063323975 Dir Accuracy:f0.5208333730697632


100%|██████████| 60/60 [00:05<00:00, 10.02it/s]


---------------Epoch:18 Train_loss:3.3003571033477783 Val_loss:2.554375648498535 Dir Accuracy:f0.5125000476837158


100%|██████████| 60/60 [00:06<00:00,  9.38it/s]


---------------Epoch:19 Train_loss:3.2878901958465576 Val_loss:2.589150905609131 Dir Accuracy:f0.5166667103767395


100%|██████████| 60/60 [00:05<00:00, 10.16it/s]


---------------Epoch:20 Train_loss:3.3260421752929688 Val_loss:2.5617897510528564 Dir Accuracy:f0.5125000476837158


100%|██████████| 60/60 [00:06<00:00,  9.70it/s]


---------------Epoch:21 Train_loss:3.30718994140625 Val_loss:2.541612148284912 Dir Accuracy:f0.5166667103767395


100%|██████████| 60/60 [00:06<00:00,  9.71it/s]


---------------Epoch:22 Train_loss:3.296342134475708 Val_loss:2.4876723289489746 Dir Accuracy:f0.5166667103767395


100%|██████████| 60/60 [00:06<00:00,  9.96it/s]


---------------Epoch:23 Train_loss:3.2688722610473633 Val_loss:2.565500259399414 Dir Accuracy:f0.5125000476837158


100%|██████████| 60/60 [00:06<00:00,  9.90it/s]


---------------Epoch:24 Train_loss:3.3226990699768066 Val_loss:2.5256152153015137 Dir Accuracy:f0.5041667222976685


100%|██████████| 60/60 [00:06<00:00,  9.90it/s]


---------------Epoch:25 Train_loss:3.3107776641845703 Val_loss:2.5187902450561523 Dir Accuracy:f0.5


100%|██████████| 60/60 [00:06<00:00,  9.78it/s]


---------------Epoch:26 Train_loss:3.317934036254883 Val_loss:2.5302281379699707 Dir Accuracy:f0.5041667222976685


 73%|███████▎  | 44/60 [00:04<00:01,  9.66it/s]


KeyboardInterrupt: 