In [1]:
import math
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)

import yfinance as yf

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torchsummary import summary



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from tqdm import tqdm

In [3]:
def calculate_bollinger_bands(data, window=10, num_of_std=2):
    """Calculate Bollinger Bands"""
    rolling_mean = data.rolling(window=window).mean()
    rolling_std = data.rolling(window=window).std()
    upper_band = rolling_mean + (rolling_std * num_of_std)
    lower_band = rolling_mean - (rolling_std * num_of_std)
    return upper_band, lower_band

def calculate_rsi(data, window=10):
    """Calculate Relative Strength Index"""
    delta = data.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_roc(data, periods=10):
    """Calculate Rate of Change."""
    roc = ((data - data.shift(periods)) / data.shift(periods)) * 100
    return roc

In [4]:
tickers = ['TCS.NS', 'WIPRO.NS', 'HCLTECH.NS', 'INFY.NS', 'LTIM.NS', 'TECHM.NS', '^CNXIT', '^NSEI']

In [5]:
ticker_data_frames = []
stats = {}
for ticker in tickers:
    
    # Download historical data for the ticker
    data = yf.download(ticker, period="10y", interval="1d")
    # Calculate the daily percentage change
    close = data['Close']
    high = data['High']
    low = data['Low']
    upper, lower = calculate_bollinger_bands(close, window=14, num_of_std=2)
    width = upper - lower
    rsi = calculate_rsi(close, window=14)
    roc = calculate_roc(close, periods=14)
    volume = data['Volume']
    diff = data['Close'].diff(1)
    percent_change_close = data['Close'].pct_change() * 100

    # Create a DataFrame for the current ticker and append it to the list
    ticker_df = pd.DataFrame({
        ticker+'_close': close,
        ticker+'_high': high,
        ticker+'_low': low,
        ticker+'_width': width,
        ticker+'_rsi': rsi,
        ticker+'_roc': roc,
        ticker+'_volume': volume,
        ticker+'_diff': diff,
        ticker+'_percent_change_close': percent_change_close,
    })
    
    MEAN = ticker_df.mean()
    STD = ticker_df.std()

    # Keep track of mean and std
    for column in MEAN.index:
      stats[f"{column}_mean"] = MEAN[column]
      stats[f"{column}_std"] = STD[column]
    
    # # Normalize the training features
    ticker_df = (ticker_df - MEAN) / STD

    ticker_data_frames.append(ticker_df)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [6]:
# Convert the dictionary containing feature statistics to a DataFrame for easier access
stats = pd.DataFrame([stats], index=[0])

# Display the DataFrame to verify its structure
stats.head()

Unnamed: 0,TCS.NS_close_mean,TCS.NS_close_std,TCS.NS_high_mean,TCS.NS_high_std,TCS.NS_low_mean,TCS.NS_low_std,TCS.NS_width_mean,TCS.NS_width_std,TCS.NS_rsi_mean,TCS.NS_rsi_std,...,^NSEI_rsi_mean,^NSEI_rsi_std,^NSEI_roc_mean,^NSEI_roc_std,^NSEI_volume_mean,^NSEI_volume_std,^NSEI_diff_mean,^NSEI_diff_std,^NSEI_percent_change_close_mean,^NSEI_percent_change_close_std
0,2288.242073,979.319322,2311.089348,987.195645,2265.710511,971.326023,184.863584,116.623494,52.598581,16.551548,...,55.990412,17.351379,0.746679,3.93005,316473.023635,197530.962957,6.94417,130.189138,0.053165,1.046681


In [7]:
df = pd.concat(ticker_data_frames, axis=1)
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)
df.head(2)

Unnamed: 0_level_0,TCS.NS_close,TCS.NS_high,TCS.NS_low,TCS.NS_width,TCS.NS_rsi,TCS.NS_roc,TCS.NS_volume,TCS.NS_diff,TCS.NS_percent_change_close,WIPRO.NS_close,...,^CNXIT_percent_change_close,^NSEI_close,^NSEI_high,^NSEI_low,^NSEI_width,^NSEI_rsi,^NSEI_roc,^NSEI_volume,^NSEI_diff,^NSEI_percent_change_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-12-12,-1.209148,-1.217884,-1.219169,-0.758105,0.398318,0.489847,-0.565092,0.147699,0.35381,-1.078486,...,46.044367,-1.035195,-1.033337,-1.027708,-0.81007,0.23033,0.345788,-1.054888,-0.75194,-1.102555
2016-12-13,-1.213181,-1.217681,-1.208874,-0.811018,0.188074,0.225403,-0.21611,-0.145681,-0.279877,-1.050415,...,0.035475,-1.023867,-1.033736,-1.027407,-0.838771,0.283735,0.40707,-0.670644,0.338399,0.545542


* add date end
* add time to sequence

In [8]:
df.shape

(1874, 72)

In [9]:
df.head()

Unnamed: 0_level_0,TCS.NS_close,TCS.NS_high,TCS.NS_low,TCS.NS_width,TCS.NS_rsi,TCS.NS_roc,TCS.NS_volume,TCS.NS_diff,TCS.NS_percent_change_close,WIPRO.NS_close,...,^CNXIT_percent_change_close,^NSEI_close,^NSEI_high,^NSEI_low,^NSEI_width,^NSEI_rsi,^NSEI_roc,^NSEI_volume,^NSEI_diff,^NSEI_percent_change_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-12-12,-1.209148,-1.217884,-1.219169,-0.758105,0.398318,0.489847,-0.565092,0.147699,0.35381,-1.078486,...,46.044367,-1.035195,-1.033337,-1.027708,-0.81007,0.23033,0.345788,-1.054888,-0.75194,-1.102555
2016-12-13,-1.213181,-1.217681,-1.208874,-0.811018,0.188074,0.225403,-0.21611,-0.145681,-0.279877,-1.050415,...,0.035475,-1.023867,-1.033736,-1.027407,-0.838771,0.283735,0.40707,-0.670644,0.338399,0.545542
2016-12-14,-1.209454,-1.214288,-1.205785,-0.825229,0.003718,0.004189,-0.331843,0.068199,0.181201,-1.043398,...,0.015493,-1.032607,-1.033614,-1.025332,-1.028423,0.403327,0.503033,-0.870107,-0.355589,-0.50805
2016-12-15,-1.183059,-1.19028,-1.199093,-0.898984,-0.5707,-0.501639,0.680746,0.692954,1.525346,-1.043948,...,0.013265,-1.039015,-1.034389,-1.034962,-1.045431,-0.186896,-0.066754,-0.681782,-0.274941,-0.387654
2016-12-16,-1.171137,-1.181214,-1.173355,-0.898883,-0.156507,-0.161825,-0.283046,0.294037,0.650433,-1.051103,...,-0.008936,-1.042158,-1.044836,-1.033734,-1.051414,-0.294784,-0.150698,-0.574457,-0.162026,-0.216596


In [10]:
df.max().max()

46.044367317233245

In [11]:
from dataset import Dataset

In [12]:
SEQUENCE_LEN = 15  # 15 days of data
BATCH_SIZE = 24
dataset = Dataset(df,SEQUENCE_LEN)


In [13]:

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, test_size])

train_dataloader = torch.utils.data.DataLoader(
            train_set,
            batch_size=BATCH_SIZE,
            shuffle=True,num_workers=4,drop_last=True
)
val_dataloader = torch.utils.data.DataLoader(
            val_set,
            batch_size=BATCH_SIZE,
            shuffle=True,num_workers=4,drop_last=True
)

In [14]:
len(dataset)

1858

In [15]:
def dir_acc(seq,y_true, y_pred):
    y_true_prev = seq[:,-1,0]
    y_true = y_true[:,0]
    y_pred = y_pred[:,0]
    # print(y_true.shape,y_true_prev.shape,y_pred.shape)
    true_change = y_true - y_true_prev  # Calculate true change
    pred_change = y_pred - y_true_prev  # Calculate predicted change
    # print(torch.sign(true_change), torch.sign(pred_change))
    correct_direction = torch.eq(torch.sign(true_change), torch.sign(pred_change))  # Check if the signs match
    # print(correct_direction)
    return torch.mean(torch.tensor(correct_direction).float())  # Return the mean of correct directionsb

In [16]:
def directn_acc(y_true,y_pred):
    # print(y_true,y_pred)
    return (y_true==(y_pred>0.5)).sum()/len(y_true)

In [17]:
from transformer_model import TransformerModel as CustomModel

In [21]:
# Hyperparameters
input_size = df.shape[1]
output_size = 1 #len(tickers)
num_layers = 4
d_model = 16
nhead = 4
num_epochs = 50
learning_rate = 0.001
device = torch.device("cuda:7" if torch.cuda.is_available() else "cpu")

# Instantiate the model, loss function and optimizer
model = CustomModel(input_size, output_size, d_model, nhead, num_layers)
model.to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# print(summary(model, (torch.zeros((input_size,15)))))

In [22]:
train_loss_avg = 100
loss_test = 100
avg_dir_accuracy = 0 
# Training loop
for epoch in range(num_epochs):
    
    model.train()
    train_loss_avg = 0
    for seq,target,mean,std in tqdm(train_dataloader):
        optimizer.zero_grad()
        seq = seq.to(device)
        predictions = model(seq)
        # print(predictions.squeeze(-1).shape)
        # print(target.shape)
        loss = criterion(predictions.squeeze(-1), target.to(device))
        loss.backward()
        optimizer.step()
        train_loss_avg += loss / len(train_dataloader)

    if (epoch + 1) % 3 == 0:
        with torch.no_grad():
            loss_test = 0
            avg_dir_accuracy = 0
            for data in val_dataloader:
                seq,target,mean,std = data
                predictions = model(seq.to(device))
                # validation loss
                batch_loss = criterion(predictions.squeeze(-1), target.to(device))
                loss_test += batch_loss
                # tst = directn_acc(seq.to(device),target.to(device),predictions)
                # print('tst',tst)
                # print(avg_dir_accuracy)
                avg_dir_accuracy += directn_acc(target.to(device),predictions.squeeze(-1))
            loss_test /= len(val_dataloader)
            avg_dir_accuracy /= len(val_dataloader)
        print ('-'*15 + f'Epoch:{epoch} Train_loss:{train_loss_avg} Val_loss:{loss_test} Dir Accuracy:f{avg_dir_accuracy}')

100%|██████████| 61/61 [00:02<00:00, 24.28it/s]
100%|██████████| 61/61 [00:03<00:00, 17.23it/s]
100%|██████████| 61/61 [00:05<00:00, 10.87it/s]


---------------Epoch:2 Train_loss:0.6271580457687378 Val_loss:0.6378880143165588 Dir Accuracy:f0.6500000357627869


100%|██████████| 61/61 [00:04<00:00, 12.51it/s]
100%|██████████| 61/61 [00:04<00:00, 12.38it/s]
100%|██████████| 61/61 [00:04<00:00, 13.60it/s]


---------------Epoch:5 Train_loss:0.5654814839363098 Val_loss:0.5848717093467712 Dir Accuracy:f0.7055556178092957


100%|██████████| 61/61 [00:01<00:00, 34.19it/s]
100%|██████████| 61/61 [00:02<00:00, 21.41it/s]
100%|██████████| 61/61 [00:03<00:00, 18.23it/s]


---------------Epoch:8 Train_loss:0.4628218710422516 Val_loss:0.5508499145507812 Dir Accuracy:f0.7166666388511658


100%|██████████| 61/61 [00:01<00:00, 36.27it/s]
100%|██████████| 61/61 [00:01<00:00, 38.23it/s]
100%|██████████| 61/61 [00:01<00:00, 37.14it/s]


---------------Epoch:11 Train_loss:0.37878602743148804 Val_loss:0.4851488471031189 Dir Accuracy:f0.8083333969116211


100%|██████████| 61/61 [00:03<00:00, 15.46it/s]
100%|██████████| 61/61 [00:04<00:00, 15.23it/s]
100%|██████████| 61/61 [00:03<00:00, 18.66it/s]


---------------Epoch:14 Train_loss:0.32010188698768616 Val_loss:0.4866156280040741 Dir Accuracy:f0.7972222566604614


100%|██████████| 61/61 [00:01<00:00, 33.14it/s]
100%|██████████| 61/61 [00:03<00:00, 15.37it/s]
100%|██████████| 61/61 [00:04<00:00, 13.30it/s]


---------------Epoch:17 Train_loss:0.25996676087379456 Val_loss:0.508361279964447 Dir Accuracy:f0.7944445013999939


100%|██████████| 61/61 [00:02<00:00, 26.53it/s]
100%|██████████| 61/61 [00:04<00:00, 14.34it/s]
100%|██████████| 61/61 [00:04<00:00, 14.86it/s]


---------------Epoch:20 Train_loss:0.2699526250362396 Val_loss:0.4765937626361847 Dir Accuracy:f0.7916666865348816


100%|██████████| 61/61 [00:02<00:00, 22.05it/s]
100%|██████████| 61/61 [00:04<00:00, 14.94it/s]
100%|██████████| 61/61 [00:01<00:00, 44.13it/s]


---------------Epoch:23 Train_loss:0.2268901914358139 Val_loss:0.5425188541412354 Dir Accuracy:f0.7805556654930115


100%|██████████| 61/61 [00:02<00:00, 20.47it/s]
100%|██████████| 61/61 [00:04<00:00, 14.90it/s]
100%|██████████| 61/61 [00:03<00:00, 15.51it/s]


---------------Epoch:26 Train_loss:0.21879814565181732 Val_loss:0.4533858299255371 Dir Accuracy:f0.8166667222976685


100%|██████████| 61/61 [00:01<00:00, 45.82it/s]
100%|██████████| 61/61 [00:04<00:00, 15.19it/s]
100%|██████████| 61/61 [00:01<00:00, 42.55it/s]


---------------Epoch:29 Train_loss:0.18511590361595154 Val_loss:0.5061006546020508 Dir Accuracy:f0.8083333373069763


100%|██████████| 61/61 [00:01<00:00, 38.15it/s]
100%|██████████| 61/61 [00:01<00:00, 45.56it/s]
100%|██████████| 61/61 [00:01<00:00, 42.00it/s]


---------------Epoch:32 Train_loss:0.1679581254720688 Val_loss:0.519120454788208 Dir Accuracy:f0.8222222924232483


100%|██████████| 61/61 [00:01<00:00, 40.05it/s]
100%|██████████| 61/61 [00:01<00:00, 31.18it/s]
100%|██████████| 61/61 [00:01<00:00, 39.39it/s]


---------------Epoch:35 Train_loss:0.1524675488471985 Val_loss:0.5023630857467651 Dir Accuracy:f0.8250000476837158


100%|██████████| 61/61 [00:04<00:00, 14.61it/s]
100%|██████████| 61/61 [00:01<00:00, 33.12it/s]
100%|██████████| 61/61 [00:03<00:00, 17.51it/s]


---------------Epoch:38 Train_loss:0.1443261057138443 Val_loss:0.49065452814102173 Dir Accuracy:f0.8388890027999878


100%|██████████| 61/61 [00:04<00:00, 15.21it/s]
100%|██████████| 61/61 [00:04<00:00, 14.56it/s]
 10%|▉         | 6/61 [00:00<00:03, 14.45it/s]