In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, random_split, DataLoader
import torch.nn as nn
import os
import zipfile
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix,f1_score, accuracy_score
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
from tqdm import tqdm
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Colab-only: mount Google Drive at /content/drive; the CSV and the model
# checkpoints used below are read from / written to paths under this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the price history and discard every row that contains a missing value.
df = pd.read_csv("/content/drive/MyDrive/RELIANCE.csv").dropna()
numeric_columns = df.select_dtypes(include=[float, int]).columns
# Largest raw VWAP; kept so normalized VWAP values can be scaled back to prices later.
max_vwap = df['VWAP'].max()

# Max-normalization: each numeric column is divided by its own maximum,
# non-numeric columns are carried over from the copy unchanged.
normalized_df = df.copy()
normalized_df[numeric_columns] = df[numeric_columns].div(df[numeric_columns].max(), axis="columns")

print(df)
normalized_df

            Date    Symbol Series  Prev Close     Open     High      Low  \
2850  2011-06-01  RELIANCE     EQ      951.85   952.00   958.65   943.65   
2851  2011-06-02  RELIANCE     EQ      946.80   936.55   954.70   936.55   
2852  2011-06-03  RELIANCE     EQ      951.05   960.50   967.00   931.50   
2853  2011-06-06  RELIANCE     EQ      934.60   934.65   940.80   928.15   
2854  2011-06-07  RELIANCE     EQ      937.75   933.55   960.00   933.55   
...          ...       ...    ...         ...      ...      ...      ...   
5301  2021-04-26  RELIANCE     EQ     1904.35  1920.00  1962.00  1911.50   
5302  2021-04-27  RELIANCE     EQ     1937.85  1940.00  1997.20  1938.25   
5303  2021-04-28  RELIANCE     EQ     1988.65  1997.85  2008.00  1980.15   
5304  2021-04-29  RELIANCE     EQ     1997.30  2022.90  2044.50  2007.30   
5305  2021-04-30  RELIANCE     EQ     2024.05  2008.50  2036.00  1987.55   

         Last    Close     VWAP   Volume      Turnover    Trades  \
2850   947.50   946

Unnamed: 0,Date,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble
2850,2011-06-01,RELIANCE,EQ,0.409477,0.409462,0.404605,0.408409,0.407544,0.407305,0.405149,0.028184,0.011826,0.041043,0.046961,0.588807
2851,2011-06-02,RELIANCE,EQ,0.407305,0.402817,0.402938,0.405336,0.409695,0.409133,0.404833,0.033005,0.013839,0.044145,0.055575,0.595052
2852,2011-06-03,RELIANCE,EQ,0.409133,0.413118,0.408129,0.403151,0.402598,0.402056,0.406799,0.066966,0.028215,0.090154,0.053961,0.284736
2853,2011-06-06,RELIANCE,EQ,0.402056,0.402000,0.397071,0.401701,0.403716,0.403411,0.399789,0.021550,0.008923,0.030371,0.024831,0.407229
2854,2011-06-07,RELIANCE,EQ,0.403411,0.401527,0.405174,0.404038,0.412749,0.412230,0.406312,0.061718,0.025972,0.062096,0.126332,0.723310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5301,2021-04-26,RELIANCE,EQ,0.819234,0.825806,0.828075,0.827292,0.833584,0.833645,0.829815,0.147488,0.126758,0.181406,0.222802,0.533806
5302,2021-04-27,RELIANCE,EQ,0.833645,0.834409,0.842932,0.838870,0.855951,0.855499,0.845768,0.141444,0.123901,0.203850,0.196516,0.490933
5303,2021-04-28,RELIANCE,EQ,0.855499,0.859290,0.847490,0.857004,0.857306,0.859220,0.853872,0.121139,0.107131,0.173142,0.204301,0.596013
5304,2021-04-29,RELIANCE,EQ,0.859220,0.870065,0.862895,0.868754,0.868855,0.870728,0.865247,0.123192,0.110398,0.149216,0.147648,0.423562


In [None]:
class ECG_Data(Dataset):
    """Sliding-window dataset over the rows of a DataFrame.

    Each sample is ``window`` consecutive rows restricted to ``feature_cols``
    (positional columns), returned as a float64 tensor of shape
    ``(window, n_features)``. The label is the ``label_col`` value of the row
    that immediately follows the window. With the defaults this reproduces the
    original hard-coded behaviour: 20 rows, positional columns 4..10, label in
    positional column 9.

    Args:
        df: Source DataFrame; the selected feature columns must be numeric.
        window: Number of consecutive rows per sample.
        feature_cols: Positional column selector passed to ``DataFrame.iloc``
            (a slice or a list of ints).
        label_col: Positional index of the label column.

    Note:
        ``__len__`` is ``len(df)``, so the last ``window`` indices have no
        forward window. For those the window *ends* just before ``index`` and
        the label is row ``index`` itself, which makes sample ``index``
        identical to sample ``index - window``. A random train/test split can
        therefore place the same sample on both sides.
    """

    def __init__(self, df, window=20, feature_cols=slice(4, 11), label_col=9):
        self.df = df
        self.window = window
        self.feature_cols = feature_cols
        self.label_col = label_col

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(data, label)`` for ``index``.

        Raises:
            IndexError: if ``index`` is out of range, or the frame is too
                short to supply ``window`` rows before a tail index.
        """
        n_rows = len(self.df)
        if index < 0:
            # Support the usual negative-index convention explicitly instead
            # of letting negative row positions wrap around inside the window.
            index += n_rows
        if not 0 <= index < n_rows:
            raise IndexError("index {} is out of range for {} rows".format(index, n_rows))

        if index > n_rows - (self.window + 1):
            # Tail of the frame: no room for a forward window, so look back.
            start, label_row = index - self.window, index
        else:
            start, label_row = index, index + self.window
        if start < 0:
            raise IndexError(
                "frame with {} rows is too short for a window of {} at index {}".format(
                    n_rows, self.window, index
                )
            )

        # One positional block read replaces the per-row loop; converting a
        # single ndarray also avoids torch's slow list-of-ndarrays path.
        window_values = self.df.iloc[start:start + self.window, self.feature_cols].to_numpy(dtype=float)
        data = torch.tensor(window_values)
        label = self.df.iloc[label_row, self.label_col]

        return data, label

In [None]:
batch_size = 12
ds = ECG_Data(normalized_df)

print(len(ds))
# 90 % / 10 % train/test split.
train_size = int(0.9 * len(ds))
test_size = len(ds) - train_size
# Seed the split so the same samples are held out on every run; the model
# checkpoints are reloaded across sessions, and an unseeded split would let
# samples trained on in one session show up as test samples in the next.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(ds, [train_size, test_size], generator=split_generator)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

2456


In [None]:
# Sanity check: print the feature shape, the labels and the label shape of
# one training batch, then stop.
for data, label in train_loader:
  print(data.size(), label, label.size(), sep="\n")
  break

torch.Size([12, 20, 7])
tensor([0.3654, 0.3747, 0.5943, 0.5477, 0.4637, 0.5904, 0.3797, 0.5762, 0.3495,
        0.3569, 0.3676, 0.3945], dtype=torch.float64)
torch.Size([12])


  data = torch.tensor(data)


In [None]:
class Model1(nn.Module):
    """Stacked LSTM that maps a window of feature rows to one scalar.

    The input is ``(batch, seq_len, input_size)`` (``batch_first=True``); the
    hidden state of the last time step is projected to a single value by
    ``self.fc``.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden width.
        num_layers: Number of stacked LSTM layers.
        device: Device identifier, used only by ``positional_encoding``.
    """

    def __init__(self, input_size, hidden_size, num_layers, device):
        super(Model1,self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = device
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Sequential(nn.Linear(hidden_size, 1))

        # fc2 / flat / sigmoid are not used by forward(). fc2 owns parameters,
        # so it is kept to leave the module's state_dict keys unchanged for
        # checkpoints saved from this class.
        self.fc2 = nn.Sequential(nn.Linear(140, 1))
        self.flat = nn.Flatten()

        self.sigmoid = nn.Sigmoid()

    def positional_encoding(self, var, size):
        """Return ``[sin(var * f), cos(var * f)]`` concatenated on the last dim.

        ``f`` holds ``size // 2`` inverse frequencies ``1 / 10000**(k / size)``
        for even ``k``. Not called by ``forward``.
        """
        inv_freq = 1.0 / (
            10000
            ** (torch.arange(0, size, 2, device=self.device).float() / size)
        )
        pos_enc_a = torch.sin(var.repeat(1, size // 2) * inv_freq)
        pos_enc_b = torch.cos(var.repeat(1, size // 2) * inv_freq)
        pos_enc = torch.cat([pos_enc_a, pos_enc_b], dim=-1)
        return pos_enc


    def forward(self, lead_data):
        """Return a ``(batch, 1)`` prediction for ``lead_data``.

        The input is moved to the device and dtype of the LSTM weights, so
        CPU batches coming straight from a DataLoader work with a model that
        lives on the GPU, and the model is no longer tied to float64.
        """
        weight = self.lstm.weight_ih_l0
        lead_data = lead_data.to(device=weight.device, dtype=weight.dtype)

        batch_size = lead_data.size(0)
        # Explicit zero initial hidden/cell state, created directly with the
        # right device and dtype.
        h0 = torch.zeros(
            self.num_layers, batch_size, self.hidden_size,
            device=weight.device, dtype=weight.dtype,
        )
        c0 = torch.zeros_like(h0)
        lstm_out, _ = self.lstm(lead_data, (h0, c0))

        # Only the output of the final time step feeds the regression head.
        last_output = lstm_out[:, -1, :]

        output = self.fc(last_output)

        return output
# Hyper-parameters of the LSTM model; input_size matches the 7 feature
# columns produced per time step by the dataset.
input_size = 7
num_layers = 4
hidden_size = 128
model1 = Model1(input_size,hidden_size,num_layers, device).double().to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a
# CPU-only runtime (and vice versa).
model1.load_state_dict(torch.load("/content/drive/MyDrive/Stock_lstm.pth", map_location=device))

<All keys matched successfully>

In [None]:
class Model2(nn.Module):
    """Feed-forward regressor on the flattened input window.

    The input is flattened to 140 values per sample (20 time steps x 7
    features in this notebook) and passed through ``140 -> 70 -> output_size``
    with a ReLU in between.

    Args:
        input_size: Unused; accepted for signature parity with ``Model1``.
        hidden_size: Unused; accepted for signature parity with ``Model1``.
        num_layers: Unused; accepted for signature parity with ``Model1``.
        device: Device identifier, used only by ``positional_encoding``.
        output_size: Width of the output layer. Defaults to 1 because the
            combiner (``Model3``) takes exactly two inputs, one from each base
            model, and downstream code reads a single scalar per row.
    """

    def __init__(self, input_size, hidden_size, num_layers, device, output_size=1):
        super(Model2,self).__init__()
        # positional_encoding() reads self.device, so it has to be stored.
        self.device = device

        self.fc2 = nn.Sequential(nn.Linear(140, 70),
                                 nn.ReLU(),
                                 nn.Linear(70, output_size))
        self.flat = nn.Flatten()

    def positional_encoding(self, var, size):
        """Return ``[sin(var * f), cos(var * f)]`` concatenated on the last dim.

        ``f`` holds ``size // 2`` inverse frequencies ``1 / 10000**(k / size)``
        for even ``k``. Not called by ``forward``.
        """
        inv_freq = 1.0 / (
            10000
            ** (torch.arange(0, size, 2, device=self.device).float() / size)
        )
        pos_enc_a = torch.sin(var.repeat(1, size // 2) * inv_freq)
        pos_enc_b = torch.cos(var.repeat(1, size // 2) * inv_freq)
        pos_enc = torch.cat([pos_enc_a, pos_enc_b], dim=-1)
        return pos_enc


    def forward(self, lead_data):
        """Return a ``(batch, output_size)`` prediction for ``lead_data``."""
        # Align the input with the layer weights so CPU batches work with a
        # model that lives on the GPU.
        weight = self.fc2[0].weight
        lead_data = lead_data.to(device=weight.device, dtype=weight.dtype)

        data = self.flat(lead_data)
        output = self.fc2(data)

        return output
# These rebind the hyper-parameter names used for model1; Model2 accepts
# them but builds a fixed-size network.
input_size = 7
num_layers = 3
hidden_size = 64
model2 = Model2(input_size,hidden_size,num_layers, device).double().to(device)
try:
  # map_location lets a GPU-saved checkpoint load on a CPU-only runtime.
  model2.load_state_dict(torch.load("/content/drive/MyDrive/Stock.pth", map_location=device))
except FileNotFoundError:
  # Only a missing checkpoint is tolerated. Any other failure (for example a
  # shape mismatch in the state_dict) must surface, otherwise the notebook
  # would silently continue with a randomly initialised model2.
  print("Not Found")

In [None]:
class Model3(nn.Module):
    """Small MLP (2 -> 16 -> 1, ReLU in between) that combines two predictions.

    The ``model`` and ``model2`` constructor arguments are accepted but not
    stored or used.
    """

    def __init__(self, model, model2):
        super().__init__()
        layers = [
            nn.Linear(2, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, lead_data):
        """Map a ``(batch, 2)`` input to a ``(batch, 1)`` output."""
        return self.fc(lead_data)

# Combiner network, cast to float64 and placed on the selected device.
# Model3.__init__ accepts the two base models but does not store them.
model3 = Model3(model1, model2).double().to(device)

In [None]:
val_losses = []
train_losses = []
epochs = 100
criterion = nn.MSELoss()
# Only the combiner (model3) is optimised; model1 and model2 act as frozen
# feature extractors.
optimizer = optim.AdamW(model3.parameters(), lr=0.001)


def _stack_base_predictions(batch):
    """Run both base models on ``batch`` and concatenate their outputs column-wise."""
    batch = batch.to(device)
    # The base models are not updated here, so no autograd graph is needed.
    with torch.no_grad():
        return torch.cat((model1(batch), model2(batch)), dim=1)


def _as_target(label):
    """Turn a batch of labels into a float64 ``(batch, 1)`` tensor on ``device``."""
    return label.double().unsqueeze(1).to(device)


for epoch in range(epochs):
    train_loss = 0.0
    val_loss = 0.0

    model3.train()
    for data, label in tqdm(train_loader):
        label = _as_target(label)

        optimizer.zero_grad()
        outputs = model3(_stack_base_predictions(data))
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so
        # the epoch figure is a true per-sample average.
        train_loss += loss.item() * data.size(0)

    # The accumulator is a sum over samples, so divide by the sample count
    # (not by the index of the last batch).
    train_loss /= len(train_loader.dataset)
    train_losses.append(train_loss)

    # Validation on the held-out split.
    model3.eval()
    with torch.no_grad():
        for data, label in test_loader:
            label = _as_target(label)
            outputs = model3(_stack_base_predictions(data))
            val_loss += criterion(outputs, label).item() * data.size(0)
    val_loss /= len(test_loader.dataset)
    val_losses.append(val_loss)

    print(train_loss, val_loss, epoch + 1)

100%|██████████| 185/185 [00:20<00:00,  9.02it/s]


5.751244572792627 1


100%|██████████| 185/185 [00:21<00:00,  8.52it/s]


0.17563761748547413 2


100%|██████████| 185/185 [00:20<00:00,  8.83it/s]


0.02625479811641058 3


100%|██████████| 185/185 [00:21<00:00,  8.78it/s]


0.02109323747468302 4


100%|██████████| 185/185 [00:21<00:00,  8.65it/s]


0.016077687969518483 5


100%|██████████| 185/185 [00:21<00:00,  8.76it/s]


0.011561366958553889 6


100%|██████████| 185/185 [00:21<00:00,  8.48it/s]


0.007554652654369132 7


100%|██████████| 185/185 [00:20<00:00,  8.93it/s]


0.0044654551169834664 8


100%|██████████| 185/185 [00:22<00:00,  8.35it/s]


0.0025809730977444222 9


100%|██████████| 185/185 [00:20<00:00,  9.20it/s]


0.0017275154649062884 10


100%|██████████| 185/185 [00:22<00:00,  8.36it/s]


0.0013999461494692091 11


100%|██████████| 185/185 [00:20<00:00,  9.00it/s]


0.0013246775925311448 12


100%|██████████| 185/185 [00:21<00:00,  8.43it/s]


0.0012891321556748314 13


100%|██████████| 185/185 [00:20<00:00,  9.16it/s]


0.001280858724729189 14


100%|██████████| 185/185 [00:21<00:00,  8.47it/s]


0.0012798353669067537 15


100%|██████████| 185/185 [00:20<00:00,  8.95it/s]


0.0012813456675742543 16


100%|██████████| 185/185 [00:21<00:00,  8.61it/s]


0.0012940523668708635 17


100%|██████████| 185/185 [00:21<00:00,  8.69it/s]


0.0012837519473794645 18


100%|██████████| 185/185 [00:21<00:00,  8.72it/s]


0.0012765979893001142 19


100%|██████████| 185/185 [00:21<00:00,  8.55it/s]


0.001288669779386644 20


100%|██████████| 185/185 [00:20<00:00,  8.95it/s]


0.0012878140067580724 21


100%|██████████| 185/185 [00:22<00:00,  8.32it/s]


0.001272304935355251 22


100%|██████████| 185/185 [00:20<00:00,  8.99it/s]


0.0013066263081683899 23


100%|██████████| 185/185 [00:22<00:00,  8.34it/s]


0.0012837147809505818 24


100%|██████████| 185/185 [00:20<00:00,  8.91it/s]


0.0012934759913913138 25


100%|██████████| 185/185 [00:22<00:00,  8.33it/s]


0.0013004392287423285 26


100%|██████████| 185/185 [00:20<00:00,  8.94it/s]


0.0013120586566914756 27


100%|██████████| 185/185 [00:22<00:00,  8.34it/s]


0.0013020790168446936 28


100%|██████████| 185/185 [00:20<00:00,  8.98it/s]


0.0013048058325602204 29


100%|██████████| 185/185 [00:22<00:00,  8.38it/s]


0.0013025689042816492 30


100%|██████████| 185/185 [00:20<00:00,  9.02it/s]


0.0012932225222890175 31


100%|██████████| 185/185 [00:22<00:00,  8.34it/s]


0.0013398239605675745 32


100%|██████████| 185/185 [00:20<00:00,  8.89it/s]


0.001316280971211814 33


100%|██████████| 185/185 [00:21<00:00,  8.57it/s]


0.0013065129341897149 34


100%|██████████| 185/185 [00:21<00:00,  8.80it/s]


0.0013215497916487066 35


100%|██████████| 185/185 [00:21<00:00,  8.66it/s]


0.001352151641437932 36


100%|██████████| 185/185 [00:21<00:00,  8.56it/s]


0.0013458186198394674 37


100%|██████████| 185/185 [00:20<00:00,  8.89it/s]


0.001318004623527255 38


100%|██████████| 185/185 [00:21<00:00,  8.61it/s]


0.0013395017083436588 39


100%|██████████| 185/185 [00:20<00:00,  9.05it/s]


0.0012896662939960973 40


100%|██████████| 185/185 [00:22<00:00,  8.29it/s]


0.0013850477380368252 41


100%|██████████| 185/185 [00:20<00:00,  8.91it/s]


0.0013145542461178578 42


100%|██████████| 185/185 [00:22<00:00,  8.37it/s]


0.0013107135689341344 43


100%|██████████| 185/185 [00:20<00:00,  8.96it/s]


0.0013515949090739008 44


100%|██████████| 185/185 [00:22<00:00,  8.33it/s]


0.0013788770276072957 45


100%|██████████| 185/185 [00:20<00:00,  8.99it/s]


0.0013277255457290128 46


100%|██████████| 185/185 [00:22<00:00,  8.22it/s]


0.0013178036741386637 47


100%|██████████| 185/185 [00:20<00:00,  8.91it/s]


0.0013927756375921268 48


100%|██████████| 185/185 [00:22<00:00,  8.29it/s]


0.0014024495016437615 49


100%|██████████| 185/185 [00:20<00:00,  8.89it/s]


0.0013388262329936955 50


100%|██████████| 185/185 [00:22<00:00,  8.28it/s]


0.0013727328075215349 51


100%|██████████| 185/185 [00:20<00:00,  8.99it/s]


0.0014068558281665176 52


100%|██████████| 185/185 [00:21<00:00,  8.45it/s]


0.001322071525553149 53


100%|██████████| 185/185 [00:20<00:00,  8.84it/s]


0.0013690016778801454 54


100%|██████████| 185/185 [00:21<00:00,  8.53it/s]


0.0013118759418014494 55


100%|██████████| 185/185 [00:21<00:00,  8.72it/s]


0.0013883832479948386 56


100%|██████████| 185/185 [00:21<00:00,  8.59it/s]


0.00135080454506548 57


100%|██████████| 185/185 [00:21<00:00,  8.65it/s]


0.001377048187356282 58


100%|██████████| 185/185 [00:21<00:00,  8.69it/s]


0.0013514036065179111 59


100%|██████████| 185/185 [00:21<00:00,  8.53it/s]


0.00135565243762318 60


100%|██████████| 185/185 [00:21<00:00,  8.72it/s]


0.0013749284136151151 61


100%|██████████| 185/185 [00:21<00:00,  8.46it/s]


0.0013144394449431543 62


100%|██████████| 185/185 [00:20<00:00,  8.96it/s]


0.0013771891312902675 63


100%|██████████| 185/185 [00:22<00:00,  8.35it/s]


0.0014168492909248862 64


100%|██████████| 185/185 [00:20<00:00,  9.00it/s]


0.0013616269108786734 65


100%|██████████| 185/185 [00:22<00:00,  8.26it/s]


0.0013442763251320892 66


100%|██████████| 185/185 [00:20<00:00,  8.87it/s]


0.0013388727334816386 67


100%|██████████| 185/185 [00:22<00:00,  8.28it/s]


0.0013607706359641425 68


100%|██████████| 185/185 [00:20<00:00,  8.96it/s]


0.0013919932846078273 69


100%|██████████| 185/185 [00:22<00:00,  8.25it/s]


0.0013316391511606998 70


100%|██████████| 185/185 [00:20<00:00,  8.96it/s]


0.0013563660055006526 71


100%|██████████| 185/185 [00:22<00:00,  8.30it/s]


0.001363168291310904 72


100%|██████████| 185/185 [00:20<00:00,  8.96it/s]


0.001408305995233016 73


100%|██████████| 185/185 [00:22<00:00,  8.27it/s]


0.0013898626417029928 74


100%|██████████| 185/185 [00:20<00:00,  8.92it/s]


0.0013320189459099857 75


100%|██████████| 185/185 [00:22<00:00,  8.27it/s]


0.0013446289945035106 76


100%|██████████| 185/185 [00:20<00:00,  8.85it/s]


0.0013497720942245803 77


100%|██████████| 185/185 [00:22<00:00,  8.32it/s]


0.0013941980354709356 78


100%|██████████| 185/185 [00:20<00:00,  8.84it/s]


0.0013352346013190428 79


100%|██████████| 185/185 [00:21<00:00,  8.51it/s]


0.0013203057119817434 80


100%|██████████| 185/185 [00:21<00:00,  8.75it/s]


0.0013790423370341916 81


100%|██████████| 185/185 [00:21<00:00,  8.58it/s]


0.0014359199436462652 82


100%|██████████| 185/185 [00:21<00:00,  8.55it/s]


0.0013295800155914777 83


100%|██████████| 185/185 [00:21<00:00,  8.81it/s]


0.001384207440748128 84


100%|██████████| 185/185 [00:21<00:00,  8.51it/s]


0.0014235855033748162 85


100%|██████████| 185/185 [00:20<00:00,  9.00it/s]


0.0013241515654954315 86


100%|██████████| 185/185 [00:22<00:00,  8.29it/s]


0.0013206195657637046 87


100%|██████████| 185/185 [00:20<00:00,  9.03it/s]


0.0013263757629025596 88


100%|██████████| 185/185 [00:22<00:00,  8.27it/s]


0.0013876049291594746 89


100%|██████████| 185/185 [00:20<00:00,  9.05it/s]


0.001325232753143716 90


100%|██████████| 185/185 [00:22<00:00,  8.30it/s]


0.0013553083377262941 91


100%|██████████| 185/185 [00:20<00:00,  9.01it/s]


0.0013740383741986802 92


100%|██████████| 185/185 [00:22<00:00,  8.30it/s]


0.0013561161300664206 93


100%|██████████| 185/185 [00:20<00:00,  9.05it/s]


0.001376439758600972 94


100%|██████████| 185/185 [00:22<00:00,  8.39it/s]


0.0013858588660767416 95


100%|██████████| 185/185 [00:20<00:00,  8.88it/s]


0.0013490823307993945 96


100%|██████████| 185/185 [00:21<00:00,  8.44it/s]


0.0013300062103669846 97


100%|██████████| 185/185 [00:21<00:00,  8.60it/s]


0.0013675716072603448 98


100%|██████████| 185/185 [00:22<00:00,  8.39it/s]


0.0013858963891335337 99


100%|██████████| 185/185 [00:21<00:00,  8.62it/s]


0.0013380598470260528 100


In [None]:
# Persist the combiner's weights only; model1 and model2 are not part of this file.
torch.save(model3.state_dict(), "/content/drive/MyDrive/Stock_comb.pth")

In [None]:
# Print true vs. predicted VWAP (in price units) for one batch of the
# training loader.
model3.eval()
with torch.no_grad():
  for data, labels in train_loader:
    data = data.to(device)
    # All three models operate in normalized space: the combiner must see the
    # base predictions exactly as it did during training, i.e. before they
    # are scaled back to prices.
    normalized1 = model1(data)
    normalized2 = model2(data)
    normalized3 = model3(torch.cat((normalized1, normalized2), dim = 1))

    # Undo the max-normalization for display.
    outputs1 = normalized1 * max_vwap
    outputs2 = normalized2 * max_vwap
    outputs3 = normalized3 * max_vwap
    label = labels * max_vwap
    # Use the actual batch length; the last batch of a loader can be short.
    for i in range(len(label)):
      print("original - ", int(label[i].item()), end = " \t")
      print("lstm - ", int(outputs1[i].item()), end = "\t")
      print("regression -", int(outputs2[i].item()), end = "\t")
      print("combination - ", int(outputs3[i].item()))
    break

original -  967 	lstm -  982	regression - 989	comb -  850
original -  693 	lstm -  711	regression - 693	comb -  608
original -  844 	lstm -  855	regression - 853	comb -  737
original -  1249 	lstm -  1246	regression - 1271	comb -  1084
original -  865 	lstm -  876	regression - 880	comb -  757
original -  879 	lstm -  869	regression - 866	comb -  749
original -  1282 	lstm -  1249	regression - 1262	comb -  1082
original -  1109 	lstm -  1121	regression - 1131	comb -  971
original -  1026 	lstm -  1021	regression - 1032	comb -  885
original -  852 	lstm -  892	regression - 885	comb -  767
original -  773 	lstm -  792	regression - 797	comb -  685
original -  851 	lstm -  860	regression - 868	comb -  745
