In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from copy import deepcopy as dc
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import dill as pk

In [2]:
# Bare ticker symbols, one per company, in the order they are processed.
# The '.NS' suffix shared by every entry is appended once below.
_NSE_SYMBOLS = (
    'ASIANPAINT', 'BRITANNIA', 'CIPLA', 'EICHERMOT', 'NESTLEIND',
    'GRASIM', 'HEROMOTOCO', 'HINDALCO', 'HINDUNILVR', 'ITC',
    'LT', 'M&M', 'RELIANCE', 'TATACONSUM', 'TATAMOTORS',
    'TATASTEEL', 'WIPRO', 'APOLLOHOSP', 'DRREDDY', 'TITAN',
    'SBIN', 'SHRIRAMFIN', 'BPCL', 'KOTAKBANK', 'INFY',
    'BAJFINANCE', 'ADANIENT', 'SUNPHARMA', 'JSWSTEEL', 'HDFCBANK',
    'TCS', 'ICICIBANK', 'POWERGRID', 'MARUTI', 'INDUSINDBK',
    'AXISBANK', 'HCLTECH', 'ONGC', 'NTPC', 'COALINDIA',
    'BHARTIARTL', 'TECHM', 'LTIM', 'DIVISLAB', 'ADANIPORTS',
    'HDFCLIFE', 'SBILIFE', 'ULTRACEMCO', 'BAJAJ-AUTO', 'BAJAJFINSV',
)

# Suffixed tickers (e.g. 'ASIANPAINT.NS'); these strings are also used to
# build the 'Data/<ticker>-train.csv' and 'Models/<ticker>.pt' file names.
nifty50_comapanies = [f'{symbol}.NS' for symbol in _NSE_SYMBOLS]

In [3]:
def prepare_dataframe_for_lstm(df, n_steps):
    """Build a supervised-learning table of lagged closing prices.

    Args:
        df: DataFrame with a 'Date' column and a 'Close' column. It is
            copied first, so the caller's frame is left untouched.
        n_steps: Number of lag columns to add.

    Returns:
        A new DataFrame indexed by 'Date' that holds 'Close' followed by
        'Close(t-1)' ... 'Close(t-n_steps)'. Rows containing any missing
        value (including the first ``n_steps`` rows, which have no full
        history) are dropped.
    """
    indexed = dc(df).set_index('Date')

    # One shifted copy of 'Close' per lag, keyed by its column label.
    lag_columns = {
        f'Close(t-{lag})': indexed['Close'].shift(lag)
        for lag in range(1, n_steps + 1)
    }

    return indexed.assign(**lag_columns).dropna()

In [4]:
class StockPredictorDataset(Dataset):
    """Map-style dataset that pairs each input sample with its target.

    Args:
        X: Indexable, sized collection of input samples.
        y: Indexable, sized collection of targets, one per entry of ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y):
        # Fail at construction time instead of with an IndexError (or
        # silently ignored targets) in the middle of an epoch.
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same length, got {len(X)} and {len(y)}'
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the ``(input, target)`` pair stored at position ``i``."""
        return self.X[i], self.y[i]

In [6]:
# Use the first CUDA GPU when one is available; otherwise fall back to the
# CPU so every `.to(device)` call in the notebook still works.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [7]:
class LSTM_Stock(nn.Module):
    """Two multi-layer LSTM blocks followed by a linear head with one output.

    Args:
        input_size: Number of features per time step fed to the first LSTM.
        hidden_size: Hidden-state width used by both LSTM blocks.
        num_stacked_layers: Number of layers inside each LSTM block.
    """

    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        self.LSTM1 = nn.LSTM(input_size=input_size, hidden_size=self.hidden_size, num_layers=num_stacked_layers, batch_first=True)
        self.LSTM2 = nn.LSTM(input_size=self.hidden_size, hidden_size=hidden_size, num_layers=num_stacked_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Predict one value per input sequence.

        Args:
            x: Batch-first input tensor; dimension 0 is the batch and
                dimension 1 is the time axis.

        Returns:
            Tensor with one row per batch element and a single column,
            computed from the last time step of the second LSTM block.
        """
        batch_size = x.size(0)

        # Create the zero initial state on the same device and dtype as the
        # input instead of reading a notebook-level `device` global, so the
        # module works wherever the input tensor lives.
        state_shape = (self.num_stacked_layers, batch_size, self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        hidden = (h0, c0)
        out, hidden = self.LSTM1(x, hidden)
        # The final state of the first block seeds the second block.
        out, _ = self.LSTM2(out, hidden)
        # Keep only the last time step before the linear head.
        out = self.fc1(out[:, -1, :])

        return out
    
# One input feature per time step, 20 hidden units, 10 layers per LSTM block.
model = LSTM_Stock(input_size=1, hidden_size=20, num_stacked_layers=10)
model = model.to(device)

# Last expression of the cell: shows the module summary.
model

LSTM_Stock(
  (LSTM1): LSTM(1, 20, num_layers=10, batch_first=True)
  (LSTM2): LSTM(20, 20, num_layers=10, batch_first=True)
  (fc1): Linear(in_features=20, out_features=1, bias=True)
)

In [8]:
def train_one_epoch():
    """Run one optimisation pass over ``train_loader``.

    Reads the notebook-level ``model``, ``train_loader``, ``loss_fun``,
    ``optimizer`` and ``device``.

    Returns:
        float: Mean per-batch loss over the epoch, or ``nan`` when the
        loader produced no batches. Earlier versions computed an average
        and threw it away; callers that ignore the return value are
        unaffected.
    """
    model.train(True)
    total_loss = 0.0
    num_batches = 0

    for batch in train_loader:
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        output = model(x_batch)
        loss = loss_fun(output, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # An empty loader has no meaningful average; report nan rather than 0.
    return total_loss / num_batches if num_batches else float('nan')

In [9]:
def validate_one_epoch():
    """Evaluate the model on ``test_loader`` without updating weights.

    Reads the notebook-level ``model``, ``test_loader``, ``loss_fun`` and
    ``device``.

    Returns:
        float: Mean per-batch loss over the loader, or ``nan`` when the
        loader produced no batches (previously this case raised
        ``ZeroDivisionError``). Callers that ignore the return value are
        unaffected.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch in test_loader:
            x_batch, y_batch = batch[0].to(device), batch[1].to(device)

            output = model(x_batch)
            loss = loss_fun(output, y_batch)

            total_loss += loss.item()
            num_batches += 1

    return total_loss / num_batches if num_batches else float('nan')

In [10]:
# Settings shared by every company. They never change between iterations,
# so they are defined once instead of being re-assigned inside the loop.
lookback = 7
batch_size = 16
learning_rate = 0.001
num_epochs = 10
loss_fun = nn.MSELoss()

for comp in nifty50_comapanies:

    # ------------------------------------------------------------------------------------------------ for training data
    df = pd.read_csv('Data/'+comp+'-train.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df = df[['Date', 'Close']]

    shifted_df = prepare_dataframe_for_lstm(df, lookback)

    shifted_df_to_np = shifted_df.to_numpy()

    # The scaler is fitted on the training split only.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    shifted_df_to_np = scaler.fit_transform(shifted_df_to_np)

    # Column 0 is the target 'Close'; the remaining columns are the lags.
    X = shifted_df_to_np[:, 1:]
    y = shifted_df_to_np[:, 0]

    # Reverse the lag columns so each row runs from t-lookback up to t-1,
    # and copy so X is a standalone array rather than a flipped view.
    X = dc(np.flip(X, axis=1))

    # ------------------------------------------------------------------------------------------------ for testing data
    df_test = pd.read_csv('Data/'+comp+'-test.csv')
    df_test['Date'] = pd.to_datetime(df_test['Date'])
    df_test = df_test[['Date', 'Close']]

    shifted_df_test = prepare_dataframe_for_lstm(df_test, lookback)

    shifted_df_test_to_np = shifted_df_test.to_numpy()
    # Reuse the scaling learned from the training split. Re-fitting on the
    # test split (fit_transform) would scale it differently from the data
    # the model was trained on and overwrite the training-fitted scaler.
    shifted_df_test_to_np = scaler.transform(shifted_df_test_to_np)

    X_test = shifted_df_test_to_np[:, 1:]
    y_test = shifted_df_test_to_np[:, 0]

    X_test = dc(np.flip(X_test, axis=1))

    x_train = X
    y_train = y
    x_test = X_test

    # One feature per time step: (samples, lookback, 1) inputs, (samples, 1) targets.
    x_train = x_train.reshape((-1, lookback, 1))
    x_test = x_test.reshape((-1, lookback, 1))

    y_train = y_train.reshape((-1, 1))
    y_test = y_test.reshape((-1, 1))

    # ------------------------------------------------------------------------------------------------ convert to tensors
    x_train = torch.tensor(x_train).to(dtype=torch.float)
    x_test = torch.tensor(x_test).to(dtype=torch.float)
    y_train = torch.tensor(y_train).to(dtype=torch.float)
    y_test = torch.tensor(y_test).to(dtype=torch.float)

    # ------------------------------------------------------------------------------------------------ convert to pytorch dataset
    train_dataset = StockPredictorDataset(x_train, y_train)
    test_dataset = StockPredictorDataset(x_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # ------------------------------------------------------------------------------------------------ fresh model per company
    # Start every company from newly initialised weights. Reusing one model
    # object across iterations made each saved checkpoint carry the training
    # of all previously processed companies.
    model = LSTM_Stock(1, 20, 10)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        train_one_epoch()
        validate_one_epoch()

    # ------------------------------------------------------------------------------------------------ Saving Models
    # The whole module object is pickled with dill, which is the format the
    # loading cell reads back with pk.load. Only unpickle files you trust.
    with open('Models/'+comp+'.pt', 'wb') as file:
        pk.dump(model, file)

In [11]:
# ------------------------------------------------------------------------------------------------ loading a saved model
# Read back the pickled NTPC model object. Unpickling can execute arbitrary
# code, so only load checkpoint files that you created or otherwise trust.
with open('Models/NTPC.NS.pt', 'rb') as model_file:
    model = pk.load(model_file)

# Last expression of the cell: shows the module summary.
model

LSTM_Stock(
  (LSTM1): LSTM(1, 20, num_layers=10, batch_first=True)
  (LSTM2): LSTM(20, 20, num_layers=10, batch_first=True)
  (fc1): Linear(in_features=20, out_features=1, bias=True)
)