In [29]:
import pandas as pd
import numpy as np
import matplotlib as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

In [61]:
data = pd.read_csv('Dataset/INFY_DATA.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6351 entries, 0 to 6350
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Datetime         6351 non-null   object 
 1   Open             6351 non-null   float64
 2   Volume           6351 non-null   int64  
 3   PE_Ratio         6351 non-null   float64
 4   52_Week_High     6351 non-null   float64
 5   52_Week_Low      6351 non-null   float64
 6   Is_52_week_high  6351 non-null   int64  
 7   Is_52_week_low   6351 non-null   int64  
 8   Is_high          6351 non-null   int64  
 9   Is_low           6351 non-null   int64  
 10  Maket_index      6351 non-null   float64
 11  Sector_index     6351 non-null   float64
 12  SMA_20           6332 non-null   float64
 13  SMA_50           6302 non-null   float64
 14  EMA_20           6351 non-null   float64
 15  EMA_50           6351 non-null   float64
 16  BB_upper         6332 non-null   float64
 17  BB_lower      

In [62]:
data['Datetime'] = pd.to_datetime(data['Datetime'])
data.ffill(inplace=True)
data[['Maket_index', 'Sector_index', 'SMA_20', 'SMA_50', 'EMA_20', 'EMA_50', 'BB_upper', 'BB_lower', 'RSI', 'MACD']] = data[['Maket_index', 'Sector_index', 'SMA_20', 'SMA_50', 'EMA_20', 'EMA_50', 'BB_upper', 'BB_lower', 'RSI', 'MACD']].fillna(data[['Maket_index', 'Sector_index', 'SMA_20', 'SMA_50', 'EMA_20', 'EMA_50', 'BB_upper', 'BB_lower', 'RSI', 'MACD']].mean())
features = data[['Open', 'Volume', 'PE_Ratio', '52_Week_High', '52_Week_Low', 
               'Is_52_week_high', 'Is_52_week_low', 'Is_high', 'Is_low', 'Maket_index', 
               'Sector_index', 'SMA_20', 'SMA_50', 'EMA_20', 'EMA_50', 'BB_upper', 
               'BB_lower', 'RSI', 'MACD']].values
scaler = MinMaxScaler(feature_range=(0, 1))
features_scaled = scaler.fit_transform(features)
target = data['Target'].values

In [63]:
# Verify no NaN values remain
print("NaN values after cleaning:")
print(data.isna().sum())

NaN values after cleaning:
Datetime           0
Open               0
Volume             0
PE_Ratio           0
52_Week_High       0
52_Week_Low        0
Is_52_week_high    0
Is_52_week_low     0
Is_high            0
Is_low             0
Maket_index        0
Sector_index       0
SMA_20             0
SMA_50             0
EMA_20             0
EMA_50             0
BB_upper           0
BB_lower           0
RSI                0
MACD               0
Target             0
Delta_Target       0
dtype: int64


In [64]:
# Print feature statistics for debugging
print("Features shape:", features_scaled.shape)
print("Features min/max:", features_scaled.min(), features_scaled.max())

Features shape: (6351, 19)
Features min/max: 0.0 1.0000000000000018


In [65]:
print("Target shape:", target.shape)
print("Target min/max:", target.min(), target.max())

Target shape: (6351,)
Target min/max: 1723.0 1939.75


## Using Lookback of 60 mins 

In [67]:
lookback = 60

def create_sequences(data, target, lookback):
    sequences = []
    labels = []
    for i in range(len(data) - lookback):
        sequences.append(data[i:i+lookback])
        labels.append(target[i+lookback])  # target is the value to predict (next day)
    return np.array(sequences), np.array(labels)

X, y = create_sequences(features_scaled, target, lookback)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader for batching
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [68]:
class StockLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(StockLSTM, self).__init__()
        
        # LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        # Ensure the input is of the correct shape (batch_size, sequence_length, input_size)
        batch_size, seq_len, _ = x.size()
        
        # Pass through LSTM layer
        lstm_out, (hn, cn) = self.lstm(x)
        
        # Use the last hidden state for prediction
        last_hidden_state = lstm_out[:, -1, :]  # Last time step's output
        
        # Fully connected layer to output the prediction
        out = self.fc(last_hidden_state)
        
        return out


In [72]:
# Model initialization
input_size = X_train.shape[2]  # Number of features
hidden_size = 128  # You can experiment with this
output_size = 1  # Predicting one value (the next day's price)
num_layers = 4 # Number of LSTM layers

model = StockLSTM(input_size, hidden_size, output_size, num_layers)
print(model)

StockLSTM(
  (lstm): LSTM(19, 128, num_layers=4, batch_first=True)
  (fc): Linear(in_features=128, out_features=1, bias=True)
)


In [73]:
# Hyperparameters
epochs = 50
learning_rate = 0.01

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()  # Set model to training mode
    epoch_loss = 0
    
    for batch_idx, (data, target) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()
        
        # Forward pass
        output = model(data)
        
        # Compute loss
        loss = criterion(output.squeeze(), target)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        
    
    avg_epoch_loss = epoch_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_epoch_loss:.4f}")

Epoch [1/50], Loss: 3194582.5665
Epoch [2/50], Loss: 2895656.5886
Epoch [3/50], Loss: 2614529.7911
Epoch [4/50], Loss: 2350374.1962
Epoch [5/50], Loss: 2109923.4193
Epoch [6/50], Loss: 1889630.5601
Epoch [7/50], Loss: 1687453.7563
Epoch [8/50], Loss: 1502518.2611
Epoch [9/50], Loss: 1333208.9684
Epoch [10/50], Loss: 1178943.6535
Epoch [11/50], Loss: 1038683.5206
Epoch [12/50], Loss: 911480.0055
Epoch [13/50], Loss: 796526.1954
Epoch [14/50], Loss: 692901.8710
Epoch [15/50], Loss: 600050.9288
Epoch [16/50], Loss: 517078.0075
Epoch [17/50], Loss: 443297.4197
Epoch [18/50], Loss: 377961.5969
Epoch [19/50], Loss: 320468.3845
Epoch [20/50], Loss: 270059.6657
Epoch [21/50], Loss: 226145.1117
Epoch [22/50], Loss: 188147.4444
Epoch [23/50], Loss: 155462.7253
Epoch [24/50], Loss: 127542.6151
Epoch [25/50], Loss: 103869.0969
Epoch [26/50], Loss: 83948.4806
Epoch [27/50], Loss: 67323.5282
Epoch [28/50], Loss: 53571.7790
Epoch [29/50], Loss: 42315.4132
Epoch [30/50], Loss: 33163.2158
Epoch [31/50]

In [74]:
torch.save(model.state_dict(), "LSTM_MODEL/first_model.pt")

In [None]:
# Improved LSTM Model
class ImprovedStockLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(ImprovedStockLSTM, self).__init__()
        
        self.lstm = nn.LSTM(
            input_size, 
            hidden_size, 
            num_layers, 
            batch_first=True, 
            dropout=0.2  # Add dropout between LSTM layers
        )
        
        self.fc1 = nn.Linear(hidden_size, 32)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(32, output_size)
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        last_time_step = lstm_out[:, -1, :]
        
        x = self.fc1(last_time_step)
        x = self.relu(x)
        x = self.dropout(x)
        out = self.fc2(x)
        
        return out