In [68]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [47]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("gpu")
else:
    device = torch.device('cpu')
print(torch.__version__)
print('CUDA available:', torch.cuda.is_available())
print('CUDA version:', torch.version.cuda)
print('cuDNN version:', torch.backends.cudnn.version())

gpu
2.1.2+cu121
CUDA available: True
CUDA version: 12.1
cuDNN version: 8902


In [26]:
data = pd.read_csv("Technology_Firm_Stock_Price.csv")
data = data.sort_values(by="Date")
data = data.drop(columns=["Date"])
data

Unnamed: 0,ADSK,ORCL,PTC,CDNS,FICO,CRM,SNPS,INTU,TYL,ADBE,...,NOW,MSFT,PANW,CDW,NTAP,STX,HPQ,WDC,AAPL,HPE
0,187.830002,50.359100,76.370003,71.440002,382.920013,166.990005,142.869995,258.836151,306.239990,334.429993,...,291.239990,154.779541,78.470001,137.363739,55.579060,51.156185,18.257439,64.771545,73.152641,14.081444
1,184.949997,50.181740,75.430000,70.419998,381.920013,166.169998,141.759995,257.105133,306.670013,331.809998,...,291.100006,152.852264,78.943336,135.712097,54.631645,49.811756,18.029110,63.774597,72.441467,13.733110
2,187.119995,50.443100,76.269997,70.849998,384.000000,173.449997,141.979996,259.079346,310.209991,333.709991,...,292.869995,153.247345,80.086670,134.690536,54.498821,49.219864,18.099367,62.550629,73.018684,13.689568
3,187.500000,50.555126,75.919998,71.070000,388.489990,176.000000,142.960007,259.137634,311.269989,333.390015,...,292.910004,151.850082,80.546669,134.537781,54.853001,49.811756,18.143276,66.785164,72.675278,13.619902
4,189.949997,50.752010,76.980003,72.279999,391.329987,177.330002,145.860001,265.371277,310.989990,337.869995,...,295.649994,154.268814,79.523331,134.337265,55.906673,50.200710,18.380386,67.752487,73.844353,13.724401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,242.759995,105.790077,173.970001,275.820007,1168.880005,266.339996,524.460022,623.131348,415.000000,598.750000,...,697.549988,374.579987,298.209991,226.630005,88.882095,85.160004,30.180000,52.660000,193.600006,17.090000
1002,245.070007,105.780121,175.309998,274.959991,1164.619995,266.220001,520.250000,623.910156,416.410004,598.260010,...,701.229980,374.660004,300.820007,228.729996,88.862213,85.870003,30.410000,52.759998,193.050003,17.230000
1003,245.110001,105.531090,175.720001,274.640015,1170.609985,266.720001,518.099976,628.173767,415.600006,596.080017,...,703.760010,374.070007,297.500000,228.550003,88.086731,85.680000,30.240000,52.419998,193.149994,17.090000
1004,244.910004,105.839890,175.919998,273.239990,1169.339966,265.579987,517.409973,627.075439,418.290009,595.520020,...,702.460022,375.279999,295.579987,228.699997,87.698990,86.790001,30.180000,52.930000,193.580002,17.170000


In [40]:
scaler = MinMaxScaler(feature_range=(0,1))
data_reshape = data["ADSK"].values.reshape(-1,1)
scaled_data = pd.DataFrame(scaler.fit_transform(data_reshape))

scaled_data

Unnamed: 0,0
0,0.251273
1,0.237310
2,0.247831
3,0.249673
4,0.261550
...,...
1001,0.517574
1002,0.528773
1003,0.528967
1004,0.527997


In [33]:
company_dict = {}

for ticker in data.columns:
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_data = pd.DataFrame(scaler.fit_transform(data[ticker].values.reshape(-1,1)))
    company_dict[ticker] = {'scaler': scaler, 'scaled_data': scaled_data}
    
    print(f"{ticker} is added to the dictionary!")

ADSK is added to the dictionary!
ORCL is added to the dictionary!
PTC is added to the dictionary!
CDNS is added to the dictionary!
FICO is added to the dictionary!
CRM is added to the dictionary!
SNPS is added to the dictionary!
INTU is added to the dictionary!
TYL is added to the dictionary!
ADBE is added to the dictionary!
ANSS is added to the dictionary!
ANET is added to the dictionary!
CSCO is added to the dictionary!
MSI is added to the dictionary!
FFIV is added to the dictionary!
JNPR is added to the dictionary!
APH is added to the dictionary!
GLW is added to the dictionary!
TDY is added to the dictionary!
ROP is added to the dictionary!
TRMB is added to the dictionary!
KEYS is added to the dictionary!
ZBRA is added to the dictionary!
TEL is added to the dictionary!
JBL is added to the dictionary!
IT is added to the dictionary!
CTSH is added to the dictionary!
EPAM is added to the dictionary!
ACN is added to the dictionary!
IBM is added to the dictionary!
VRSN is added to the dic

In [38]:
company_dict["ADSK"]

{'scaler': MinMaxScaler(),
 'scaled_data':              0
 0     0.251273
 1     0.237310
 2     0.247831
 3     0.249673
 4     0.261550
 ...        ...
 1001  0.517574
 1002  0.528773
 1003  0.528967
 1004  0.527997
 1005  0.521065
 
 [1006 rows x 1 columns]}

In [52]:
def rolling_window(data):
    time_step = 22

    X, y = [], []

    for i in range(len(data) - time_step):
        X.append(data.iloc[i : (i+time_step)])
        y.append(data.iloc[i+time_step])
    
    X = np.array(X)
    y = np.array(y)

    print(X.shape)
    print(y.shape)

    return X, y

In [62]:
X, y = rolling_window(company_dict["AAPL"]["scaled_data"])
y[1]

(984, 22, 1)
(984, 1)


array([0.16405758])

In [59]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train.shape
y_train.shape

(787, 1)

In [66]:
class RollingWindowDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float), torch.tensor(self.y[idx], dtype=torch.float)


In [49]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()

        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        self.lstm = nn.LSTM(input_size = input_dim, hidden_size = hidden_dim, num_layers = layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initializing hidden state
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(device) 

        # Initialize cell state
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(device) 

        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])

        return out

In [71]:
train_data = RollingWindowDataset(X_train, y_train)
test_data = RollingWindowDataset(X_test, y_test)

In [84]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = LSTMModel(input_dim=X.shape[2], hidden_dim=50, layer_dim=1, output_dim=y_train.shape[1]).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.MSELoss()

In [85]:
def train_lstm(batch_size, epochs, train_data):
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        print('epochs {}/{}'.format(epoch+1,epochs))

        running_loss = .0

        model.train()

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad() # Clear the gradients
            output = model(data)  # Forward pass
            loss = criterion(output, target)
            loss.backward() # Compute gradient

            optimizer.step() # Update params
            running_loss += loss.item()


        train_loss = running_loss/len(train_loader)
        
        print(f'train_loss {train_loss}')
            # if batch_idx % 10 == 0:
            #     print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")

In [87]:
train_lstm(32, 30, train_data)

epochs 1/30
train_loss 0.03098879128228873
epochs 2/30
train_loss 0.02987977970391512
epochs 3/30
train_loss 0.028968321289867163
epochs 4/30
train_loss 0.02812098519410938
epochs 5/30
train_loss 0.02728206507395953
epochs 6/30
train_loss 0.026443690895102918
epochs 7/30
train_loss 0.02560245622880757
epochs 8/30
train_loss 0.024753609504550696
epochs 9/30
train_loss 0.023892798079177737
epochs 10/30
train_loss 0.0230163794523105
epochs 11/30
train_loss 0.02212125529535115
epochs 12/30
train_loss 0.021204771744087338
epochs 13/30
train_loss 0.020264711882919072
epochs 14/30
train_loss 0.01929940430447459
epochs 15/30
train_loss 0.01830789214465767
epochs 16/30
train_loss 0.017290139230899514
epochs 17/30
train_loss 0.016247369358316065
epochs 18/30
train_loss 0.015182475675828755
epochs 19/30
train_loss 0.014100548680871725
epochs 20/30
train_loss 0.013009482119232417
epochs 21/30
train_loss 0.011920583879109471
epochs 22/30
train_loss 0.0108491549664177
epochs 23/30
train_loss 0.00981