## Imports

In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import sklearn
from sklearn.preprocessing import MinMaxScaler
import yfinance
import fix_yahoo_finance as yf
import torch
from torch import nn 
import time
from torch.utils.data import DataLoader
from IPython.display import clear_output
import math, time
from sklearn.metrics import mean_squared_error



*** `fix_yahoo_finance` was renamed to `yfinance`. ***
Please install and use `yfinance` directly using `pip install yfinance -U`

More information: https://github.com/ranaroussi/yfinance



## Load data using yahoo finance

In [None]:
def get_stock_data(ticker, start_date, end_date):
    '''
        takes in stock name: "AAPL"
        starting date : '2010-01-01'
        ending date : '2015-01-01'
    '''
    data = yf.download(ticker, start_date, end_date) 
    return data

## split data into training and testing 

In [None]:
def split_data(stock, lookback: int):
    '''
        
    '''
    data_raw = stock.to_numpy()
    data = []
    for index in range(len(data_raw) - lookback): 
        data.append(data_raw[index: index + lookback])
    
    data = np.array(data);
    test_set_size = int(np.round(0.2*data.shape[0]));
    train_set_size = data.shape[0] - (test_set_size);
    
    x_train = data[:train_set_size,:-1,:]
    y_train = data[:train_set_size,-1,:]
    
    x_test = data[train_set_size:,:-1]
    y_test = data[train_set_size:,-1,:]

    x_train = torch.from_numpy(x_train).type(torch.Tensor)
    y_train = torch.from_numpy(y_train).type(torch.Tensor)

    x_test = torch.from_numpy(x_test).type(torch.Tensor)
    y_test = torch.from_numpy(y_test).type(torch.Tensor)
            
    return [x_train, y_train, x_test, y_test]

## model class

In [None]:
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach())) #2
        out = self.fc(out[:, -1, :]) 
        return out

## data preprocessing

In [None]:
def preprocess_data(data, inputed_scaler):
    data = data[['Close']]
    scaler = inputed_scaler
    data['Close'] = scaler.fit_transform(data['Close'].values.reshape(-1,1))
    return data

## model creation

In [None]:
input_dim = 1
hidden_dim = 16
num_layers = 3
output_dim = 1
saved_model_path = "predict_stock_price_using_lstm_in_pytorch"

model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

## load stock tickers

In [None]:
snp_data = pd.read_csv('constituents_csv.csv')
tickers = snp_data['Symbol']
tickers.head()

0     MMM
1     AOS
2     ABT
3    ABBV
4    ABMD
Name: Symbol, dtype: object

# training

In [None]:
list_of_Scores = []
failed = 0
num_epochs = 5
scaler = MinMaxScaler(feature_range=(-1, 1))

for t in range(num_epochs):

    for i,ticker in enumerate(tickers):

        clear_output(wait=True)
        try:
            data = yf.download(r, '2010-01-r1', '2023-02-02') 
            if(data['Close'].empty):
                raise ValueError('Error')    
            data = preprocess_data(data, scaler)

        except: # in case the r is not foundrthe stock is skipped
            print("didnt find anything \n\n")
            continue
    
        hist = np.zeros(num_epochs)
        lstm = []
        lookback = 21

        x_train, y_train, x_test, y_test = split_data(data, lookback)

        y_train_pred = model(x_train)

        loss = criterion(y_train_pred, y_train)
        print("Epoch ", t, "MSE: ", loss.item())
        hist[t] = loss.item()

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        
        # testing part
        y_test_pred = model(x_test)

        y_test_pred = scaler.inverse_transform(y_test_pred.detach().numpy())
        y_train = scaler.inverse_transform(y_train.detach().numpy())
        testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))


[*********************100%***********************]  1 of 1 completed
Epoch  4 MSE:  0.0001908777339849621


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Close'] = scaler.fit_transform(data['Close'].values.reshape(-1,1))


## save weights

In [None]:
save_model = "predict_stock_price_weights"
torch.save(model.state_dict(), save_model)
list(model.parameters())

[Parameter containing:
 tensor([[ 0.4341],
         [-0.0786],
         [ 0.0391],
         [ 0.4482],
         [ 0.4537],
         [ 0.3636],
         [ 0.0818],
         [-0.4042],
         [-0.4087],
         [-0.0378],
         [-0.1639],
         [ 0.3221],
         [ 0.0529],
         [-0.4152],
         [ 0.2148],
         [-0.3201],
         [ 0.7079],
         [-0.2633],
         [ 0.0650],
         [ 0.3662],
         [ 0.1797],
         [ 0.5411],
         [-0.0758],
         [-0.1590],
         [-0.0701],
         [-0.1155],
         [ 0.0424],
         [ 0.1348],
         [ 0.2604],
         [-0.2676],
         [ 0.1159],
         [-0.2882],
         [-0.3739],
         [-0.0382],
         [ 0.5268],
         [-0.3181],
         [ 0.6131],
         [ 0.4973],
         [-0.2911],
         [-0.5464],
         [-0.5739],
         [ 0.4347],
         [-0.5882],
         [-0.4750],
         [ 0.3217],
         [ 0.5119],
         [ 0.0558],
         [ 0.3174],
         [ 0.8171