In [283]:
'''
Author: Tyler Chin

References (helped massively): https://www.youtube.com/watch?v=q_HS4s1L8UI&t=1768s

A script that enables the discord bot to predict stocks!
'''

'\nAuthor: Tyler Chin\n\nReferences (helped massively): https://www.youtube.com/watch?v=q_HS4s1L8UI&t=1768s\n\nA script that enables the discord bot to predict stocks!\n'

In [284]:

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import yfinance as yf
import random

from copy import deepcopy as dc
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [285]:
'''
Constants that are used throughtout the script
'''
#data prep
peer_into = 14 # 1 <= n <= days the stocks existed
training_split = 0.95 # 0 < n < 1

# Datasets
test_size = 16

#trainin
epochs = 10 # 0 < n < infinity
lr = 0.1
loss_function = nn.MSELoss()

In [286]:
def data_processor(peer_into: int, df) -> None:
    '''
    Prepares the data to be read by LSTM

    Creates <peer_into> new rows of data which shift each row down by one
    '''
    for i in range(1, peer_into): 
        df[f'Close (t-{i})'] = df['Close'].shift(i) # Loops to shift in each items into place

    df.dropna(inplace=True) # drop all values with nan because those are not helpful for the LSTM

In [287]:
btc = yf.Ticker('BTC-USD')
data = btc.history(period='max')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
yesterdays_close = round(data['Close'][-1].item(), 2)

  yesterdays_close = round(data['Close'][-1].item(), 2)


In [288]:
data = data.drop(['Open', 'Volume','High','Low', 'Dividends', 'Stock Splits'], axis=1)
data_processor(peer_into, data)
data = data.to_numpy()

In [289]:
scaler = MinMaxScaler((-1, 1))
data = scaler.fit_transform(data)

In [290]:
X = data[:, 1:]
X = dc(np.flip(X, axis=1))
Y = data[:, 0]

original = dc(X)

split = int(len(X) * training_split)

X_train = np.array(np.empty(peer_into - 1))
Y_train = np.array([])

X_test = np.array(np.empty(peer_into - 1))
Y_test = np.array([])
for i in range(len(X)): # randomizes the data a bit
    seed = random.randint(0, len(X))

    if split >= seed:
        X_train = np.vstack([X_train, X[i]])
        Y_train = np.append(Y_train, Y[i])
        continue
    
    X_test = np.vstack([X_test, X[i]])
    Y_test = np.append(Y_test, Y[i])

In [291]:
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
original = np.expand_dims(original, axis=-1)


Y_train = Y_train.reshape((-1, 1))
Y_test = Y_test.reshape((-1, 1))

In [292]:
X_train = torch.tensor(X_train).float()
Y_train = torch.tensor(Y_train).float()
X_test = torch.tensor(X_test).float()
Y_test = torch.tensor(Y_test).float()
original = torch.tensor(original).float()

In [293]:
class BTCDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
    
    def __len__(self):
        return len(self.X) - 1

    def __getitem__(self, i):
        return self.X[i], self.Y[i]

In [294]:
training_dataset = BTCDataset(X_train, Y_train)
testing_dataset = BTCDataset(X_test, Y_test)

train_loader = DataLoader(training_dataset, batch_size=test_size, shuffle=True)
test_loader = DataLoader(testing_dataset, batch_size=test_size, shuffle=False)

In [295]:
for _, batch in enumerate(train_loader):
    x_batch, y_batch = batch[0].to(device), batch[1].to(device)
    break

In [296]:
class LSTM(nn.Module):
    def __init__(self, input, hidden, layers):
        super().__init__()
        self.hidden = hidden
        self.layers = layers
        self.lstm = nn.LSTM(input, hidden, layers, batch_first=True)
        self.fc = nn.Linear(hidden, 1)
    
    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.layers, batch_size, self.hidden).to(device)
        c0 = torch.zeros(self.layers, batch_size, self.hidden).to(device)
        output, _ = self.lstm(x, (h0, c0))
        output = self.fc(output[:, -1, :])
        return output
    

In [297]:
def train_one_epoch():
    model.train(True)
    overall_loss = 0.0

    for i, batch in enumerate(train_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        output = model(x_batch)
        loss = loss_function(output, y_batch)
        overall_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        

In [298]:
def validate_one_epoch():
    model.train(False)
    overall_loss = 0.0

    for i, batch in enumerate(test_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        with torch.no_grad():
            output = model(x_batch)
            loss = loss_function(output, y_batch)
            overall_loss += loss.item()

        avg_loss_across_batches = overall_loss / len(test_loader)

In [299]:
model = LSTM(1, 4, 1)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for e in range(epochs):
    train_one_epoch()
    validate_one_epoch()


In [None]:
with torch.no_grad():
    predicted = model(original.to(device)).to('cpu').numpy()

predicted = predicted.flatten()
temp = np.zeros((original.shape[0], peer_into))
temp[:, 0] = predicted
temp = scaler.inverse_transform(temp)

predicted = temp[:, 0]
todays_close = round(predicted[-1].item(), 2)
yesterdays_close, todays_close


(116977.09, 112162.17)