In [49]:
import pandas as pd
import torch
import numpy as np
from torch import nn
import torch.utils.data as data
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import gc
import os
from torch.utils.checkpoint import checkpoint
import torch.multiprocessing as mp

In [39]:
%run Model.ipynb
%run "../Machine Learning Data Gathering/Scripts/Moving Average + Bollinger bands.ipynb"
%run StocksDataSet.ipynb

In [4]:
torch.backends.cudnn.enabled = False

In [5]:
#export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:256"
# del os.environ["PYTORCH_CUDA_ALLOC_CONF"]

In [6]:
device = ("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
def create_dataset(input, period_out):
    
    X, y = [], []
    
    for i in range(len(input) - period_out - 1):
        X.append(input[i: (i+period_out)])
        y.append(input[i + period_out])
    return torch.Tensor(np.array(X)), torch.Tensor(np.array(y))

def test_func(input_data, output_data, steps_for_input, steps_for_output):
    X,y = [], []
    
    for i in range(len(input_data)):
        end_x = i + steps_for_input
       # print(end_x)
        output_x = end_x + steps_for_output - 1
        if output_x > len(input_data): 
            break
        #print(output_data[end_x - 1 : output_x])
        seq_x, seq_y = input_data[i : end_x], output_data[end_x - 1 : output_x]
        X.append(seq_x), y.append(seq_y)
    
    return np.array(X), np.array(y)

# Splitting the data

In [8]:
# def split_data(X_full, y_full, split_percentage = 0.80, test_scaler = StandardScaler(), mm_scaler = MinMaxScaler()):
#     total_data = len(X_full)
#     test_split = round(split_percentage * total_data)
#     X_train = torch.Tensor(X_full[:test_split])
#     X_test = torch.Tensor(X_full[test_split:])

#     y_train = torch.Tensor(y_full[:test_split])
#     y_test = torch.Tensor(y_full[test_split:])

#     X_train = torch.Tensor(test_scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape))
#     X_test = torch.Tensor(test_scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape))
    
#     y_train = torch.Tensor(mm_scaler.fit_transform(y_train.reshape(-1, y_train.shape[-1])).reshape(y_train.shape))
#     y_test = torch.Tensor(mm_scaler.transform(y_test.reshape(-1, y_test.shape[-1])).reshape(y_test.shape))

#     return X_train, X_test, y_train, y_test

# Predicting the stock price

In [45]:
def predict_stock_price(df, model):
    model = model.to(device)
    preds_test = df[-90:]
    close_price = np.array(preds_test["Close"])
    close_price = close_price[-30:]
    preds_test = preds_test.drop("Close", axis = 1)
    tens_preds = np.array(preds_test)
    torch_test = torch.Tensor(tens_preds).unsqueeze(0)
    test_scaler = StandardScaler()
    mm_scaler = MinMaxScaler()
    X_final = torch.Tensor(test_scaler.fit_transform(torch_test.reshape(-1, torch_test.shape[-1])).reshape(torch_test.shape))
    y_final = torch.Tensor(mm_scaler.fit_transform(close_price.reshape(-1, close_price.shape[-1])).reshape(close_price.shape))
    model.eval()
    with torch.no_grad():
        pred = model(X_final)
    
    preds = pred.detach().cpu()
    print(preds)
    
    preds = mm_scaler.inverse_transform(preds)
    return preds

# Training the model

In [10]:
def train_model(ds, 
                input_size = 16, 
                hidden_size = 256, 
                num_layers = 1, 
                num_classes = 16, 
                batch_first = True, 
                epochs_number = 5000, 
                device = device, 
                learning_rate = 0.001):
    
    model = StocksPredictionModel(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes = num_classes, batch_first=True)
    custom_ds = ds
    
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(params = model.parameters(), lr=learning_rate)
    epochs = epochs_number
    
    for epoch in range(epochs):
        
        for X_train, X_test, y_train, y_test in custom_ds:
            X_train = torch.Tensor(X_train).squeeze(0)
            model.train()
            model = model.to(device)
            X_train.to(device)
       
            y_pred = model(X_train)
         
          
            optimizer.zero_grad()
            y_pred = y_pred.to(device)
            y_train = y_train.to(device)
            loss = loss_fn(y_pred, y_train)
            loss = loss.to(device)
            # torch.cuda.empty_cache()
            # gc.collect()
            loss.backward()
        
            optimizer.step()
        
            model.eval()
            
        
            with torch.inference_mode():
            
                test_pred = model(X_test)
                test_pred.to(device)
                y_test = y_test.to(device)
                test_loss = loss_fn(test_pred, y_test)
                if epoch == epochs - 1 or epoch % 100 == 0:
                    print(f"Epoch number: {epoch}")
                    print(f"Test Loss is: {test_loss}")
                    print(f"Train Loss is: {loss}")
            torch.cuda.empty_cache()
            gc.collect()
    return model

# Saving the Model

In [33]:
def save_model(model_to_save, model_name = "01_stocks_predictions_model.pth"):
    MODEL_NAME = model_name
    torch.save(model_to_save.state_dict(), f"./Models/{MODEL_NAME}.pth")

In [41]:
def save_with_kwargs(model, name = "01_stocks_predictions_model_with_kwargs"):
    torch.save([model.kwargs, model.state_dict()], f"./Models/{name}.pth")

# Loading the model

In [37]:
def load_model(model_name = "01_stocks_predictions_model"):
    loaded_model = StocksPredictionModel(input_size=16, hidden_size=256, num_layers=1, num_classes = 30, batch_first=True)
    loaded_model.load_state_dict(torch.load(f"./Models/{model_name}.pth"))
    return loaded_model

In [34]:
def load_with_kwargs(model_name):
    kwargs, state_dict = torch.load(f"./Models/{model_name}.pth")
    model = StocksPredictionModel(**kwargs)
    model.load_state_dict(state_dict)
    return model

# Transform data into batches

In [13]:
def get_data_for_mlmodel(input_data, output_data, steps_for_input, steps_for_output):
    X,y = [], []
    print(input_data.shape)
    test = steps_for_input + steps_for_output
    print(len(input_data.index))
   
    rows_amount = len(input_data.index)
    if rows_amount % test != 0:
        input_data = input_data.iloc[rows_amount % test:]
        output_data = output_data.iloc[rows_amount % test:]
        rows_amount = len(input_data.index)
    print(input_data.shape)
    print(rows_amount)

    for i in range(0, rows_amount, test):
        X.append(input_data[i: i + steps_for_input])
        y.append(output_data[i + steps_for_input : (i + steps_for_input) + steps_for_output])
    return np.array(X), np.array(y)


In [14]:
test = StocksDataSet("../Custom LSTM Model/Data/Stocks Data")
our_model = train_model(test, epochs_number=1000, learning_rate=0.001, hidden_size = 256, num_classes=30)


Epoch number: 0
Test Loss is: 0.03750849515199661
Train Loss is: 0.06132309138774872
Epoch number: 0
Test Loss is: 0.10096871107816696
Train Loss is: 0.20337563753128052
Epoch number: 0
Test Loss is: 0.02492496371269226
Train Loss is: 0.2506926357746124
Epoch number: 0
Test Loss is: 1.282423496246338
Train Loss is: 0.19228757917881012
Epoch number: 0
Test Loss is: 0.3497951030731201
Train Loss is: 0.2707898020744324
Epoch number: 0
Test Loss is: 2.2347185611724854
Train Loss is: 0.19641298055648804
Epoch number: 0
Test Loss is: 0.23408961296081543
Train Loss is: 0.21431131660938263
Epoch number: 0
Test Loss is: 0.07669933885335922
Train Loss is: 0.21253609657287598
Epoch number: 0
Test Loss is: 1.5979132652282715
Train Loss is: 0.11829227954149246
Epoch number: 0
Test Loss is: 0.33617883920669556
Train Loss is: 0.2697064280509949
Epoch number: 0
Test Loss is: 1.3618184328079224
Train Loss is: 0.18678365647792816
Epoch number: 0
Test Loss is: 0.010257234796881676
Train Loss is: 0.168033

In [25]:
save_model(our_model)

In [35]:
save_model(our_model, model_name="kwargs_test")

In [42]:
load_test = load_model(model_name="kwargs_test")
save_with_kwargs(load_test, name = "Saved_with_kwargs")

In [43]:
kwargs_model = load_with_kwargs("Saved_with_kwargs")

In [46]:
single_stock = pd.read_csv("./Data/Stocks Data/Technology/Apple Inc. Common Stock.csv", index_col="Date", parse_dates = True)
preds = predict_stock_price(single_stock, kwargs_model)
print(preds)

tensor([[0.3922, 0.3872, 0.3795, 0.3691, 0.3597, 0.3518, 0.3429, 0.3382, 0.3332,
         0.3261, 0.3185, 0.3091, 0.2992, 0.2918, 0.2847, 0.2757, 0.2683, 0.2619,
         0.2590, 0.2539, 0.2520, 0.2493, 0.2488, 0.2486, 0.2486, 0.2501, 0.2505,
         0.2463, 0.2449, 0.2440]])
[[174.11223638 176.4672007  179.0494729  171.73914403 172.63968042
  171.20178223 170.05295017 173.6481843  171.81317002 170.35613972
  169.15846413 169.95914155 169.11922505 169.87177321 168.734658
  169.94567537 168.04826319 175.30188262 176.80903631 172.94394839
  169.63199851 168.2493203  167.28879771 165.24858908 166.0885582
  167.15006965 169.27045494 170.13631214 169.54485804 173.74401052]]


In [48]:
single_stock = pd.read_csv("./Data/Stocks Data/Consumer Discretionary/Airbnb Inc. Class A Common Stock.csv", index_col="Date", parse_dates = True)
preds = predict_stock_price(single_stock, kwargs_model)
print(preds)

tensor([[0.5825, 0.5889, 0.5944, 0.6020, 0.6083, 0.6148, 0.6165, 0.6202, 0.6232,
         0.6231, 0.6225, 0.6197, 0.6143, 0.6127, 0.6090, 0.6007, 0.5911, 0.5820,
         0.5785, 0.5715, 0.5649, 0.5578, 0.5524, 0.5479, 0.5426, 0.5386, 0.5354,
         0.5300, 0.5269, 0.5236]])
[[162.44250405 162.37891901 165.30438805 168.78203076 168.46833658
  168.60483003 168.00649929 167.03023565 165.58322418 163.9930582
  160.73245978 159.95967788 159.45429254 162.38266754 160.8790189
  163.26067722 160.87108964 166.00204355 159.94846535 156.17146623
  157.22488654 158.92783141 160.65242863 155.55792946 157.15256059
  161.48860222 163.37538958 163.53999597 164.75686371 162.7736007 ]]


# Multithreading training

In [None]:
# def train(model):
#     # Construct data_loader, optimizer, etc.
#     for data, labels in data_loader:
#         optimizer.zero_grad()
#         loss_fn(model(data), labels).backward()
#         optimizer.step()  # This will update the shared parameters

def train(model):
    custom_ds = StocksDataSet("../Custom LSTM Model/Data/Stocks Data")
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(params = model.parameters(), lr=learning_rate)
    epochs = 100
    
    for epoch in range(epochs):
        
        for X_train, X_test, y_train, y_test in custom_ds:
            X_train = torch.Tensor(X_train).squeeze(0)
            model.train()
            model = model.to(device)
            X_train.to(device)
       
            y_pred = model(X_train)
         
          
            optimizer.zero_grad()
            y_pred = y_pred.to(device)
            y_train = y_train.to(device)
            loss = loss_fn(y_pred, y_train)
            loss = loss.to(device)
            # torch.cuda.empty_cache()
            # gc.collect()
            loss.backward()
        
            optimizer.step()
        
            model.eval()
            
        
            with torch.inference_mode():
            
                test_pred = model(X_test)
                test_pred.to(device)
                y_test = y_test.to(device)
                test_loss = loss_fn(test_pred, y_test)
                if epoch == epochs - 1 or epoch % 100 == 0:
                    print(f"Epoch number: {epoch}")
                    print(f"Test Loss is: {test_loss}")
                    print(f"Train Loss is: {loss}")
            torch.cuda.empty_cache()
            gc.collect()
    #return model

In [None]:
if __name__ == '__main__':
    num_processes = 4
    model = StocksPredictionModel(input_size=16, hidden_size=256, num_layers=1, num_classes = 30, batch_first=True)
        # NOTE: this is required for the ``fork`` method to work
    model.share_memory()
    processes = []
    for rank in range(num_processes):
        p = mp.Process(target=train, args=(model,))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    save_model(model, model_name = "Multithreading model test")