# Stock Price Prediction

In [None]:
# initialize API library
# 1. For finnhub-python
#import finnhub
#finnhub_client = finnhub.Client(api_key="bsdb2a7rh5r8dht95tm0")

# 2. For twelvedata
import os

from twelvedata import TDClient

# A key supplied through the TWELVEDATA_API_KEY environment variable takes
# precedence so credentials do not have to live in the notebook.  The key that
# was previously hard-coded is kept as the fallback so existing runs behave
# the same.
# NOTE(review): the fallback key is committed in plain text -- consider
# rotating it and removing the literal.
td = TDClient(apikey=os.environ.get("TWELVEDATA_API_KEY",
                                    "2e7d44bf8b294e2381477193c7d7d117"))

In [None]:
import os
import os.path
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
# Module-level scaler configured to map values into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

import matplotlib.pyplot as plt
from scipy.signal import argrelextrema

# Default number of consecutive past samples in one input window; also the
# default sequence length the model's linear head is sized for.
look_back = 400
# Default size of the LSTM hidden state.
hidden_size = 128
# Batch size passed to the training DataLoader.
batch_size = 1

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

## Main

In [None]:
# Create dataset with lookback
def create_dataset(data, look_back=look_back):
    """Slice a series into sliding windows and their next-step targets.

    Sample ``i`` consists of ``data[i:i + look_back]`` as input and
    ``data[i + look_back]`` as target.  Every position that has a following
    observation is used, so ``len(data) - look_back`` samples are produced
    (none when the series is too short).

    Args:
        data: Observations in chronological order, either a 1-D sequence of
            scalars or a 2-D ``(n, 1)`` array.
        look_back: Number of consecutive observations per input window
            (must be >= 1).

    Returns:
        ``(dataX, dataY)`` as NumPy arrays.  For single-feature data
        ``dataX`` has shape ``(n_samples, look_back)`` -- or ``(n_samples,)``
        when ``look_back == 1`` -- and ``dataY`` has shape ``(n_samples,)``.
        The sample axis is kept even when there is only one sample.

    Raises:
        ValueError: If ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        raise ValueError(f'look_back must be >= 1, got {look_back}')

    values = np.asarray(data)
    if values.ndim == 1:
        # Treat a flat series as a single-feature column.
        values = values.reshape(-1, 1)
    n_features = values.shape[1]
    n_samples = max(len(values) - look_back, 0)

    # Row i of `window_index` holds the positions i .. i + look_back - 1, so a
    # single fancy-index builds every window at once.
    window_index = np.arange(n_samples)[:, None] + np.arange(look_back)
    dataX = values[window_index]                      # (n, look_back, features)
    dataY = values[look_back:look_back + n_samples].reshape(-1)

    # Drop only the axes that are known to be singleton; a blanket squeeze()
    # would also remove the sample axis when n_samples == 1.
    if n_features == 1:
        dataX = dataX.reshape(n_samples, look_back)
    if look_back == 1:
        dataX = dataX.reshape(n_samples, *dataX.shape[2:])
    return dataX, dataY

In [None]:
# Create Dataset Object
class dataset(Dataset):
    """Map-style dataset serving the rows of a CSV file as float32 tensors.

    The first CSV column is skipped in every sample (the training files in
    this notebook are written with their row index as first column); all
    remaining columns of a row form one sample.

    Args:
        file_name: Path of the CSV file to load.
        transform: Optional callable applied to each sample tensor before it
            is returned.
    """
    def __init__(self, file_name, transform=None):
        self.data = pd.read_csv(file_name)
        self.transform = transform
        # Materialise the feature matrix once.  Asking a mixed-dtype frame for
        # `.values` builds a fresh array of the whole table, which would
        # otherwise happen on every single __getitem__ call.  Changes made to
        # `self.data` after construction are therefore not picked up.
        self._features = self.data.values[:, 1:]

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a float32 tensor.

        Out-of-range indices raise ``IndexError`` from the underlying array.
        """
        sample = torch.tensor(self._features[idx], dtype=torch.float32)
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

In [None]:
# Define the model
class lstm_linear(nn.Module):
    """Single-layer LSTM whose outputs at all time steps feed one linear unit.

    Args:
        hidden_size: Size of the LSTM hidden state.
        look_back: Sequence length the linear head is sized for.
    """
    def __init__(self, hidden_size=hidden_size, look_back=look_back):
        super(lstm_linear, self).__init__()
        # Remember the configuration so forward() does not have to rely on
        # the module-level constants matching this instance.
        self.hidden_size = hidden_size
        self.look_back = look_back
        self.lstm = nn.LSTM(1, hidden_size)
        self.linear = nn.Linear(look_back*hidden_size, 1)

    def forward(self, x, hidden_size=None):
        """Predict one value per sequence.

        Args:
            x: Input of shape ``(look_back, batch, 1)`` (the LSTM is created
                with the default ``batch_first=False``).
            hidden_size: Optional override of the hidden size used when
                flattening; defaults to the size this model was built with.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        if hidden_size is None:
            hidden_size = self.hidden_size
        output, _ = self.lstm(x)
        # `output` is (seq, batch, hidden).  Move batch to the front before
        # flattening so each row holds the time steps of ONE sequence; a plain
        # reshape would interleave different sequences whenever batch > 1.
        per_sequence = output.transpose(0, 1)
        y = self.linear(per_sequence.reshape(-1, self.look_back * hidden_size))
        return y

In [None]:
def df_set(X, Y):
    """Arrange inputs and targets as a column dictionary for a DataFrame.

    The layout follows the shape of ``X`` itself rather than a global window
    length, so it stays correct for any window size.

    Args:
        X: Input windows.  A 2-D array ``(n_samples, n_steps)`` yields one
            column per step named ``trainX0``, ``trainX1``, ...; a 1-D array
            yields a single ``trainX`` column.
        Y: Targets, stored unchanged under ``trainY``.

    Returns:
        dict mapping column names to column data, with ``trainY`` last.
    """
    X = np.asarray(X)
    if X.ndim > 1:
        columns = {f'trainX{i}': X[:, i] for i in range(X.shape[1])}
    else:
        columns = {'trainX': X}
    columns['trainY'] = Y
    return columns

In [None]:
class stock:
    """Minute-level price history of one ticker plus train/forecast helpers.

    Constructing an instance downloads closing prices through the
    module-level twelvedata client ``td`` and scales them into [0, 1] with a
    scaler owned by the instance.
    """
    def __init__(self, stock_code, root_dir=None):
        """Download and scale the price history of ``stock_code``.

        Args:
            stock_code: Ticker symbol passed to the API.
            root_dir: Stored on the instance; not used by any method here.
        """
        self.stock_code = stock_code
        self.root_dir = root_dir
        self.stock_data = pd.DataFrame(td.time_series(symbol=self.stock_code,
                                                      interval='1min',
                                                      outputsize='5000',
                                                      end_date='2020-07-28 9:30:00',
                                                      timezone='America/New_York').as_pandas()['close'])
        # Each instance owns its scaler.  Refitting one shared scaler would
        # make every previously created stock object inverse-transform its
        # forecasts with another ticker's price range.
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        # The frame is reversed before scaling; presumably the API returns the
        # newest row first, so stock_values ends with the most recent price.
        self.stock_values = self.scaler.fit_transform(self.stock_data.values[::-1])

    @staticmethod
    def _model_device(model):
        """Return the device of the model's first parameter (CPU if none)."""
        first_param = next(model.parameters(), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    def create_train(self):
        """Build the training windows and store them as
        ``stock_data/<stock_code>_train.csv``."""
        self.stock_time_idx = self.stock_data.index[::-1]
        trainX, trainY = create_dataset(self.stock_values)
        stock_train = pd.DataFrame(df_set(trainX, trainY))
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('stock_data', exist_ok=True)
        stock_train.to_csv(f'stock_data/{self.stock_code}_train.csv')

    def create_dataloader(self):
        """Return a DataLoader over the CSV written by ``create_train``."""
        train_set = dataset(f'stock_data/{self.stock_code}_train.csv')
        return DataLoader(train_set, batch_size=batch_size, shuffle=False)

    def trained_model(self, model, train_loader, epochs=30):
        """Train ``model`` with Adam on summed squared error and return it.

        Args:
            model: Network mapping ``(seq_len, batch, 1)`` inputs to one
                prediction per sequence.
            train_loader: Iterable of 2-D tensors ``(batch, seq_len + 1)``
                whose last column is the target.
            epochs: Number of passes over ``train_loader``.

        Returns:
            The same ``model`` object after training.
        """
        criterion = nn.MSELoss(reduction='sum')
        optimizer = torch.optim.Adam(model.parameters())
        # Feed the data to wherever the model actually lives.
        model_device = self._model_device(model)

        for epoch in range(epochs):
            running_loss = 0
            for i, data in enumerate(train_loader):
                data = data.to(model_device)
                # Shapes are taken from the batch itself, so a smaller final
                # batch works, and the transpose keeps every sequence intact
                # (a bare reshape would scramble samples for batch > 1).
                x = data[:, :-1].transpose(0, 1).unsqueeze(-1).contiguous()
                y_true = data[:, -1].reshape(1, -1, 1)

                optimizer.zero_grad()
                y_pred = model(x).reshape(1, -1, 1)
                loss = criterion(y_pred, y_true)

                loss.backward()
                optimizer.step()

                # Print statistics
                running_loss += loss.item()
                if i%100 == 99:
                    print(f"epoch:[{epoch+1}/{epochs}], [{i+1}/{len(train_loader)}], loss: {running_loss/100}")
                    running_loss = 0
        print('Training Finished!')
        return model

    def predict_trend(self, trained_model, time_interval='2 Days'):
        """Forecast minute by minute by feeding predictions back as input.

        The forecast starts at the fixed timestamp 2020-07-28 09:35
        America/New_York and covers ``time_interval`` from there, keeping only
        weekday minutes between 9:30 and 16:00.

        Args:
            trained_model: Model mapping a ``(look_back, 1, 1)`` tensor to a
                single value.
            time_interval: Forecast span, anything ``pd.Timedelta`` accepts.

        Returns:
            ``(timestamps, prices)``: a DatetimeIndex and a 1-D array of
            predictions in original price units, one per timestamp.

        Raises:
            ValueError: If fewer than ``look_back`` historical values exist.
        """
        # Get time index
        # time_now = pd.Timestamp.now(tz='America/New_York').round('5min')
        time_now = pd.Timestamp('2020-07-28 9:35:00', tz='America/New_York').round('1min')
        timestamps = pd.date_range(time_now, time_now+pd.Timedelta(time_interval), freq='1min')
        timestamps = timestamps[timestamps.indexer_between_time('9:30', '16:00')]
        timestamps = timestamps[timestamps.dayofweek<5]
        n_steps = len(timestamps)

        history = np.asarray(self.stock_values, dtype=np.float64).reshape(-1)
        if len(history) < look_back:
            raise ValueError(
                f'need at least {look_back} historical values, got {len(history)}')
        if n_steps == 0:
            return timestamps, np.empty(0)

        # One preallocated buffer: the newest look_back known values followed
        # by room for every prediction (avoids re-copying on each step).
        series = np.empty(look_back + n_steps, dtype=np.float64)
        series[:look_back] = history[-look_back:]

        model_device = self._model_device(trained_model)
        with torch.no_grad():
            for step in range(n_steps):
                # The window slides so it always ends at the latest value,
                # which from the second step on is itself a prediction.
                window = torch.tensor(series[step:step + look_back],
                                      dtype=torch.float32,
                                      device=model_device).reshape(look_back, 1, 1)
                series[look_back + step] = trained_model(window).item()

        # The scaler was fitted on a single feature column, so hand the
        # predictions back in that same (n, 1) layout.
        scaled = series[look_back:].reshape(-1, 1)
        return timestamps, self.scaler.inverse_transform(scaled).ravel()

    def plot_trend(self, x, y, ax):
        """Plot the forecast on ``ax`` and label every local extremum.

        Args:
            x: Timestamps of the forecast.
            y: Predicted prices aligned with ``x``.
            ax: Matplotlib axes to draw on.
        """
        y = np.asarray(y)
        ax.plot(x, y, label='pred trend')
        # Local maxima first, then local minima.
        for comparator in (np.greater, np.less):
            for i in argrelextrema(y, comparator)[0]:
                # annotate() expects a string label, not a tuple.
                label = f'{x[i].hour}:{x[i].minute}:{x[i].second}, {y[i]:.2f}'
                ax.annotate(label, (x[i], y[i]))
        # Address the given axes directly rather than pyplot's current axes.
        ax.set_title(f'Predicted Trend: {self.stock_code}')
        ax.legend()

In [None]:
def predict_in_batch(stock_list, epochs=30, time_interval='2 Days'):
    """Train one model per stock code, forecast each, and plot the results.

    Args:
        stock_list: List of stock codes to process.
        epochs: Training epochs per stock.
        time_interval: Forecast span handed to ``stock.predict_trend``.

    Returns:
        ``(figure, frame)``: the matplotlib figure with one subplot per stock
        and a DataFrame indexed by ``datetime`` with one
        ``'<code> price'`` column per stock.
    """
    n_stocks = len(stock_list)
    figure = plt.figure(figsize=(20, 6 * n_stocks))
    forecasts = []

    for position, code in enumerate(stock_list, start=1):
        # Fresh, untrained network for every ticker.
        net = lstm_linear(hidden_size=hidden_size, look_back=look_back).to(device)

        # Download the history and write the training CSV.
        ticker = stock(code)
        ticker.create_train()
        loader = ticker.create_dataloader()

        print(f'Training model on data of {code}...')
        net = ticker.trained_model(net, loader, epochs=epochs)

        # Forecast on the CPU and keep the result as a datetime-indexed frame.
        timestamps, prices = ticker.predict_trend(net.to('cpu'), time_interval=time_interval)
        forecast = pd.DataFrame({'datetime': timestamps, f'{code} price': prices})
        forecasts.append(forecast.set_index('datetime'))
        print('Inference Finished!')

        # One subplot row per ticker.
        axes = plt.subplot(n_stocks, 1, position)
        ticker.plot_trend(timestamps, prices, axes)

    plt.show()
    print('The entire prediction is finished!')
    return figure, pd.concat(forecasts, axis=1)

In [None]:
# This block is for modifying the input.
# Enter the list of stock codes and run the prediction!

# An example follows:
#stock_list = ['AAPL', 'AMZN', 'TWTR', 'NFLX', 'TSLA', 'GOOG', 'MSFT', 'PG', 'DIS', 'BIIB', 'LLY', 'SBUX', 'INTC', 'FB', 'AEP', 'AEE']
stock_list = ['AMZN']
result_fg, result_df = predict_in_batch(stock_list, epochs=30, time_interval='4 Days')

# Save the result (table and figure) in the 'pred_result' directory
your_name = 'Qin'
# exist_ok creates the directory only when it is missing, without the
# check-then-create race of isdir() followed by mkdir().
os.makedirs('pred_result', exist_ok=True)
result_df.to_csv(f'pred_result/pred_result_{your_name}.csv')
result_fg.savefig(f'pred_result/pred_result_{your_name}.png')

# You can modify the example or write your own! Let's begin!

# For Checking Current Update

In [None]:
# **************************************TO DO***********************************************
def get_latest_trend(stock_list):
    """Fetch hourly closing prices for every stock code in ``stock_list``.

    Still marked TO DO: the function only downloads the data; no further
    processing of the "newest trend" is implemented yet.

    Args:
        stock_list: List of stock codes to query.

    Returns:
        list of single-column (``close``) DataFrames, one per stock code, in
        the order of ``stock_list``.
    """
    late_list = []
    for stock_code in stock_list:
        # Use the loop variable: there is no `self` in a module-level function.
        hourly = td.time_series(symbol=stock_code,
                                interval='1h',
                                outputsize='5000',
                                timezone='America/New_York').as_pandas()
        late_list.append(pd.DataFrame(hourly['close']))
    return late_list

In [None]:
#stock_list = ['AAPL', 'AMZN', 'TWTR', 'NFLX', 'TSLA', 'GOOG', 'MSFT', 'PG', 'DIS', 'BIIB', 'LLY', 'SBUX', 'INTC', 'FB', 'AEP', 'AEE']
# Download 1-minute AMZN data from 2020-07-28 09:30 onwards, keep only the
# closing price, and attach the New York time zone to its index.
test = td.time_series(
    symbol='AMZN',
    interval='1min',
    outputsize='1400',
    start_date='2020-07-28 9:30:00',
    timezone='America/New_York',
).as_pandas()
test = pd.DataFrame(test['close'])
test.index = test.index.tz_localize('America/New_York')

In [None]:
# Draw the predicted prices and the downloaded `test` prices on one wide
# figure so the two curves can be compared.
plt.figure(figsize=(50, 10))
# Forecast returned by predict_in_batch (one line per column of result_df).
plt.plot(result_df.index, result_df)
# Closing prices downloaded into `test`.
plt.plot(test)