In [9]:
import pandas as pd
import numpy as np
import seaborn as sns

from metaflow import Flow, get_metadata
from matplotlib import pyplot as plt

import plotly
from math import floor
import pickle
import plotly.graph_objects as go
from scipy.signal import savgol_filter
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from utils import split_sequence

# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [2]:
def plot_spread(array, title, xlabel, ylabel):
    """Build a Plotly figure showing the smoothed mean with a +/- 1 std band.

    The mean and standard deviation are taken along axis 0 of ``array`` and
    each is smoothed with ``smooth(..., 20)`` before plotting.

    Args:
        array: Array-like reduced along axis 0 (``plot_runs`` passes one row
            per run).
        title: Figure title.
        xlabel: Title of the x axis.
        ylabel: Title of the y axis.

    Returns:
        A ``go.Figure``; the caller is responsible for showing it.
    """
    mean = smooth(np.mean(array, 0), 20)
    std = smooth(np.std(array, 0), 20)

    # All three traces share the same x positions.
    steps = list(range(len(mean)))
    band_fill = 'rgba(0, 138, 196, 0.1)'

    lower_bound = go.Scatter(x=steps,
                             y=mean-std,
                             mode='lines',
                             line=dict(width=0),
                             hoverinfo='skip')

    # 'tonexty' fills towards the previous trace in the data list, so this
    # shades the area between the lower bound and the mean.
    trace = go.Scatter(x=steps,
                       y=mean,
                       mode='lines',
                       fillcolor=band_fill,
                       fill='tonexty',
                       line=dict(color='#008ac4', width=2))

    # ...and this shades the area between the mean and the upper bound.
    upper_bound = go.Scatter(x=steps,
                             y=mean+std,
                             mode='lines',
                             fillcolor=band_fill,
                             fill='tonexty',
                             line=dict(width=0),
                             hoverinfo='skip')

    # The order matters for the 'tonexty' fills above.
    data = [lower_bound, trace, upper_bound]

    layout = go.Layout(
        title_x=0.5,
        xaxis=dict(title=xlabel),
        yaxis=dict(title=ylabel),
        title=title,
        showlegend=False)

    return go.Figure(data=data, layout=layout)

In [170]:
# NOTE(review): pickle.load can execute arbitrary code; only open log files
# that were produced by a trusted run.
with open('data/logs.pkl', 'rb') as handle:
    logs = pickle.load(handle)

# Average of the last recorded portfolio value of every run.
np.average(np.array([p[-1] for p in logs['portfolio']]))

[1.0898176412514309, 1.1244586165548918, 0.914116254175035]

In [182]:
def smooth(y, factor):
    """Smooth ``y`` with a moving average of width ``factor``.

    The signal is convolved with a uniform box of length ``factor`` and
    ``factor // 2`` samples are trimmed from each end, which removes the
    edge values that were computed from a partially overlapping box.

    Args:
        y: 1-D sequence of numbers.
        factor: Positive integer width of the averaging window.

    Returns:
        A NumPy array holding the smoothed signal, shorter than the
        convolution output by ``2 * (factor // 2)`` samples.
    """
    box = np.ones(factor) / factor
    y_smooth = np.convolve(y, box, mode='same')

    trim = factor // 2
    if trim == 0:
        # factor == 1: nothing to trim. Slicing with ``[0:-0]`` would return
        # an empty array instead of the unchanged signal.
        return y_smooth
    return y_smooth[trim:-trim]

def plot_runs(log_file='logs', train_test='train'):
    """Plot the portfolio value of every logged run against the baseline.

    Two figures are shown: one line per run (lightly smoothed), and the
    mean +/- std spread over all runs produced by ``plot_spread``. Both
    include the smoothed baseline series drawn in orange.

    Args:
        log_file: Stem of the pickle file; ``data/{log_file}.pkl`` is read.
            The pickle must hold a mapping with the keys ``'baseline'`` and
            ``'portfolio'`` (a sequence with one series per run).
        train_test: Currently unused; kept so existing calls keep working.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only open log
    # files that were produced by a trusted run.
    with open(f'data/{log_file}.pkl', 'rb') as handle:
        logs = pickle.load(handle)

    baseline   = logs['baseline']
    portfolios = logs['portfolio']
    hold       = smooth(baseline, 4)

    fig = go.Figure()

    for run, portfolio in enumerate(portfolios):
        # Later runs are drawn more opaque. The alpha is capped at 1.0
        # because run / len(portfolios) + .2 exceeds the valid range for
        # the last runs.
        alpha = min(1.0, run / len(portfolios) + .2)

        portfolio = smooth(portfolio, 4)

        fig.add_trace(go.Scatter(x=list(range(len(portfolio))),
                                 y=portfolio,
                                 mode='lines',
                                 line=dict(color=f'rgba(0, 138, 196, {alpha})'),
                                 name=f'Run {run+1}'))

    fig.add_trace(go.Scatter(x=list(range(len(hold))),
                             y=hold,
                             mode='lines',
                             line=dict(color='#ff8a00', width=3),
                             name='Original'))

    fig.update_layout(
        xaxis=dict(title='Step'),
        yaxis=dict(title='Portfolio value ($)'),
        title='Portfolio value of different runs',
        title_x=0.5,
        showlegend=False)

    fig.show()

    fig = plot_spread(np.array(portfolios),
                      title='Spread of different runs',
                      xlabel='Step',
                      ylabel='Portfolio value ($)')

    # plot_spread smooths with a window of 20, so the baseline is smoothed
    # the same way here. The x values are derived from this series; the
    # factor-4 series used above is longer and would not match y in length.
    hold_spread = smooth(baseline, 20)

    fig.add_trace(go.Scatter(x=list(range(len(hold_spread))),
                             y=hold_spread,
                             mode='lines',
                             line=dict(color='#ff8a00', width=3),
                             name='Original'))

    fig.show()
    
# Show both figures for the runs stored in data/logs-MSFT.pkl.
plot_runs(log_file='logs-MSFT')

In [180]:
# Show both figures for the runs stored in data/logs-AMD.pkl.
plot_runs(log_file='logs-AMD')

In [181]:
# Show both figures for the runs stored in data/logs-AAPL.pkl.
plot_runs(log_file='logs-AAPL')

In [161]:
class Stock:
    """Closing prices of one ticker together with windowed model inputs.

    Constructing an instance immediately downloads the price history through
    ``yfinance`` (network access), derives the lag-aligned price arrays and
    builds the windowed sequence array.
    """

    def __init__(self, 
                 ticker, 
                 window_size, 
                 train_size=.3, 
                 normalize=True,
                 diff=True,
                 price_look_back=2,
                 start_date='2010-1-1', 
                 end_date='2020-1-1'):
        """Download the prices and prepare all derived arrays.

        Args:
            ticker: Ticker symbol passed to ``yf.Ticker``.
            window_size: Window length handed to ``split_sequence``; also
                determines where the aligned price arrays start.
            train_size: Fraction of the leading samples used to fit the
                scaler in ``_normalize``.
            normalize: Whether to standard-scale the series before windowing.
            diff: Whether to replace prices by first differences before
                windowing.
            price_look_back: Lag (in steps) of ``stock_prices_n`` relative
                to ``stock_prices``.
            start_date: Start date passed to ``Ticker.history``.
            end_date: End date passed to ``Ticker.history``.
        """
        self.ticker          = ticker
        self.window_size     = window_size
        self.train_size      = train_size
        self.start_date      = start_date
        self.end_date        = end_date
        self.price_look_back = price_look_back
        self.diff            = diff
        self.normalize       = normalize
        
        # Raw closing prices as downloaded.
        self.stock_prices_raw = np.array([])
        # Raw prices starting at index window_size - 1.
        self.stock_prices     = np.array([])
        # Same series lagged by one step.
        self.stock_prices_1   = np.array([])
        # Same series lagged by price_look_back steps.
        self.stock_prices_n   = np.array([])
        # Output of split_sequence on the (optionally differenced and scaled) series.
        self.stock_seq        = np.array([])
        
        self._fetch_stock()
        self._set_stocks()
        self._sequence()
        
    def _fetch_stock(self):
        """Download the price history and keep only the closing prices."""
        stock_data = yf.Ticker(self.ticker)
        
        # NOTE(review): period='1d' is passed together with start/end;
        # confirm against the yfinance docs whether interval='1d' was meant.
        history = stock_data.history(period='1d', 
                                     start=self.start_date, 
                                     end=self.end_date)
        
        # Rows containing a NaN in any column are dropped before 'Close' is selected.
        self.stock_prices_raw = history.dropna()['Close'].values
        
    def _lagged_prices(self, lag):
        """Return the raw prices positioned ``lag`` steps behind ``stock_prices``.

        Element ``i`` of the result is the raw price at index
        ``window_size - 1 - lag + i``. When that index would be negative the
        earliest price is repeated, so a negative slice start can no longer
        wrap around and select a tail of the array.
        """
        prices = np.asarray(self.stock_prices_raw)
        start = self.window_size - 1 - lag
        
        if start >= 0:
            return np.array(prices[start:])
        
        if prices.size == 0:
            return np.array(prices)
        
        # Left-pad with the first price, mirroring how _diff prepends the
        # first value.
        return np.concatenate([np.repeat(prices[:1], -start), prices])
        
    def _set_stocks(self):
        """
        Create different stock arrays that follow the same index as the sequence data
        """
        self.stock_prices   = self._lagged_prices(0)
        self.stock_prices_1 = self._lagged_prices(1)
        self.stock_prices_n = self._lagged_prices(self.price_look_back)
        
    def _diff(self, stocks):
        """Return first differences of ``stocks`` with the same length.

        The first value is prepended before differencing, so the first
        element of the result is 0. An empty input yields an empty array
        instead of raising ``IndexError``.
        """
        if len(stocks) == 0:
            return np.array(stocks)
        
        return np.diff(stocks, 1, prepend=[stocks[0]])
        
    def _normalize(self, stocks):
        """Standard-scale ``stocks`` with statistics of the leading train split."""
        # The size of the train split
        train_split = int(len(stocks)*self.train_size)
        train_set = stocks[:train_split]
        
        # Fit on the train split only, then transform the whole series.
        scaler = StandardScaler()
        scaler.fit(train_set.reshape(-1,1))
        
        return scaler.transform(stocks.reshape(-1,1)).flatten()
        
    def _sequence(self):
        """Build ``stock_seq`` from the raw prices.

        Differencing (if enabled) is applied before normalisation (if
        enabled); the result is passed to ``split_sequence`` together with
        ``window_size``.
        """
        stocks = self.stock_prices_raw
        
        if self.diff:
            stocks = self._diff(stocks)
        
        if self.normalize:
            stocks = self._normalize(stocks)
        
        self.stock_seq = split_sequence(stocks, self.window_size)
        
# AAPL with a window of 3: first differences only, no scaling.
stock = Stock(ticker='AAPL', window_size=3, diff=True, normalize=False)

In [162]:
# First five entries of each aligned price array, one array per line,
# followed by the windowed sequences as the cell output.
print(*(prices[:5] for prices in (stock.stock_prices,
                                  stock.stock_prices_1,
                                  stock.stock_prices_n)),
      sep='\n')

stock.stock_seq

[26.58 26.16 26.11 26.29 26.05]
[26.54 26.58 26.16 26.11 26.29]
[26.13 26.54 26.58 26.16 26.11]


array([[ 0.  ,  0.41,  0.04],
       [ 0.41,  0.04, -0.42],
       [ 0.04, -0.42, -0.05],
       ...,
       [ 0.27,  5.62, -0.11],
       [ 5.62, -0.11,  1.72],
       [-0.11,  1.72,  2.12]])

In [147]:
# First five entries of each aligned price array, one array per line,
# followed by the windowed sequences as the cell output.
print(*(prices[:5] for prices in (stock.stock_prices,
                                  stock.stock_prices_1,
                                  stock.stock_prices_n)),
      sep='\n')

stock.stock_seq

[2 7 8 5 6]
[3 2 7 8 5]
[1 3 2 7 8]


array([[-1.,  1., -5.],
       [ 1., -5.,  7.],
       [-5.,  7., -1.],
       [ 7., -1., -9.],
       [-1., -9., -1.]])

In [141]:
# Compare the aligned arrays and the sequence row at a single index.
a = 0

print(*(series[a] for series in (stock.stock_prices,
                                 stock.stock_prices_1,
                                 stock.stock_prices_n,
                                 stock.stock_seq)),
      sep='\n')

2
3
1
[ 1  2 -1]


In [142]:
# Compare the aligned arrays and the sequence row at a single index.
a = 1

print(*(series[a] for series in (stock.stock_prices,
                                 stock.stock_prices_1,
                                 stock.stock_prices_n,
                                 stock.stock_seq)),
      sep='\n')

7
2
3
[ 2 -1  5]


In [56]:
# Number of entries in the aligned price array.
# NOTE(review): presumably expected to equal the number of rows in stock_seq; confirm.
len(stock.stock_prices)

2515

In [58]:
# Shape of the array returned by split_sequence.
stock.stock_seq.shape

(2515, 3)