In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

from metaflow import Flow, get_metadata
from matplotlib import pyplot as plt

# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)

In [177]:
# Take the `stocks` artifact from the latest successful run of TraderFlow.
run = Flow('TraderFlow').latest_successful_run
data = run.data.stocks
# Display it: per the recorded output this is a dict keyed by ticker
# (e.g. '^GSPC') with 'train' and 'test' entries holding numpy arrays.
data

{'^GSPC': {'train': {'seq': array([[ 1.69172909,  2.20257546,  2.08958377, ..., -0.01395266,
            0.02474421,  0.15521702],
          [ 2.20257546,  2.08958377,  1.81155431, ...,  0.02474421,
            0.15521702,  0.11453366],
          [ 2.08958377,  1.81155431,  1.79176885, ...,  0.15521702,
            0.11453366,  0.11787097],
          ...,
          [ 0.27114871,  0.32049318,  0.42418808, ...,  1.1105609 ,
            1.0691624 ,  1.13765666],
          [ 0.32049318,  0.42418808,  0.41282535, ...,  1.0691624 ,
            1.13765666,  1.13694152],
          [ 0.42418808,  0.41282535,  0.31318289, ...,  1.13765666,
            1.13694152,  1.13773612]]),
   'price': array([1283.27, 1347.56, 1333.34, ..., 1213.45, 1213.55, 1211.92])},
  'test': {'seq': array([[1.12478419, 1.04659585, 0.9351139 , ..., 0.8527142 , 0.84603959,
           0.92335387],
          [1.04659585, 0.9351139 , 0.90086678, ..., 0.84603959, 0.92335387,
           0.95140314],
          [0.9351139 , 0.9

In [129]:
def plot_spread(array, title, xlabel, ylabel):
    """Plot the smoothed mean of `array` with a shaded +/- one-std band.

    Parameters
    ----------
    array : array-like
        Mean and standard deviation are taken along axis 0, so this is
        presumably shaped (runs, steps) -- TODO confirm against callers.
    title : str
        Figure title (centered).
    xlabel, ylabel : str
        Axis titles.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure. It is returned, not shown.
    """
    # Both statistics are smoothed with the same window so they stay aligned.
    mean = smooth(np.mean(array, 0), 20)
    std = smooth(np.std(array, 0), 20)

    steps = list(range(len(mean)))
    band_fill = 'rgba(0, 138, 196, 0.1)'

    # Invisible lower edge of the band; it only serves as the fill target.
    lower_bound = go.Scatter(x=steps,
                             y=mean - std,
                             mode='lines',
                             line=dict(width=0),
                             hoverinfo='skip')

    # The mean line; 'tonexty' shades the area down to the lower bound.
    trace = go.Scatter(x=steps,
                       y=mean,
                       mode='lines',
                       fillcolor=band_fill,
                       fill='tonexty',
                       line=dict(color='#008ac4', width=2))

    # Invisible upper edge; 'tonexty' shades the area down to the mean line.
    upper_bound = go.Scatter(x=steps,
                             y=mean + std,
                             mode='lines',
                             fillcolor=band_fill,
                             fill='tonexty',
                             line=dict(width=0),
                             hoverinfo='skip')

    layout = go.Layout(
        title=title,
        title_x=0.5,
        xaxis=dict(title=xlabel),
        yaxis=dict(title=ylabel),
        showlegend=False)

    # Trace order matters: fill='tonexty' fills towards the trace added just
    # before, so the sequence must be lower bound -> mean -> upper bound.
    return go.Figure(data=[lower_bound, trace, upper_bound], layout=layout)

In [130]:
# NOTE(review): `logs` is not assigned at module level in any visible cell
# (plot_runs only loads it into a local), so this cell presumably relies on
# leftover kernel state -- confirm it survives Restart & Run All.
plot_spread(np.array(logs['portfolio']), 
        title='Spread of different runs', 
        xlabel='Step', 
        ylabel='Portfolio value')

In [293]:
import plotly
from math import floor
import pickle
import plotly.graph_objects as go
from scipy.signal import savgol_filter

def smooth(y, factor):
    """Smooth `y` with a moving average of width `factor`.

    The series is convolved with a uniform box (mode='same') and the last
    ``factor // 2`` samples are then dropped from the result.

    Parameters
    ----------
    y : array-like
        One-dimensional series to smooth.
    factor : int
        Width of the averaging window; must be at least 1.

    Returns
    -------
    numpy.ndarray
        The smoothed series, ``factor // 2`` samples shorter than the
        convolution output.

    Raises
    ------
    ValueError
        If `factor` is smaller than 1.
    """
    if factor < 1:
        raise ValueError(f'factor must be >= 1, got {factor}')

    box = np.ones(factor) / factor
    y_smooth = np.convolve(y, box, mode='same')

    # Slice by explicit length: `[:-trim]` would return an empty array when
    # trim is 0 (i.e. factor == 1), because `-0` is just `0`.
    trim = factor // 2
    return y_smooth[:len(y_smooth) - trim]

def _baseline_trace(values):
    """Build the orange 'Original' line for an already smoothed baseline."""
    return go.Scatter(x=list(range(len(values))),
                      y=values,
                      mode='lines',
                      line=dict(color='#ff8a00', width=3),
                      name='Original')


def plot_runs(log_file='logs', train_test='train'):
    """Show two figures for the runs stored in ``data/{log_file}.pkl``.

    The pickle must contain a mapping with a 'baseline' series and a
    'portfolio' collection holding one series per run.

    1. One line per run (smoothed with window 4) plus the baseline.
    2. The mean/std spread of all runs (see `plot_spread`) plus the
       baseline smoothed with window 20.

    Parameters
    ----------
    log_file : str
        File name, without extension, of the pickle inside ``data/``.
    train_test : str
        Currently unused; kept so existing callers keep working.
    """
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # log files that come from a trusted source.
    with open(f'data/{log_file}.pkl', 'rb') as handle:
        logs = pickle.load(handle)

    baseline = logs['baseline']
    portfolios = logs['portfolio']

    # --- Figure 1: every run on its own line -------------------------------
    fig = go.Figure()

    for run, portfolio in enumerate(portfolios):
        # Later runs are drawn more opaque. Clamp at 1.0 because the +.2
        # offset would otherwise push the alpha channel out of range.
        alpha = min(run / len(portfolios) + .2, 1.0)

        portfolio = smooth(portfolio, 4)

        fig.add_trace(go.Scatter(x=list(range(len(portfolio))),
                                 y=portfolio,
                                 mode='lines',
                                 line=dict(color=f'rgba(0, 138, 196, {alpha})'),
                                 name=f'Run {run+1}'))

    fig.add_trace(_baseline_trace(smooth(baseline, 4)))

    fig.update_layout(
        xaxis=dict(title='Step'),
        yaxis=dict(title='Portfolio value ($)'),
        title='Portfolio value of different runs',
        title_x=0.5,
        showlegend=False)

    fig.show()

    # --- Figure 2: mean/std spread across runs -----------------------------
    fig = plot_spread(np.array(portfolios),
                      title='Spread of different runs',
                      xlabel='Step',
                      ylabel='Portfolio value ($)')

    # The x axis is derived from the window-20 series itself, so x and y
    # always have the same length.
    fig.add_trace(_baseline_trace(smooth(baseline, 20)))

    fig.show()
    
# Render both figures for the runs stored in data/logs.pkl.
plot_runs(log_file='logs')

In [292]:
# Re-render the figures for data/logs.pkl (same call as in the defining cell).
plot_runs(log_file='logs')

In [152]:
# Render both figures for the runs stored in data/logs-test.pkl.
plot_runs(log_file='logs-test')

In [250]:
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from utils import split_sequence

class Stock:
    """Closing-price history of one ticker, cut into windows.

    On construction the prices are downloaded through yfinance, optionally
    standard-scaled, and passed to `split_sequence`.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``yf.Ticker``.
    window_size : int
        Window length handed to `split_sequence`.
    train_size : float, default .3
        Fraction of the price series (taken from the start) used to fit the
        scaler.
    normalize : bool, default True
        Whether to compute `prices_norm` and build the windows from it.
    start, end : str, default '2010-1-1' / '2020-1-1'
        Date range requested from yfinance.

    Attributes
    ----------
    prices : numpy.ndarray
        'Close' values of the downloaded history after dropping rows with NaNs.
    prices_norm : numpy.ndarray
        Scaled prices; only present when `normalize` is true.
    sequence
        Result of ``split_sequence(prices, window_size)``.
    """

    def __init__(self, ticker, window_size, train_size=.3, normalize=True,
                 start='2010-1-1', end='2020-1-1'):
        self.ticker      = ticker
        self.window_size = window_size
        self.train_size  = train_size
        self.normalize   = normalize
        self.start       = start
        self.end         = end

        self._fetch_stock()

        if self.normalize:
            self._normalize()

        self._sequence()

    def _fetch_stock(self):
        """Download the history and keep the 'Close' column as an array."""
        stock_data = yf.Ticker(self.ticker)

        # '1d' is the bar size, which yfinance calls `interval`. `period` is
        # a look-back span and must not be combined with start/end.
        history = stock_data.history(interval='1d',
                                     start=self.start,
                                     end=self.end)

        self.prices = history.dropna()['Close'].values

    def _normalize(self):
        """Standard-scale all prices with statistics from the train part only.

        Raises
        ------
        ValueError
            If the training slice is empty (no prices were downloaded or
            `train_size` is too small).
        """
        # The size of the train split
        train_split = int(len(self.prices) * self.train_size)
        train_set = self.prices[:train_split]

        if len(train_set) == 0:
            raise ValueError(
                f'No training prices for {self.ticker!r}: got '
                f'{len(self.prices)} prices with train_size={self.train_size}')

        # Fit on the train part only, then transform the whole series.
        scaler = StandardScaler()
        scaler.fit(train_set.reshape(-1, 1))

        self.prices_norm = scaler.transform(self.prices.reshape(-1, 1)).flatten()

    def _sequence(self):
        """Build `sequence` from the scaled prices if present, else the raw ones."""
        # `prices_norm` only exists when _normalize() has run.
        prices = self.prices_norm if hasattr(self, 'prices_norm') else self.prices
        self.sequence = split_sequence(prices, self.window_size)

In [251]:
# AAPL with a window size of 200 and normalisation switched off.
stock = Stock(ticker='AAPL', window_size=200, normalize=False)

In [262]:
# Shape of the windowed price series (recorded output: (2318, 200)).
stock.sequence.shape

(2318, 200)

In [263]:
# Same expression Stock._normalize uses to compute its train split index.
int(len(stock.prices)*stock.train_size)

755

In [268]:
# First-order differences between consecutive prices at indices 750-779,
# i.e. the stretch around the train split index computed above.
np.diff(stock.prices[750:780], 1)

array([-0.9 ,  0.26, -0.69,  2.83,  2.11, -0.87, -1.89, -0.39,  0.18,
       -1.03,  0.8 , -0.4 , -2.32, -1.98,  2.53, -0.43, -0.34,  0.6 ,
        1.16, -7.95, -1.33,  1.25,  1.05, -0.18, -0.16, -0.24, -1.41,
        1.94, -0.06])

In [254]:
# Index into the first window via the stock's own window size instead of a
# hard-coded 200, so the look-ups stay in sync with how `stock` was built.
# NOTE(review): assumes window_size > 100; otherwise the last index goes
# negative and wraps around to the end of the series.
stock_price   = stock.prices[stock.window_size - 1]          # last price of the first window
stock_price_1 = stock.prices[(stock.window_size - 1) - 1]    # one step earlier
stock_price_n = stock.prices[(stock.window_size - 100) - 1]  # 100 steps earlier

stock_price, stock_price_1, stock_price_n

(39.03, 37.49, 30.41)