In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import torch
from torch import nn
import torch.nn.functional as F
from random import randint
%matplotlib inline

In [2]:
#logisitic functions
def merge_tables(path):
    tables = list()
    for f in os.scandir(path):
        if f.is_file():
            tables.append(pd.read_csv(f))
    return pd.concat(tables, axis=0)

def load_symbol(symbol):
    symbol = symbol.lower()
    path = os.path.abspath(symbol)
    
    daily = merge_tables(os.path.join(path, f'{symbol}_daily'))
    intraday = merge_tables(os.path.join(path, f'{symbol}_intraday'))
    sentiment = merge_tables(os.path.join(path, f'{symbol}_sentiment'))
    
    return daily, intraday, sentiment

In [3]:
#math functions
def sma(df, metric, n):
    return df[metric].rolling(window=n).mean()

def ema(df, metric, n):
    return df[metric].ewm(span=n, adjust=False).mean()

def typical_price(df, metrics):
    return df[metrics].mean(axis=1)

def moving_standard_dev(df, metric, n):
    return df[metric].rolling(n).std()

def bollinger_bands(df, metric, n, n_dev, avg_func):
    ma = avg_func(df[[metric]], metric, n)
    stdev = moving_standard_dev(df[[metric]], metric, n)
    up_b_band = ma + (n_dev * stdev)
    down_b_band = ma - (n_dev * stdev)
    
    up_b_band.name = f'up_band_{n}'
    down_b_band.name = f'down_band_{n}'
    
    return pd.concat([up_b_band, down_b_band], axis=1)
    
def diff(df, metrics):
    assert(len(metrics) == 2)
    dif = df[metrics[0]] - df[metrics[1]]
    return dif

def sharpe_ratio(df, metric):
    k = np.sqrt(252)
    return (k * df[metric].pct_change(-1).mean()) / df[metric].std()

In [4]:
#visualization functions
def plot_intersect(x, background, f, g):
    plt.figure(figsize=(22.5, 7.5))
    
    plt.plot(x, background, '-')
    plt.plot(x, f, '-')
    plt.plot(x, g, '-')
    gi = np.nan_to_num(g, 0)
    idx = np.argwhere(np.diff(np.sign(f - gi))).flatten()
    plt.plot(x[idx], f[idx], 'ro')
    
#     plt.autofmt_xdate()
    plt.plot()

In [5]:
def random_window(df, n):
    idx = randint(0, len(df) - n)
    return df.iloc[idx:idx + n]

In [6]:
os.chdir('Data')
folders = {f.path[2:]: f.path for f in os.scandir() if f.is_dir()}
symbols = list(folders.keys())

In [18]:
symbols

['aapl',
 'amd',
 'amzn',
 'baba',
 'bac',
 'brk.b',
 'crm',
 'csco',
 'dis',
 'fb',
 'ge',
 'googl',
 'gpro',
 'intc',
 'ko',
 'msft',
 'nflx',
 'nvda',
 'sbux',
 'spy',
 't',
 'tcehy',
 'tsla',
 'twtr',
 'v',
 'voo',
 'vz',
 'wmt']

In [7]:
aapl_day, aapl_intra, aapl_sntmt = load_symbol('sbux')
aapl_day.sort_values('Time', ascending=True, inplace=True)
aapl_day.reset_index(inplace=True, drop=True)
aapl_day.drop_duplicates('Time', inplace=True)

In [8]:
aapl_day['TP'] = typical_price(aapl_day, ['High', 'Low', 'Close'])

In [9]:
bands = bollinger_bands(aapl_day, 'TP', 20, 2, sma)

aapl_day['Upper_Bollinger_Band'] = bands['up_band_20']
aapl_day['Lower_Bollinger_Band'] = bands['down_band_20']

aapl_day['Width'] = diff(aapl_day, ['Upper_Bollinger_Band', 'Lower_Bollinger_Band'])

aapl_day['SMA_50'] = sma(aapl_day, 'TP', 50)
aapl_day['SMA_200'] = sma(aapl_day, 'TP', 200)

aapl_day['EMA_50'] = ema(aapl_day, 'TP', 50)
aapl_day['EMA_200'] = ema(aapl_day, 'TP', 200)

aapl_day['pct_return'] = aapl_day['Close'].pct_change(-1)
aapl_day['cum_return'] = aapl_day['pct_return'].cumsum()

In [10]:
sntmt = aapl_sntmt.groupby('time').mean()

In [11]:
aapl = aapl_day.merge(sntmt, how='left', left_on='Time', right_index=True)

In [12]:
aapl.fillna(0, inplace=True)

In [13]:
data_cols = list(aapl.columns)[1:]

In [14]:
class regressor(torch.nn.Module):
    def __init__(self):
        super(regressor, self).__init__()
        self.in_layer = nn.Linear(16, 16)
        self.layer_1 = nn.Linear(16, 32)
        self.layer_2 = nn.Linear(32, 16)
        self.layer_3 = nn.Linear(16, 4)
        self.output = nn.Linear(4, 1)
        
        self.ReLU = nn.ReLU()
        
        
    def forward(self, x):
        x = self.in_layer(x)
        x = self.ReLU(x)
        x = self.layer_1(x)
        x = self.ReLU(x)
        x = self.layer_2(x)
        x = self.ReLU(x)
        x = self.layer_3(x)
        x = self.ReLU(x)
        x = self.output(x)
        return x

In [15]:
reg_net = regressor()
reg_net = reg_net.float()
optimizer = torch.optim.Adam(reg_net.parameters(), lr=.0001)
criterion = torch.nn.MSELoss()

In [16]:
aapl.tail()

Unnamed: 0,Time,Open,High,Low,Close,Volume,TP,Upper_Bollinger_Band,Lower_Bollinger_Band,Width,SMA_50,SMA_200,EMA_50,EMA_200,pct_return,cum_return,compound,neg,neu,pos
120762,2020-03-05,77.9,77.9,75.28,76.19,12545266,76.456667,93.662432,75.615772,18.04666,87.415469,87.480897,85.796984,84.536312,0.011282,1.186928,0.0,0.0,0.0,0.0
120768,2020-03-06,73.46,75.71,72.05,75.34,18099474,74.366667,94.082996,73.965374,20.117623,87.134436,87.465847,85.348737,84.435122,0.063674,1.250602,0.37665,0.0165,0.8955,0.088
120773,2020-03-09,70.58,74.03,70.08,70.83,15930310,71.646667,94.715731,71.885473,22.830258,86.799918,87.437622,84.811401,84.307874,-0.05396,1.196642,0.0,0.0,0.0,0.0
120776,2020-03-10,72.56,74.95,70.03,74.87,18354674,73.283333,94.707973,70.511064,24.196909,86.501418,87.422306,84.35932,84.198177,0.09909,1.295732,-0.2023,0.12,0.778,0.101
120778,2020-03-11,72.83,74.27,67.4,68.12,17088240,69.93,94.815007,68.627366,26.187641,86.138151,87.389989,83.793464,84.056205,0.0,0.0,-0.0172,0.009,0.991,0.0


In [17]:
aapl.head()

Unnamed: 0,Time,Open,High,Low,Close,Volume,TP,Upper_Bollinger_Band,Lower_Bollinger_Band,Width,SMA_50,SMA_200,EMA_50,EMA_200,pct_return,cum_return,compound,neg,neu,pos
0,2000-02-08,34.25,35.0,34.06,35.0,2692000,34.686667,0.0,0.0,0.0,0.0,0.0,34.686667,34.686667,0.014493,0.014493,0.0,0.0,0.0,0.0
1,2000-02-09,34.81,34.88,34.06,34.5,2209500,34.48,0.0,0.0,0.0,0.0,0.0,34.678562,34.68461,0.078125,0.092618,0.0,0.0,0.0,0.0
3,2000-02-10,34.5,34.69,31.56,32.0,3025900,32.75,0.0,0.0,0.0,0.0,0.0,34.602932,34.66536,-0.026764,0.065854,0.0,0.0,0.0,0.0
5,2000-02-11,31.88,33.5,31.88,32.88,2399200,32.753333,0.0,0.0,0.0,0.0,0.0,34.530399,34.646335,-0.031232,0.034622,0.0,0.0,0.0,0.0
9,2000-02-14,33.38,34.81,32.88,33.94,2605400,33.876667,0.0,0.0,0.0,0.0,0.0,34.504762,34.638677,-0.019925,0.014697,0.0,0.0,0.0,0.0


In [19]:
aapl_intra.head()

Unnamed: 0,Time,Open,High,Low,Close,Volume
0,2020-02-10 16:00:00,87.5,87.56,87.49,87.54,154137
1,2020-02-10 15:59:00,87.4,87.5,87.4,87.5,49857
2,2020-02-10 15:58:00,87.5,87.51,87.4,87.4,118522
3,2020-02-10 15:57:00,87.485,87.51,87.485,87.5,41951
4,2020-02-10 15:56:00,87.5,87.5,87.445,87.485,44644
