In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import torch
from torch import nn
import torch.nn.functional as F
from random import randint
%matplotlib inline

In [2]:
os.chdir('Data')
folders = {f.path[2:]: f.path for f in os.scandir() if f.is_dir()}
symbols = list(folders.keys())

In [3]:
#logisitic functions
def merge_tables(path):
    tables = list()
    for f in os.scandir(path):
        if f.is_file():
            tables.append(pd.read_csv(f))
    return pd.concat(tables, axis=0)

def load_symbol(symbol):
    symbol = symbol.lower()
    path = os.path.abspath(symbol)
    
    daily = merge_tables(os.path.join(path, f'{symbol}_daily'))
    intraday = merge_tables(os.path.join(path, f'{symbol}_intraday'))
    sentiment = merge_tables(os.path.join(path, f'{symbol}_sentiment'))
    
    return daily, intraday, sentiment

In [4]:
#math functions
def sma(df, metric, n):
    return df[metric].rolling(window=n).mean()

def ema(df, metric, n):
    return df[metric].ewm(span=n, adjust=False).mean()

def typical_price(df, metrics):
    return df[metrics].mean(axis=1)

def moving_standard_dev(df, metric, n):
    return df[metric].rolling(n).std()

def bollinger_bands(df, metric, n, n_dev, avg_func):
    ma = avg_func(df[[metric]], metric, n)
    stdev = moving_standard_dev(df[[metric]], metric, n)
    up_b_band = ma + (n_dev * stdev)
    down_b_band = ma - (n_dev * stdev)
    
    up_b_band.name = f'up_band_{n}'
    down_b_band.name = f'down_band_{n}'
    
    return pd.concat([up_b_band, down_b_band], axis=1)
    
def diff(df, metrics):
    assert(len(metrics) == 2)
    dif = df[metrics[0]] - df[metrics[1]]
    return dif

In [5]:
#visualization functions
def plot_intersect(x, background, f, g):
    plt.figure(figsize=(22.5, 7.5))
    
    plt.plot(x, background, '-')
    plt.plot(x, f, '-')
    plt.plot(x, g, '-')
    gi = np.nan_to_num(g, 0)
    idx = np.argwhere(np.diff(np.sign(f - gi))).flatten()
    plt.plot(x[idx], f[idx], 'ro')
    
#     plt.autofmt_xdate()
    plt.plot()

In [6]:
aapl_day, _, aapl_sntmt = load_symbol('aapl')
aapl_day.sort_values('Time', ascending=True, inplace=True)
aapl_day.reset_index(inplace=True, drop=True)
aapl_day.drop_duplicates('Time', inplace=True)

In [7]:
aapl_day['TP'] = typical_price(aapl_day, ['High', 'Low', 'Close'])

In [8]:
bands = bollinger_bands(aapl_day, 'TP', 20, 2, sma)

aapl_day['Upper_Bollinger_Band'] = bands['up_band_20']
aapl_day['Lower_Bollinger_Band'] = bands['down_band_20']

aapl_day['Width'] = diff(aapl_day, ['Upper_Bollinger_Band', 'Lower_Bollinger_Band'])

aapl_day['SMA_50'] = sma(aapl_day, 'TP', 50)
aapl_day['SMA_200'] = sma(aapl_day, 'TP', 200)

aapl_day['EMA_50'] = ema(aapl_day, 'TP', 50)
aapl_day['EMA_200'] = ema(aapl_day, 'TP', 200)

In [9]:
sntmt = aapl_sntmt.groupby('time').mean()

In [10]:
aapl = aapl_day.merge(sntmt, how='left', left_on='Time', right_index=True)

In [11]:
aapl.fillna(0, inplace=True)

In [12]:
def random_window(df, n):
    idx = randint(0, len(df) - n)
    return df.iloc[idx:idx + n]

In [13]:
data_cols = list(aapl.columns)[1:]

In [14]:
class regressor(torch.nn.Module):
    def __init__(self):
        super(regressor, self).__init__()
        self.in_layer = nn.Linear(16, 16)
        self.layer_1 = nn.Linear(16, 32)
        self.layer_2 = nn.Linear(32, 16)
        self.layer_3 = nn.Linear(16, 4)
        self.output = nn.Linear(4, 1)
        
        self.ReLU = nn.ReLU()
        
        
    def forward(self, x):
        x = self.in_layer(x)
        x = self.ReLU(x)
        x = self.layer_1(x)
        x = self.ReLU(x)
        x = self.layer_2(x)
        x = self.ReLU(x)
        x = self.layer_3(x)
        x = self.ReLU(x)
        x = self.output(x)
        return x

In [15]:
reg_net = regressor()
reg_net = reg_net.float()
optimizer = torch.optim.Adam(reg_net.parameters(), lr=.0001)
criterion = torch.nn.MSELoss()

In [16]:
aapl.tail()

Unnamed: 0,Time,Open,High,Low,Close,Volume,TP,Upper_Bollinger_Band,Lower_Bollinger_Band,Width,SMA_50,SMA_200,EMA_50,EMA_200,compound,neg,neu,pos
10055,2020-01-30,320.5435,324.09,318.75,323.87,31685808,322.236667,328.593694,294.608462,33.985232,288.283796,230.851314,291.416298,242.907585,0.0,0.0,0.0,0.0
10057,2020-01-31,320.93,322.68,308.29,309.51,49897096,313.493333,328.222024,296.458132,31.763892,289.228596,231.420147,292.282064,243.609931,0.0,0.0,0.0,0.0
10059,2020-02-03,304.3,313.49,302.22,308.66,43496401,308.123333,327.42078,298.25471,29.166071,290.059845,231.95223,292.90329,244.251855,-0.0314,0.0995,0.826,0.075
10061,2020-02-04,315.31,319.64,313.6345,318.85,34154134,317.374833,326.611696,301.050944,25.560752,291.142855,232.521554,293.862959,244.979447,0.443433,0.009833,0.897833,0.0925
10063,2020-02-05,323.52,324.76,318.95,321.45,29384908,321.72,326.114755,303.827552,22.287203,292.329289,233.110471,294.955392,245.743035,0.172244,0.087778,0.801778,0.110444
