In [55]:
import yfinance as yf
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import os
import datetime
from datetime import timedelta, date
import seaborn as sns
import matplotlib.pyplot as plt
from math import sqrt
from statistics import mean, stdev
import plotly.express as px
import webbrowser
from sklearn.model_selection import train_test_split
webbrowser.register('chrome', None, webbrowser.BackgroundBrowser('C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'))

In [7]:
symbols = ['ACB', 'F', 'GE', 'DIS', 'AAL', 'GPRO', 'DAL', 'MSFT', 'CCL', 'AAPL', 'FIT', 'SNAP', 'PLUG', 'BAC', 'BA', 'NCLH', 'INO', 'UAL', 'UBER', 'CGC', 'TSLA', 'AMD', 'CRON', 'RCL', 'TWTR', 'GRPN', 'FB', 'SBUX', 'MRO', 'ZNGA', 'BABA', 'T', 'KO', 'APHA', 'USO', 'XOM', 'AMZN', 'MFA', 'JBLU', 'NIO', 'MRNA', 'LUV', 'GM', 'GILD', 'MGM', 'SAVE', 'NFLX', 'NRZ', 'SPCE', 'LK', 'VSLR', 'AMC', 'PENN', 'VOO', 'TLRY', 'HAL', 'NOK', 'NVDA', 'CPRX', 'LYFT', 'SQ', 'SPY', 'V', 'NKE', 'SIRI', 'UCO', 'WORK', 'CPE', 'BYND', 'KOS', 'ET', 'OXY', 'PFE', 'ZM', 'CRBP', 'SPHD', 'FCEL', 'VKTX', 'JPM', 'NTDOY', 'NYMT', 'BP', 'ATVI', 'CSCO', 'WFC', 'WMT', 'GOOGL', 'INTC', 'GLUU', 'AUY', 'VTI', 'ERI', 'TXMD', 'SNE', 'PTON', 'ROKU', 'JNJ', 'IVR', 'MU']

# Feature Engineering

Here I want to create a column that looks at the variance over time, and flags days where the change was greater than the variance. I'll ultimately use that as my target.
The idea behind this is if I see a movement that is statistically unlikely for a stock, then it's noteworthy and I should buy (either the stock or perhaps an option).

In [15]:
"""Look at week long spans"""
start_date = datetime.date(2020, 4, 1)
end_date =  datetime.date(2020, 9, 14)
df = pd.DataFrame()
for i in symbols:
    tckr = yf.Ticker(i)
    results = tckr.history(start=start_date, end = end_date)
    #results.drop(['Dividends', 'Stock Splits'], axis=1, inplace=True)
    results['Symbol'] =[i]* int(results.shape[0])
    results.reset_index(inplace = True)
    df = df.append(results, ignore_index=True)
    print(i, 'dataframe added to base.')
print('Complete')

ACB dataframe added to base.
F dataframe added to base.
GE dataframe added to base.
DIS dataframe added to base.
AAL dataframe added to base.
GPRO dataframe added to base.
DAL dataframe added to base.
MSFT dataframe added to base.
CCL dataframe added to base.
AAPL dataframe added to base.
FIT dataframe added to base.
SNAP dataframe added to base.
PLUG dataframe added to base.
BAC dataframe added to base.
BA dataframe added to base.
NCLH dataframe added to base.
INO dataframe added to base.
UAL dataframe added to base.
UBER dataframe added to base.
CGC dataframe added to base.
TSLA dataframe added to base.
AMD dataframe added to base.
CRON dataframe added to base.
RCL dataframe added to base.
TWTR dataframe added to base.
GRPN dataframe added to base.
FB dataframe added to base.
SBUX dataframe added to base.
MRO dataframe added to base.
ZNGA dataframe added to base.
BABA dataframe added to base.
T dataframe added to base.
KO dataframe added to base.
APHA dataframe added to base.
USO dat

In [53]:
def daily_mean(x):
    values = [x['Open'], x['High'], x['Low'], x['Close']]
    return mean(values)
def daily_stddev(x):
    values = [x['Open'], x['High'], x['Low'], x['Close']]
    return stdev(values)
def daily_pct_change(x):
    return (x['Close']-x['Open'])/x['Open']
def notable_change(x):
    return 1 if abs(x['Pct_Change']) > sym_std[x['Symbol']] else 0
def prior_trend(x, num_of_days):
    _date =x['Date']
    _start = _date - timedelta(days=num_of_days)
    temp = df[df['Symbol'] == x['Symbol']]
    temp = temp[temp['Date'] <= _date]
    temp = temp[temp['Date']> _start]
    return temp['Pct_Change'].mean()

df['Mean'] = df.apply(lambda x: daily_mean(x), axis=1)
df['Std_Dev'] = df.apply(lambda x: daily_stddev(x), axis=1)
df['Pct_Change'] = df.apply(lambda x: daily_pct_change(x), axis=1)
df['Notable_Change'] = df.apply(lambda x: notable_change(x), axis=1)
df['Three_Day_Movement'] = df.apply(lambda x: prior_trend(x, 3), axis=1)
df['Five_Day_Movement'] = df.apply(lambda x: prior_trend(x, 5), axis=1)


In [51]:
df['Shifted_Notable_Change'] = df['Notable_Change'].shift(1)

for i in symbols:
    #remove all the earliest dates to account for the shift
    temp = df[df['Symbol'] ==i]
    index = temp[temp['Date']==temp['Date'].iloc[0]].index
    df.drop(index, inplace=True)

In [52]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Symbol,Mean,Std_Dev,Pct_Change,Notable_Change,Three_Day_Movement,Shifted_Notable_Change
1,2020-04-02,9.96,11.02,9.61,10.0,2770000.0,0.0,0.0,ACB,10.1475,0.607474,0.004016,0,-0.035611,0.0
2,2020-04-03,10.4,10.44,9.48,9.72,2549000.0,0.0,0.0,ACB,10.01,0.483735,-0.065385,0,-0.045536,0.0
3,2020-04-06,10.14,10.37,9.36,9.6,4244600.0,0.0,0.0,ACB,9.8675,0.467574,-0.053254,0,-0.053254,0.0
4,2020-04-07,10.08,10.21,9.53,9.6,2423000.0,0.0,0.0,ACB,9.855,0.340245,-0.047619,0,-0.050437,0.0
5,2020-04-08,9.88,10.39,9.6,10.22,2888000.0,0.0,0.0,ACB,10.0225,0.35255,0.034413,0,-0.022154,0.0


In [29]:
sym_std = {}
for i in symbols:
    temp = df[df['Symbol'] == i]
    sym_std[i] = temp['Pct_Change'].std()*3
    print(i, temp['Pct_Change'].std()*3)

ACB 0.2001487253019728
F 0.07673396135219634
GE 0.08888087694793932
DIS 0.05578413042303747
AAL 0.1560102451184592
GPRO 0.10224459299411476
DAL 0.11109049429522211
MSFT 0.05027997510338645
CCL 0.1596266239268621
AAPL 0.056680051102366624
FIT 0.03886330297683439
SNAP 0.08646082340538325
PLUG 0.13648790974745653
BAC 0.07036122018349499
BA 0.10764237546790258
NCLH 0.1695600721620938
INO 0.2014610532857304
UAL 0.15514001990502507
UBER 0.08373157072150395
CGC 0.11198124274982785
TSLA 0.12806511674058513
AMD 0.09705705206560473
CRON 0.12091489379178566
RCL 0.15464656837006338
TWTR 0.0735565178707798
GRPN 0.1818514334439524
FB 0.06183507637728998
SBUX 0.04754592291205778
MRO 0.11905428400064952
ZNGA 0.06787178746588023
BABA 0.04752128128193103
T 0.041622797717533265
KO 0.04622793538855605
APHA 0.11217993709921345
USO 0.09000063845468184
XOM 0.07337804951862928
AMZN 0.05952761273846989
MFA 0.1701729158098979
JBLU 0.11701752657047895
NIO 0.17464377078367962
MRNA 0.1440653417791106
LUV 0.0955418

# Train Test Setup

In [64]:
columns = ['Volume', 'Dividends', 'Stock Splits', 'Pct_Change', 'Three_Day_Movement', 'Five_Day_Movement']
target_col = ['Shifted_Notable_Change']
X = df[columns]
y = df[target_col]

In [65]:
train_X, valid_X, train_y, valid_y = train_test_split(X,y, test_size=.3, train_size=.7, shuffle=True, random_state = 0)

In [66]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

xgb_params = {'max_depth':[i for i in range(3,11, 2)], 
              'n_estimators':[100, 300, 500, 1000, 5000, 10000], 
              'learning_rate':[.1], 
              'n_jobs':[4],
              }
xgb = XGBClassifier()
xgb_rand_search  = GridSearchCV(xgb, xgb_params, scoring='f1', cv=5, refit=True)
xgb_rand_search.fit(train_X, train_y)
print('Best Score:', xgb_rand_search.best_score_)
print('Best Parameters:', xgb_rand_search.best_params_)
print('Train Score:', xgb_rand_search.score(train_X, train_y))
print('Valid Score:', xgb_rand_search.score(valid_X, valid_y))

Best Score: 0.19999999999999998
Best Parameters: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 100, 'n_jobs': 4}
Train Score: 0.7627118644067796
Valid Score: 0.18604651162790697


In [67]:
xgb_params = {'max_depth':7, 
              'n_estimators':100, 
              'learning_rate':.4, 
              'n_jobs':4,
              }
my_model = XGBClassifier(**xgb_params)
my_model.fit(X, y, verbose=False)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.4, max_delta_step=0, max_depth=7,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=4, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

# Simulate model with the past

The goal of this section is to run the model through a few months of use with imaginary money and see how it performs

In [None]:
class StockSimulator:
    def __init__(self, start_date, symbols, model):
        self._date = start_date
        self.end_date = date(2020, 9, 1)
        self.symbols = symbols
        self.money = MyMoney(1000)
        self._LOG = pd.DataFrame()
        self.model = model
    
    def collect_inputs(self):
        inputs = []
        for sym in self.symbols:
            temp = df[df['Symbol']==sym]
            temp = temp[temp['Date']==self._date]
            temp = temp[['Volume', 'Dividends', 'Stock Splits', 'Pct_Change', 'Three_Day_Movement', 'Five_Day_Movement']]
            inputs.append([sym,temp])
        return inputs

    def make_predictions(self, model_inputs):
        prediction = self.model.predict(model_inputs)
        print(prediction)
        if prediction == 1 and model_inputs['Pct_Change']>0:
            return True
        else:
            return False


    def transaction(self, symbol)
        amount = self.money.get_amount()
        if amount:
            self.money.add_to_portfolio(symbol, amount)

    def auto_sell(self)
        #TODO loop through everything in portfolio and sell anything two days after buying it

    def RUN_SIM(self): #TODO this could use some serious cleanup for naming
        while self._date < self.end_date:
            days_inputs = self.collect_inputs()
            for i in days_inputs:
                daily_predictions.append([days_inputs[0], self.make_predictions(days_inputs[1])])
            for i in daily_predictions:
                if i[1]:
                    self.transaction(i[0])
            self.auto_sell()



In [None]:
class MyMoney:
    """
    This class serves to manage the portofolio/money aspect of the simulation.
    """
    def __init__(self, starting_funds):
        self.funds = starting_funds
        self.portfolio = {}
        self.chunk_pct = .2
    
    def add_to_portfolio(self, symbol, amount):
        if symbol not in self.portfolio:
            self.portfolio[symbol] = amount
            self.funds = self.funds - amount
        else:
            self.portfolio[symbol] = self.portfolio[symbol]+=amount
            self.funds = self.funds - amount
    
    def sell_from_portfolio(self, symbol)
        self.funds += self.portfolio[symbol]
        self.portfolio.pop(symbol)

    def total_portfolio(self):
        return total_amount
    
    def get_amount(self):
        want_to_spend = .2 * self.total_portfolio()
        if want_to_spend> self.funds:
            return want_to_spend
        elif self.funds > want_to_spend and self.funds != 0:
            return self.funds
        else:
            return False