In [2]:
import numpy as np
import pandas as pd
import ta
import scipy.stats
from datetime import datetime
import math

In [3]:
df = pd.read_csv("./data/SNP.csv")
df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
4888,2020-03-26,47.09,47.799999,46.75,47.400002,47.400002,281800
4889,2020-03-27,45.889999,46.689999,45.450001,45.509998,45.509998,406600
4890,2020-03-30,46.389999,48.02,46.389999,47.66,47.66,390100
4891,2020-03-31,48.5,49.32,48.200001,48.700001,48.700001,223500
4892,2020-04-01,46.02,48.57,45.849998,47.529999,47.529999,316500


In [23]:
df = ta.add_all_ta_features(
    df, open="Open", high="High", low="Low", close="Close", volume="Volume", fillna=True)

# Slow : 60+
# Fast : 15
df['date'] = df['Date'].apply(lambda x: datetime.strptime(x,"%Y-%m-%d"))
df['dateInt'] = df['date'].apply(lambda x: (x - datetime(1970,1,1)).days)

df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'volume_adi', 'volume_obv', 'volume_cmf', 'volume_fi', 'volume_em',
       'volume_sma_em', 'volume_vpt', 'volume_vwap', 'volume_mfi',
       'volume_nvi', 'volatility_bbm', 'volatility_bbh', 'volatility_bbl',
       'volatility_bbw', 'volatility_bbp', 'volatility_bbhi',
       'volatility_bbli', 'volatility_kcc', 'volatility_kch', 'volatility_kcl',
       'volatility_kcw', 'volatility_kcp', 'volatility_kchi',
       'volatility_kcli', 'volatility_dcl', 'volatility_dch', 'volatility_dcm',
       'volatility_dcw', 'volatility_dcp', 'volatility_atr', 'volatility_ui',
       'trend_macd', 'trend_macd_signal', 'trend_macd_diff', 'trend_sma_fast',
       'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow',
       'trend_vortex_ind_pos', 'trend_vortex_ind_neg', 'trend_vortex_ind_diff',
       'trend_trix', 'trend_mass_index', 'trend_dpo', 'trend_kst',
       'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv',

In [38]:
# Feature Vector Calculations.

df['classPricechange'] = 1*((df['Close']-df['Open'])>=0)

df['medianPrice'] = ((df['High']+df['Low'])/2).shift(1)
df['typialPrice'] = ((df['High']+df['Low']+df['Close'])/3).shift(1)
df['weightedTypicalPrice'] = ((df['High']+df['Low']+2*df['Close'])/3).shift(1)

df['metricSMASlow'] = df['trend_sma_slow'].shift(1)
df['metricSMAFast'] = df['trend_sma_fast'].shift(1)

df['metricEMASlow'] = df['trend_ema_slow'].shift(1)
df['metricEMAFast'] = df['trend_ema_fast'].shift(1)
df['metricMACD'] = df['trend_macd'].shift(1)

df['momentumRSI'] = df['momentum_rsi'].shift(1)
df['momentumUO'] = df['momentum_uo'].shift(1)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,volume_adi,volume_obv,volume_cmf,...,momentum_kama,others_dr,others_dlr,others_cr,date,dateInt,classPricechange,medianPrice,typialPrice,weightedTypicalPrice
0,2000-10-18,15.865385,16.057692,15.865385,15.961538,5.656859,15145200,0.0,15145200,0.0,...,15.961538,0.0,0.0,0.0,2000-10-18,11248,1,,,
1,2000-10-19,15.384615,15.576923,14.807693,15.528846,5.50351,1434600,1255273.0,13710600,0.075711,...,15.813173,-2.710845,-2.748266,-2.710845,2000-10-19,11249,1,15.961538,15.961538,21.282051
2,2000-10-20,15.480769,15.673077,15.384615,15.384615,5.452393,263100,992172.6,13447500,0.058907,...,15.659287,-0.928793,-0.933133,-3.61446,2000-10-20,11250,0,15.192308,15.304487,20.480769
3,2000-10-23,15.096154,15.096154,14.278846,14.903846,5.282008,615900,1318236.0,12831600,0.075506,...,15.367363,-3.125,-3.174869,-6.626507,2000-10-23,11253,0,15.528846,15.480769,20.608974
4,2000-10-24,14.615385,14.759615,14.326923,14.423077,5.111618,674300,943624.0,12157300,0.052039,...,14.956563,-3.225806,-3.278982,-9.638555,2000-10-24,11254,0,14.6875,14.759615,19.727564


In [41]:
#MACD_14

features = ['dateInt',
               'medianPrice','typialPrice','weightedTypicalPrice',
               'metricSMASlow','metricSMAFast','metricEMASlow','metricEMAFast','metricMACD',
               'momentumRSI','momentumUO']

df2 = df[1:].reset_index(drop=True)

all_x = df2[features].values
all_y = df2['classPricechange'].values
#df_input.head()

train_test_mask = (df2['date'] <= datetime(2019,1,1))&(df2['date'] >= datetime(2001,1,1))

train_x = all_x[train_test_mask]
train_y = all_y[train_test_mask]

test_x = all_x[~train_test_mask]
test_y = all_y[~train_test_mask]

In [54]:
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

myPipeline = Pipeline([('scaler', StandardScaler()), 
                       ('gnb', GaussianNB())])
myPipeline.fit(train_x,train_y)


train_yhat = myPipeline.predict(train_x)

test_yhat = myPipeline.predict(test_x)


print("Test Confusion Matrix")
print(confusion_matrix(train_y, train_yhat))

print("Test Confusion Matrix")
print(confusion_matrix(test_y, test_yhat))


Test Confusion Matrix
[[1048 1136]
 [1054 1289]]
Test Confusion Matrix
[[ 77 117]
 [ 66 105]]


In [91]:
import numpy as np
import scipy.stats


array([[ 1.78980000e+04,  7.10249977e+01,  7.08833313e+01, ...,
         8.22687240e+01,  8.08435611e+01, -1.09599991e+01],
       [ 1.78990000e+04,  7.05499992e+01,  7.08933334e+01, ...,
         8.18005608e+01,  8.04124859e+01, -9.90999603e+00],
       [ 1.79000000e+04,  7.17599983e+01,  7.16366653e+01, ...,
         8.12990813e+01,  8.01552039e+01, -1.10999985e+01],
       ...,
       [ 1.83510000e+04,  4.60699997e+01,  4.58833326e+01, ...,
         4.88400512e+01,  4.86827110e+01, -1.32000351e+00],
       [ 1.83520000e+04,  4.72049999e+01,  4.73566666e+01, ...,
         4.84192349e+01,  4.84566703e+01, -2.29999924e+00],
       [ 1.83530000e+04,  4.87600002e+01,  4.87400004e+01, ...,
         4.80103574e+01,  4.79463095e+01,  3.69998932e-01]])

In [54]:
import sklearn
from sklearn.linear_model import PoissonRegressor

In [55]:
myPoisson = PoissonRegressor(alpha = 1.0)

In [67]:
pre_20_mask = df['Date_stamp'] < datetime.strptime("2020-01-01",'%Y-%m-%d')

yArray = np.array(df['Volume'])
xArray = np.array(df[['Open','Adj Close']])
xTr = xArray[pre_20_mask,]
xTe = xArray[~pre_20_mask,]
yTr = yArray[pre_20_mask]
yTe = yArray[~pre_20_mask]

myPoisson.fit(xTr,yTr)
myPoisson.coef_

yTe_P = myPoisson.predict(xTe)


In [None]:
#P(Y|w)P(w|x)

In [None]:
#max likelihood:
# P(y_1...N|X) = PI(Y_1|x_1)