In [105]:
import pandas as pd
from tools import create_vectors

In [106]:
vital_columns = ['date_', 'ticker', 'close', 'diffs', 'bin_2', 'bin_3', 'bin_5']
cols = ['open', 'high', 'low', 'vol', 'max_comp',
       'max_neg', 'max_neu', 'max_pos', 'min_comp', 'min_neu',
       'min_pos', 'std_comp', 'std_neg', 'std_neu', 'std_pos', 'mean_comp',
       'mean_neg', 'mean_neu', 'mean_pos', 'median_comp', 'median_neg',
       'median_neu', 'median_pos', 'count']

In [107]:
def is_stationary(ts):
    from statsmodels.tsa.stattools import adfuller
    p_val = adfuller(ts)[1]
    return p_val <= 0.05

def is_relevant(granger_result):
    tests = 'ssr_ftest', 'ssr_chi2test', 'lrtest', 'params_ftest'
    p_val = []
    for i in tests:
        p_val.append(granger_result[3][0][i][1])
    p_val = round(sum(p_val)/len(p_val), 2)
    return p_val <= 0.05

def granger_test(df, main_col, cols):
    from statsmodels.tsa.stattools import grangercausalitytests
    relevant = []
    for col in cols:
        try:
            x = grangercausalitytests(df[[main_col, col]], maxlag=[3], verbose=False)
            if is_relevant(x):
                relevant.append(col)
        except:
            print("There was an issue with: ", col)
        
    return relevant

In [108]:
amzn = pd.read_csv('../datasets/binned/amzn_binned.csv')

In [109]:
# check stationarity
close_stat = is_stationary(amzn.close)
close_pct_stat = is_stationary(amzn.close.pct_change().dropna())
print(close_stat, close_pct_stat)

False True


In [116]:
amzn['close_pct'] = amzn.close.pct_change()
amzn = amzn.dropna()
# granger test
relevant = granger_test(amzn, 'close_pct', cols)

In [112]:
amzn[vital_columns + relevant].to_csv('../datasets/relevant/amzn.csv', index=False)

In [113]:
aapl = pd.read_csv('../datasets/binned/aapl_binned.csv')
close_stat = is_stationary(aapl.close)
close_pct_stat = is_stationary(aapl.close.pct_change().dropna())
print(close_stat, close_pct_stat)

False True


In [117]:
aapl['close_pct'] = aapl.close.pct_change()
aapl = aapl.dropna()
relevant = granger_test(amzn, 'close_pct', cols)

In [118]:
print(relevant)

['low', 'vol', 'max_pos', 'std_neg', 'std_pos', 'mean_comp', 'mean_neg', 'mean_pos', 'median_comp', 'count']
