In [1]:
import os
import numpy as np
import pandas as pd
import pickle # for serializing and de-serializing a Python object structure.
from datetime import datetime

import quandl

import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
py.init_notebook_mode(connected=True)

In [2]:
from TCC_pkg import dataanalysis as da
from TCC_pkg import technicalanalysis as ta
from TCC_pkg import testspecification as tspec


The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.



## Selection of time series

In [3]:
#======== USER-DEFINED ========#
# Load the object pickled at ../Data/aapl.pkl and select it as the working series.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
# NOTE(review): another cell in this notebook also assigns df_raw, so on a
# top-to-bottom run the assignment executed last decides which series is analysed.
path = '../Data/aapl.pkl'
with open(path, mode='rb') as f:
    df_raw = aapl = pickle.load(f)
#==============================#

In [4]:
#======== USER-DEFINED ========#
# Load the object pickled at ../Data/amzn.pkl and select it as the working series.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
# NOTE(review): this rebinds df_raw, replacing whatever an earlier cell selected.
path = '../Data/amzn.pkl'
with open(path, mode='rb') as f:
    df_raw = amzn = pickle.load(f)
#==============================#

# Execution

In [5]:
# Ground-truth direction label: +1 where Close rose versus the previous row,
# -1 otherwise (an unchanged close counts as -1).
df_real = df_raw.copy()
close_change = df_real['Close'].diff()
direction = 2 * (close_change > 0).astype(int) - 1
# Rows whose change is undefined (at least the first row) are left as NaN, so the
# 'Real' column ends up float-valued; downstream dropna() calls discard those rows.
df_real['Real'] = direction.where(close_change.notna())

In [6]:
# Visual check of the selected series: the 'Close' column through da.plotscatter,
# then the full frame through da.candleplot.
da.plotscatter(df_raw.loc[:, ['Close']])
da.candleplot(df_raw)

In [7]:
# Test specification over the selected series' index: one start date per
# test instance, plus the window_size setting passed to TestSpec.
test_spec = tspec.TestSpec(
    indices=df_raw.index,
    start_dates=['2016-01', '2016-07', '2017-01', '2017-07', '2018-01'],
    window_size=400,
)


In [8]:
# Validation of the test-specification class: print its pieces for inspection.
print(test_spec, '\n\n')
first_instance = test_spec.instance[0]
print(first_instance, '\n\n')

print(test_spec.start_dates, '\n\n')

# Test-set index of every instance.
for instance in test_spec.instance:
    print(instance.test_set, '\n\n')

# For the first instance, show each train/validation split of the forward-validation
# windows, then of the cross-validation windows, with the val/train size ratio.
for window in (first_instance.expanding_window_fv, first_instance.expanding_window_cv):
    for i, val_set in enumerate(window.val_sets):
        train_set = window.train_sets[i]
        print(train_set, '\n', val_set, '\n\n')
        print('val/train = {}\n\n'.format(len(val_set)/len(train_set)))
    

<TCC_pkg.testspecification.TestSpec object at 0x7f0e95a5fac8> 


<TCC_pkg.testspecification.TestSpec.TestInstance object at 0x7f0e95a5ff28> 


['2016-01', '2016-07', '2017-01', '2017-07', '2018-01'] 


DatetimeIndex(['2016-01-04', '2016-01-05', '2016-01-06', '2016-01-07',
               '2016-01-08', '2016-01-11', '2016-01-12', '2016-01-13',
               '2016-01-14', '2016-01-15',
               ...
               '2016-06-17', '2016-06-20', '2016-06-21', '2016-06-22',
               '2016-06-23', '2016-06-24', '2016-06-27', '2016-06-28',
               '2016-06-29', '2016-06-30'],
              dtype='datetime64[ns]', name='Date', length=125, freq=None) 


DatetimeIndex(['2016-07-01', '2016-07-05', '2016-07-06', '2016-07-07',
               '2016-07-08', '2016-07-11', '2016-07-12', '2016-07-13',
               '2016-07-14', '2016-07-15',
               ...
               '2016-12-16', '2016-12-19', '2016-12-20', '2016-12-21',
               '2016-12-22', '2016-12-23', '2016-12-27',

In [9]:
import operator

def execute_test_routine(df, test_spec, ta_params, ta_pred_func):
    """Evaluate a parameterised predictor over every instance of a test specification.

    For each instance in ``test_spec.instance``:

    1. For every candidate decay, walk the forward-validation splits
       (``instance.expanding_window_fv``): pick the parameter set with the highest
       ``da.acc_weighted`` score on the rows up to the end of the split's training
       set, score that parameter set on the split's validation set, and sum the
       validation scores per decay.
    2. Take the decay with the highest summed validation score and use it to pick
       the best parameter set on the rows up to the end of ``instance.train_set``.
    3. Collect that parameter set's predictions and the 'Real' labels on
       ``instance.test_set``.

    The predictions of all instances are concatenated and scored together.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Real' column plus whatever ``ta_pred_func`` reads.
        The caller's frame is not modified.
    test_spec : object
        Must expose ``instance``, an iterable of objects with ``train_set``,
        ``test_set`` and ``expanding_window_fv.train_sets`` / ``.val_sets``.
    ta_params : iterable of tuple
        Candidate parameter sets; each is unpacked positionally into
        ``ta_pred_func`` and must be hashable.
    ta_pred_func : callable
        Called as ``ta_pred_func(df, *params)``; its result is stored as a column.

    Returns
    -------
    Whatever ``da.classification_metrics(y_true=..., y_pred=...)`` returns.

    Raises
    ------
    ValueError
        If ``ta_params`` is empty.
    """
    decay_array = (0.95, 0.98, 0.99, 0.995, 0.999)

    # Materialise once: the candidates are iterated several times below, which
    # would silently yield nothing on later passes for a one-shot iterator.
    ta_params = list(ta_params)
    if not ta_params:
        raise ValueError('ta_params must contain at least one parameter combination')

    # Remove NaN entries; the explicit copy guarantees that the columns added
    # below are written to a frame owned by this function.
    df = df.dropna().copy()

    # Predict for every combination of parameters (one column per combination).
    columns = {}
    for params in ta_params:
        columns[params] = '{}'.format(params)
        df[columns[params]] = ta_pred_func(df, *params)

    # Remove NaN entries introduced by the prediction columns.
    df = df.dropna()

    def best_params_until(last_label, decay):
        """Return the parameter set scoring highest on rows up to ``last_label`` (inclusive)."""
        real = df.loc[:last_label, 'Real'].values
        scores = {}
        for params in ta_params:
            pred = df.loc[:last_label, columns[params]].values
            scores[params] = da.acc_weighted(real, pred, decay)
        # On ties the first candidate in ta_params order wins.
        return max(scores, key=scores.get)

    pred_chunks = []
    real_chunks = []
    for instance in test_spec.instance:
        forward_val = instance.expanding_window_fv

        # Summed validation score of each decay across the forward-validation splits.
        decay_acc = {}
        for decay in decay_array:
            decay_acc[decay] = 0
            for ifv, train_ind in enumerate(forward_val.train_sets):  # expanding window only
                val_ind = forward_val.val_sets[ifv]
                best_params = best_params_until(train_ind[-1], decay)
                real = df.loc[val_ind, 'Real'].values
                pred = df.loc[val_ind, columns[best_params]].values
                decay_acc[decay] += da.acc_weighted(real, pred, decay)

        # Decay that yielded maximum validation score (first one wins on ties).
        best_decay = max(decay_acc, key=decay_acc.get)

        # Use best_decay to select among parameters on the instance's training span.
        best_params = best_params_until(instance.train_set[-1], best_decay)

        # Predict on the test set using the selected parameters.
        pred_chunks.append(df.loc[instance.test_set, columns[best_params]].values.astype(int))
        real_chunks.append(df.loc[instance.test_set, 'Real'].values.astype(int))

    # Concatenate once; the leading empty float array keeps the float dtype and the
    # zero-instance behaviour of the previous incremental accumulation.
    pred_arr = np.concatenate([np.empty(0)] + pred_chunks)
    real_arr = np.concatenate([np.empty(0)] + real_chunks)

    # Evaluate the entire test set together
    return da.classification_metrics(y_true=real_arr, y_pred=pred_arr)

In [10]:
def execute_nonparam_test_routine(df, test_spec, ta_pred_func):
    """Evaluate a parameter-free predictor on the test set of every instance.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Real' column plus whatever ``ta_pred_func`` reads.
        The caller's frame is not modified.
    test_spec : object
        Must expose ``instance``, an iterable of objects with a ``test_set``
        usable as a row selector for ``df.loc``.
    ta_pred_func : callable
        Called as ``ta_pred_func(df)``; its result is stored in a 'Pred' column.

    Returns
    -------
    Whatever ``da.classification_metrics(y_true=..., y_pred=...)`` returns for the
    labels and predictions of all instances concatenated together.
    """
    # Remove NaN entries; the explicit copy guarantees that the 'Pred' column
    # added below is written to a frame owned by this function.
    df = df.dropna().copy()

    # Single prediction column: there are no parameters to choose between.
    df['Pred'] = ta_pred_func(df)

    # Remove NaN entries introduced by the prediction column.
    df = df.dropna()

    pred_chunks = []
    real_chunks = []
    for instance in test_spec.instance:
        pred_chunks.append(df.loc[instance.test_set, 'Pred'].values.astype(int))
        real_chunks.append(df.loc[instance.test_set, 'Real'].values.astype(int))

    # Concatenate once; the leading empty float array keeps the float dtype and the
    # zero-instance behaviour of the previous incremental accumulation.
    pred_arr = np.concatenate([np.empty(0)] + pred_chunks)
    real_arr = np.concatenate([np.empty(0)] + real_chunks)

    # Evaluate the entire test set together
    return da.classification_metrics(y_true=real_arr, y_pred=pred_arr)

In [11]:
# Metrics returned by the test routines, keyed by indicator name.
ta_res = dict()

In [12]:
# Candidate SMA settings as (small_window, large_window) pairs.
SMA_params = [
    (50, 200),
    (30, 70),
    (10, 30),
]
df = df_real.copy(deep=True)
ta_res['SMA'] = execute_test_routine(
    df,
    test_spec,
    ta_params=SMA_params,
    ta_pred_func=ta.SMA_prediction,
)

In [13]:
# Candidate EMA settings as (small_window, large_window) pairs.
EMA_params = [
    (10, 50),
    (5, 35),
    (3, 20),
]
df = df_real.copy(deep=True)
ta_res['EMA'] = execute_test_routine(
    df,
    test_spec,
    ta_params=EMA_params,
    ta_pred_func=ta.EMA_prediction,
)

In [14]:
# Candidate STOCH settings as (buy, sell) pairs.
STOCH_params = [
    (20, 80),
    (30, 70),
    (40, 60),
]
# Start from a fresh copy of the labelled frame and append the columns
# returned by ta.STOCH before running the test routine.
df = df_real.copy(deep=True)
df = pd.concat([df, ta.STOCH(df)], axis='columns')
ta_res['STOCH'] = execute_test_routine(
    df,
    test_spec,
    ta_params=STOCH_params,
    ta_pred_func=ta.STOCH_prediction,
)

In [15]:
# Candidate RSI settings as (buy, sell) pairs.
RSI_params = [
    (20, 80),
    (30, 70),
    (40, 60),
]
# Start from a fresh copy of the labelled frame and append the columns
# returned by ta.RSI before running the test routine.
df = df_real.copy(deep=True)
df = pd.concat([df, ta.RSI(df)], axis='columns')
ta_res['RSI'] = execute_test_routine(
    df,
    test_spec,
    ta_params=RSI_params,
    ta_pred_func=ta.RSI_prediction,
)

In [16]:
# Candidate AROON settings as (buy, sell) pairs.
AROON_params = [
    (90, -90),
    (70, -70),
    (30, -30),
]
# Start from a fresh copy of the labelled frame and append the columns
# returned by ta.AROON before running the test routine.
df = df_real.copy(deep=True)
df = pd.concat([df, ta.AROON(df)], axis='columns')
ta_res['AROON'] = execute_test_routine(
    df,
    test_spec,
    ta_params=AROON_params,
    ta_pred_func=ta.AROON_prediction,
)

In [17]:
# BB takes no tunable parameters here, so the non-parametric routine is used.
# NOTE(review): BB_params is not read anywhere in this cell.
BB_params = list()
df = df_real.copy(deep=True)
df = pd.concat([df, ta.BB(df)], axis='columns')
ta_res['BB'] = execute_nonparam_test_routine(
    df,
    test_spec,
    ta_pred_func=ta.BB_prediction,
)

In [18]:
# MACD takes no tunable parameters here, so the non-parametric routine is used.
# NOTE(review): MACD_params is not read anywhere in this cell.
MACD_params = list()
df = df_real.copy(deep=True)
df = pd.concat([df, ta.MACD(df)], axis='columns')
ta_res['MACD'] = execute_nonparam_test_routine(
    df,
    test_spec,
    ta_pred_func=ta.MACD_prediction,
)

In [19]:
# CHAIKIN takes no tunable parameters here, so the non-parametric routine is used.
# NOTE(review): CHAIKIN_params is not read anywhere in this cell.
CHAIKIN_params = list()
df = df_real.copy(deep=True)
df = pd.concat([df, ta.CHAIKIN(df)], axis='columns')
ta_res['CHAIKIN'] = execute_nonparam_test_routine(
    df,
    test_spec,
    ta_pred_func=ta.CHAIKIN_prediction,
)

In [20]:
# Results table: one column per indicator, one row per metric.
# The saved output of this cell is labelled as the amzn run.
technical_analysis_results = pd.DataFrame(ta_res)
technical_analysis_results

Unnamed: 0,AROON,BB,CHAIKIN,EMA,MACD,RSI,SMA,STOCH
accuracy,0.557143,0.444643,0.605357,0.594643,0.532143,0.460714,0.557143,0.444643
cohen_kappa,0.037521,-0.062573,0.174889,0.132808,0.061184,0.000225,0.024514,-0.03364
f1,0.67624,0.379242,0.679245,0.691156,0.566225,0.297674,0.686869,0.291572
fbeta,0.67624,0.379242,0.679245,0.691156,0.566225,0.297674,0.686869,0.291572
fn,58.0,222.0,83.0,63.0,146.0,253.0,45.0,253.0
fp,190.0,89.0,138.0,164.0,116.0,49.0,203.0,58.0
mathews_corrcoef,0.043689,-0.070245,0.178697,0.143955,0.061541,0.000305,0.03129,-0.044172
precision,0.576837,0.516304,0.629032,0.607656,0.595819,0.566372,0.572632,0.52459
recall,0.817035,0.299685,0.73817,0.801262,0.539432,0.201893,0.858044,0.201893
tn,53.0,154.0,105.0,79.0,127.0,194.0,40.0,185.0
