In [1]:
import os
import numpy as np
import pandas as pd
import pickle # for serializing and de-serializing a Python object structure.
from datetime import datetime

import quandl

import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
py.init_notebook_mode(connected=True)

In [2]:
from TCC_pkg import dataanalysis as da
from TCC_pkg import technicalanalysis as ta
from TCC_pkg import testspecification as tspec


The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.



## Selection of time series

In [3]:
# Tickers listed by the author: aapl, amzn, fb, ge, goog, msft, tsla, twtr
# ---------------- user-defined setting ----------------
stock = 'twtr'
# -------------------------------------------------------
path = '../Data/{}.pkl'.format(stock)

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(path, 'rb') as pickled_frame:
    df_raw = pickle.load(pickled_frame)

# Execution

In [4]:
# First the 'Close' column alone (kept as a one-column frame),
# then the complete raw frame handed to da.candleplot.
da.plotscatter(df_raw.loc[:, ['Close']])
da.candleplot(df_raw)

In [5]:
# Test specification built over the raw frame's index: five start dates
# (given as 'YYYY-MM' strings) and a window size of 400.
test_spec = tspec.TestSpec(
    indices=df_raw.index,
    start_dates=['2016-01', '2016-07', '2017-01', '2017-07', '2018-01'],
    window_size=400,
)


In [6]:
# Validation of the test-specification class: dump its pieces for inspection.
print(test_spec, '\n\n')
print(test_spec.instance[0], '\n\n')

print(test_spec.start_dates, '\n\n')

# Test set of every instance.
for instance in test_spec.instance:
    print(instance.test_set, '\n\n')

# Train/validation splits of the first instance: forward-validation windows
# first, then the cross-validation windows, each with its val/train size ratio.
first_instance = test_spec.instance[0]
for window_name in ('expanding_window_fv', 'expanding_window_cv'):
    window = getattr(first_instance, window_name)
    for i, val_set in enumerate(window.val_sets):
        train_set = window.train_sets[i]
        print(train_set, '\n', val_set, '\n\n')
        print('val/train = {}\n\n'.format(len(val_set) / len(train_set)))
    

<TCC_pkg.testspecification.TestSpec object at 0x7fcf30db5128> 


<TCC_pkg.testspecification.TestSpec.TestInstance object at 0x7fcf30db5278> 


['2016-01', '2016-07', '2017-01', '2017-07', '2018-01'] 


DatetimeIndex(['2016-01-04', '2016-01-05', '2016-01-06', '2016-01-07',
               '2016-01-08', '2016-01-11', '2016-01-12', '2016-01-13',
               '2016-01-14', '2016-01-15',
               ...
               '2016-06-17', '2016-06-20', '2016-06-21', '2016-06-22',
               '2016-06-23', '2016-06-24', '2016-06-27', '2016-06-28',
               '2016-06-29', '2016-06-30'],
              dtype='datetime64[ns]', name='Date', length=125, freq=None) 


DatetimeIndex(['2016-07-01', '2016-07-05', '2016-07-06', '2016-07-07',
               '2016-07-08', '2016-07-11', '2016-07-12', '2016-07-13',
               '2016-07-14', '2016-07-15',
               ...
               '2016-12-16', '2016-12-19', '2016-12-20', '2016-12-21',
               '2016-12-22', '2016-12-23', '2016-12-27',

In [7]:
import operator

def _select_best_params(df, last_date, ta_params, decay):
    """Return the parameter tuple whose prediction column scores highest.

    Every candidate in ``ta_params`` is scored with ``da.acc_weighted`` on all
    rows of ``df`` up to and including ``last_date``. On ties the candidate
    that appears first in ``ta_params`` wins.
    """
    # The ground truth does not depend on the candidate, so slice it once.
    real = df.loc[:last_date, 'Direction'].values
    scores = {}
    for params in ta_params:
        pred = df.loc[:last_date, '{}'.format(params)].values
        scores[params] = da.acc_weighted(real, pred, decay)
    return max(scores, key=scores.get)


def execute_test_routine(df, test_spec, ta_params, ta_pred_func):
    """Evaluate a parametrised prediction rule over all test instances.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain a 'Direction' column. It is not modified:
        the routine works on a NaN-free copy.
    test_spec : object
        Must expose ``instance``, an iterable whose items provide
        ``expanding_window_fv.train_sets`` / ``.val_sets``, ``train_set``
        and ``test_set`` (index-like objects usable with ``df.loc``).
    ta_params : sequence of tuple
        Candidate parameter tuples; each is unpacked into ``ta_pred_func``.
    ta_pred_func : callable
        Called as ``ta_pred_func(df, *params)``; its result is stored as a
        column named ``str(params)``.

    Returns
    -------
    Whatever ``da.classification_metrics`` returns for the concatenated
    test-set labels and predictions of all instances.

    Notes
    -----
    For every instance, each decay in a fixed grid is scored by summing the
    validation accuracy obtained with the parameters that were best on the
    matching training window. The best decay then selects the parameters on
    the instance's full training set, and those parameters produce the
    test-set predictions.
    """
    decay_array = (0.95, 0.98, 0.99, 0.995, 0.999)

    # Explicit copy so that adding prediction columns never touches (or warns
    # about) the caller's frame.
    df = df.dropna().copy()

    # One prediction column per parameter combination.
    for params in ta_params:
        df['{}'.format(params)] = ta_pred_func(df, *params)

    # Drop rows for which any prediction is undefined.
    df = df.dropna()

    pred_chunks = []
    real_chunks = []
    for instance in test_spec.instance:
        fv = instance.expanding_window_fv

        decay_acc = {}
        for decay in decay_array:
            decay_acc[decay] = 0

            # Forward validation (expanding window only).
            for ifv, train_ind in enumerate(fv.train_sets):
                val_ind = fv.val_sets[ifv]
                best_params = _select_best_params(df, train_ind[-1], ta_params, decay)
                real = df.loc[val_ind, 'Direction'].values
                pred = df.loc[val_ind, '{}'.format(best_params)].values
                decay_acc[decay] += da.acc_weighted(real, pred, decay)

        # Decay with the highest summed validation accuracy (first wins ties).
        best_decay = max(decay_acc, key=decay_acc.get)

        # Re-select the parameters on the full training set with that decay.
        best_params = _select_best_params(
            df, instance.train_set[-1], ta_params, best_decay
        )

        pred_chunks.append(
            df.loc[instance.test_set, '{}'.format(best_params)].values.astype(int)
        )
        real_chunks.append(
            df.loc[instance.test_set, 'Direction'].values.astype(int)
        )

    # Concatenate once, keeping the integer dtype. Appending to a float64
    # np.empty(0) would silently turn the integer labels back into floats.
    pred_arr = np.concatenate(pred_chunks) if pred_chunks else np.empty(0, dtype=int)
    real_arr = np.concatenate(real_chunks) if real_chunks else np.empty(0, dtype=int)

    # Evaluate the entire test set together.
    return da.classification_metrics(y_true=real_arr, y_pred=pred_arr)

In [8]:
def execute_nonparam_test_routine(df, test_spec, ta_pred_func):
    """Evaluate a parameter-free prediction rule over all test instances.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain a 'Direction' column. It is not modified:
        the routine works on a NaN-free copy.
    test_spec : object
        Must expose ``instance``, an iterable whose items provide a
        ``test_set`` usable with ``df.loc``.
    ta_pred_func : callable
        Called as ``ta_pred_func(df)``; its result is stored in a 'Pred'
        column.

    Returns
    -------
    Whatever ``da.classification_metrics`` returns for the concatenated
    test-set labels and predictions of all instances.
    """
    # Explicit copy so that adding the 'Pred' column never touches (or warns
    # about) the caller's frame.
    df = df.dropna().copy()

    # Single prediction column: there is no parameter grid to search.
    df['Pred'] = ta_pred_func(df)

    # Drop rows for which the prediction is undefined.
    df = df.dropna()

    pred_chunks = []
    real_chunks = []
    for instance in test_spec.instance:
        pred_chunks.append(df.loc[instance.test_set, 'Pred'].values.astype(int))
        real_chunks.append(df.loc[instance.test_set, 'Direction'].values.astype(int))

    # Concatenate once, keeping the integer dtype. Appending to a float64
    # np.empty(0) would silently turn the integer labels back into floats.
    pred_arr = np.concatenate(pred_chunks) if pred_chunks else np.empty(0, dtype=int)
    real_arr = np.concatenate(real_chunks) if real_chunks else np.empty(0, dtype=int)

    # Evaluate the entire test set together.
    return da.classification_metrics(y_true=real_arr, y_pred=pred_arr)

In [9]:
# Metrics returned for each indicator, keyed by the indicator's name.
ta_res = dict()

In [10]:
# Candidate (small_window, large_window) pairs
SMA_params = [
    (50, 200),
    (30, 70),
    (10, 30),
]
df = df_raw.copy()
ta_res['SMA'] = execute_test_routine(
    df,
    test_spec,
    ta_params=SMA_params,
    ta_pred_func=ta.SMA_prediction,
)

In [11]:
# Candidate (small_window, large_window) pairs
EMA_params = [
    (10, 50),
    (5, 35),
    (3, 20),
]
df = df_raw.copy()
ta_res['EMA'] = execute_test_routine(
    df,
    test_spec,
    ta_params=EMA_params,
    ta_pred_func=ta.EMA_prediction,
)

In [12]:
# Candidate (buy, sell) pairs
STOCH_params = [
    (20, 80),
    (30, 70),
    (40, 60),
]
df = df_raw.copy()
# Append the columns produced by ta.STOCH to the raw frame.
df = pd.concat([df, ta.STOCH(df)], axis=1)
ta_res['STOCH'] = execute_test_routine(
    df,
    test_spec,
    ta_params=STOCH_params,
    ta_pred_func=ta.STOCH_prediction,
)

In [13]:
# Candidate (buy, sell) pairs
RSI_params = [
    (20, 80),
    (30, 70),
    (40, 60),
]
df = df_raw.copy()
# Append the columns produced by ta.RSI to the raw frame.
df = pd.concat([df, ta.RSI(df)], axis=1)
ta_res['RSI'] = execute_test_routine(
    df,
    test_spec,
    ta_params=RSI_params,
    ta_pred_func=ta.RSI_prediction,
)

In [14]:
# Candidate (buy, sell) pairs
AROON_params = [
    (90, -90),
    (70, -70),
    (30, -30),
]
df = df_raw.copy()
# Append the columns produced by ta.AROON to the raw frame.
df = pd.concat([df, ta.AROON(df)], axis=1)
ta_res['AROON'] = execute_test_routine(
    df,
    test_spec,
    ta_params=AROON_params,
    ta_pred_func=ta.AROON_prediction,
)

In [15]:
# Left empty: the non-parametric routine below takes no parameter grid.
BB_params = list()
df = df_raw.copy()
# Append the columns produced by ta.BB to the raw frame.
df = pd.concat([df, ta.BB(df)], axis=1)
ta_res['BB'] = execute_nonparam_test_routine(
    df,
    test_spec,
    ta_pred_func=ta.BB_prediction,
)

In [16]:
# Left empty: the non-parametric routine below takes no parameter grid.
MACD_params = list()
df = df_raw.copy()
# Append the columns produced by ta.MACD to the raw frame.
df = pd.concat([df, ta.MACD(df)], axis=1)
ta_res['MACD'] = execute_nonparam_test_routine(
    df,
    test_spec,
    ta_pred_func=ta.MACD_prediction,
)

In [17]:
# Left empty: the non-parametric routine below takes no parameter grid.
CHAIKIN_params = list()
df = df_raw.copy()
# Append the columns produced by ta.CHAIKIN to the raw frame.
df = pd.concat([df, ta.CHAIKIN(df)], axis=1)
ta_res['CHAIKIN'] = execute_nonparam_test_routine(
    df,
    test_spec,
    ta_pred_func=ta.CHAIKIN_prediction,
)

In [18]:
# Tabulate the collected metrics: one column per indicator, one row per metric.
technical_analysis_results = pd.DataFrame(ta_res)
technical_analysis_results

Unnamed: 0,AROON,BB,CHAIKIN,EMA,MACD,RSI,SMA,STOCH
accuracy,0.509804,0.475936,0.552585,0.554367,0.552585,0.504456,0.481283,0.497326
cohen_kappa,0.0192,-0.047905,0.105445,0.108607,0.10499,0.00838,-0.037311,-0.005427
f1,0.561404,0.443182,0.510721,0.571918,0.576728,0.569659,0.464088,0.508711
fbeta,0.561404,0.443182,0.510721,0.571918,0.576728,0.569659,0.464088,0.508711
fn,105.0,164.0,150.0,114.0,110.0,97.0,155.0,135.0
fp,170.0,130.0,101.0,136.0,141.0,181.0,136.0,147.0
mathews_corrcoef,0.019737,-0.048261,0.107091,0.108943,0.105638,0.008784,-0.037397,-0.005432
precision,0.508671,0.473684,0.564655,0.551155,0.548077,0.50411,0.480916,0.498294
recall,0.626335,0.41637,0.466192,0.594306,0.608541,0.654804,0.448399,0.519573
tn,110.0,150.0,179.0,144.0,139.0,99.0,144.0,133.0


In [19]:
# Run the same evaluation through the packaged TechAnalysis class. In the
# recorded output its `results` table matches the one assembled
# cell-by-cell above.
ta_obj = ta.TechAnalysis(df_raw, test_spec)
ta_obj.execute_test_routine()
ta_obj.results

Unnamed: 0,AROON,BB,CHAIKIN,EMA,MACD,RSI,SMA,STOCH
accuracy,0.509804,0.475936,0.552585,0.554367,0.552585,0.504456,0.481283,0.497326
cohen_kappa,0.0192,-0.047905,0.105445,0.108607,0.10499,0.00838,-0.037311,-0.005427
f1,0.561404,0.443182,0.510721,0.571918,0.576728,0.569659,0.464088,0.508711
fbeta,0.561404,0.443182,0.510721,0.571918,0.576728,0.569659,0.464088,0.508711
fn,105.0,164.0,150.0,114.0,110.0,97.0,155.0,135.0
fp,170.0,130.0,101.0,136.0,141.0,181.0,136.0,147.0
mathews_corrcoef,0.019737,-0.048261,0.107091,0.108943,0.105638,0.008784,-0.037397,-0.005432
precision,0.508671,0.473684,0.564655,0.551155,0.548077,0.50411,0.480916,0.498294
recall,0.626335,0.41637,0.466192,0.594306,0.608541,0.654804,0.448399,0.519573
tn,110.0,150.0,179.0,144.0,139.0,99.0,144.0,133.0
