In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

import quandl
import pyfin as pn
import wbdata

In [25]:
raw = quandl.get("NASDAQOMX/NQIT", authtoken="iLpPPWLTPzmNi42cz3mY").drop(['High', 'Low', 'Dividend Market Value'], axis = 1)
it_debt_sec = quandl.get("WPSD/ITA_DP_DOD_DLDS_CR_GG_CD", authtoken="iLpPPWLTPzmNi42cz3mY")


In [26]:
interp_index = raw.asfreq('D')['Index Value'].interpolate(method = 'from_derivatives')
total_mkt = raw.asfreq('D')['Total Market Value'].interpolate(method = 'from_derivatives')

In [27]:
interp_debt = it_debt_sec.asfreq('D').interpolate(method = 'from_derivatives').loc['2001-04-18':'2018-07-27']

In [28]:
data = pd.DataFrame([interp_index, total_mkt], index = ['index', 'market']).T

In [29]:
data['ma5'] = data['index'].rolling(window=5).mean()
data['ma20'] = data['index'].rolling(window=20).mean()

In [30]:
data_ma = data.dropna(axis = 0)

In [31]:
indicators = {"SE.XPD.TERT.PC.ZS": "expenditure per student", 
              "IC.REG.DURS": 'Time to start business', 
              'IC.TAX.TOTL.CP.ZS': 'Commercial tax rate',
              'NE.CON.PRVT.PP.CD': 'Houshold final consumption'}

In [32]:
ec_data = wbdata.get_dataframe(indicators, country = ['IT'], data_date=(pd.Timestamp('2001-03-30'),pd.Timestamp('2018-07-27')))

In [33]:
data_merged = pd.concat([data_ma, interp_debt], axis = 1, sort = False)

In [34]:
eur_gbp = pd.read_csv('eur_gbp_test.csv').drop(['CurrencyPair', 'cDealable', 'lTid'], axis = 1)

In [35]:
eur_gbp['RateDateTime'] = pd.to_datetime(eur_gbp['RateDateTime'])
eur_gbp = eur_gbp.set_index('RateDateTime')

In [75]:
pip = (np.round((eur_gbp['RateAsk'] - eur_gbp['RateBid'])*10000))

In [76]:
pip_diff = pip.diff()
pip_diff[0] = 0
pip_diff = pip_diff.apply(lambda x: int(x))

#### Analysis

In [183]:
X = pip_diff

In [184]:
y = pip_diff.shift(periods = -1).fillna(method = 'ffill')

In [185]:
X = X.values.reshape(-1,1)
y = y.values.reshape(-1,1)

In [186]:
from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
from sklearn import tree
from sklearn.utils import compute_class_weight
from sklearn.model_selection import TimeSeriesSplit
import graphviz

In [189]:
def trees(X,y):
    
    avg = []
    for d in range(15,20):
        
        tscore = []
        vscore = []
        tscv = TimeSeriesSplit(n_splits=3)
        for train_index, test_index in tscv.split(X):
            X_train, X_test = X[:train_index[-1]], X[:test_index[-1]]
            y_train, y_test = y[:train_index[-1]], y[:test_index[-1]]
                
            w = compute_class_weight('balanced', np.unique(*y.reshape(1,-1).tolist()), *y.reshape(1,-1).tolist())
            weights = dict(zip(np.unique(y_train), w))
            
            clf = ExtraTreesClassifier(n_estimators = 100, criterion = 'gini', max_depth=d, class_weight=weights)            

            
            clf.fit(X_train, y_train)
            t = clf.score(X_train, y_train)
            v = clf.score(X_test,y_test)
            tscore.append(t)
            vscore.append(v)
        
        print('With depth: %i \n Average tscore = %f and vscore = %f' %(d, np.mean(tscore), np.mean(vscore)))
        avg.append(np.mean(vscore))
    
    opt_d = np.argmax(avg) + 3
    clf = ExtraTreesClassifier(n_estimators = 100, criterion='gini', max_depth=opt_d)
    
    clf.fit(X,y)
    return clf, opt_d

In [190]:
reg, opt_d = trees(X,y)



With depth: 15 
 Average tscore = 0.009581 and vscore = 0.010760




With depth: 16 
 Average tscore = 0.009581 and vscore = 0.010760




With depth: 17 
 Average tscore = 0.009581 and vscore = 0.010760




KeyboardInterrupt: 

In [None]:
%notify