In [27]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')   

In [36]:
class TestProject(object):
#Test Project — Algorithmic Trading
#Prediction-based Trading & Event-based Backtesting
# author: Milica Medic Kiralj
    
    def __init__(self):
        self.get_data()
        
    '''Retrieve and prepare data'''   
    def get_data(self):
        self.raw = pd.read_csv('http://hilpisch.com/ref_eikon_eod_data.csv', index_col=0, parse_dates=True).dropna()
        symbol = '.SPX'
        self.raw = pd.DataFrame(self.raw[symbol].iloc[:1000])
        self.raw.rename(columns={symbol:'price'}, inplace = True)
        self.data = self.raw.dropna()
        
    def select_data(self):
        data = self.data.copy()    
        return data
    
    def prepare_data_features(self):
        data = self.select_data()
        '''Log Return'''
        data['return']=np.log(data['price']/data['price'].shift(1))
        data.dropna(inplace=True)
        
        '''Direction'''
        data['direction']= np.where(data['return'] > 0, 1, 0)
        
        ''' 5 categories returns'''
        self.bins = [ -0.006, -0.0003, 0.0003, 0.006]
        data['bins'] = np.digitize(data['return'], bins=self.bins)
        
        '''Simple Moving Average feature 20-day SMA'''
        data['sma_20'] = data['price'].rolling(20).mean()

        '''Simple Moving Average feature 60-day SMA'''
        data['sma_60'] = data['price'].rolling(60).mean()
        
        '''SMA difference'''
        data['sma_diff'] = data['sma_20']-data['sma_60']
        
        '''Exponentially Weighted Moving Average feature 20-day EWMA'''
        data['ewma_20'] = data['price'].ewm(halflife=20).mean()
        
        '''Exponentially Weighted Moving Average feature 20-day EWMA'''
        data['ewma_60'] = data['price'].ewm(halflife=60).mean()

        ''' EWMA difference'''
        data['ewma_diff'] = data['ewma_20'] - data['ewma_60']
        
        '''Rolling volatility short window'''
        data['vol_20'] = data['return'].rolling(20).std()

        '''Rolling volatility long window'''
        data['vol_60'] = data['return'].rolling(60).std()
        
        data.dropna(inplace=True)
        
        '''Split into train (70%) & test data'''
        self.train, self.test = train_test_split(data, test_size=0.3, shuffle=False)
        
        '''Normalize the training features data to have: zero mean and standard deviation of one'''
        self.cols = ['sma_20','sma_60','sma_diff','ewma_20','ewma_60','ewma_diff','vol_20','vol_60']
        self.mu, self.std = self.train[self.cols].mean(), self.train[self.cols].std() 
        
        '''Normalize the train cols'''
        self.train[self.cols] = (self.train[self.cols] - self.mu) / self.std
        
        '''Normalize the test cols'''
        self.test[self.cols] = (self.test[self.cols] - self.mu) / self.std
        
        self.cols.extend(['return', 'direction', 'bins'])
        
        '''Prepare lags'''
        lags = 5
        self.cols_ = []
        for col in self.cols:
            for lag in range(1, lags + 1):
                col_ = col + f'_lag_{lag}'
                self.train[col_] = self.train[col].shift(lag)
                self.test[col_] = self.test[col].shift(lag)
                self.cols_.append(col_)
        self.train_v = self.train[self.cols_].copy()
        self.test_v = self.test[self.cols_].copy()
        self.train_v.dropna(inplace=True)
        self.test_v.dropna(inplace=True)
        self.train.dropna(inplace=True)
        self.test.dropna(inplace=True)
        #print (self.train, self.test)
       
    def get_date_price (self, bar):
        date = str(self.data.index[bar])[:10]
        price = self.data.price.iloc[bar]
        return date, price
        
    
    def place_sell_order(self, bar, amount = None, units = None):
        self.ptc = 0
        self.ftc = 0
        date, price = self.get_date_price(bar)
        if units is None:
            units = int(amount/price)
        self.amount -= (units*price) * (1 + self.ptc) + self.ftc
        self.units -= units
        self.trades += 1
            
    
    def place_buy_order(self, bar, amount = None, units = None):
        self.ptc = 0
        self.ftc = 0
        date, price = self.get_date_price(bar)
        if units is None:
            units = int(amount/price)
        self.amount += (units*price) * (1 - self.ptc) - self.ftc
        self.units += units
        self.trades += 1
    
    def close_out(self, bar):
        date, price = self.get_date_price(bar)
        perf = (self.amount / self.initial_amount - 1) * 100
        print(f'{date}| initial_amount = {self.initial_amount:.2f}')
        print(f'{date}| final_amount = {self.amount:.2f}')
        print(f'{date}| net performance |%| = {perf:.4f}')
        print(f'{date}| number of trades |%| = {self.trades}')
        
    
    '''Train data'''
    
    def fit_and_predict_train(self):
        self.units = 0
        self.trades = 0
        self.order = 0
        self.initial_amount = 10000
        self.amount = 10000
        self.prepare_data_features()
        self.data = self.train.copy()
        self.models = {
          'GaussianNB': GaussianNB(),
          'LogisticRegression': LogisticRegression(C=1, solver='lbfgs', max_iter=500),
          'DecisionTreeClassifier': DecisionTreeClassifier(max_depth=7),
          'SVM': SVC(C=1, gamma='auto', kernel='linear'),
          'MLPClassifier': MLPClassifier(hidden_layer_sizes=[64], shuffle=False,max_iter=5000)}
        print(f'Train Data')

        for m in self.models:
            self.lags = 1
            self.model = self.models[m]
            self.model.fit(self.data[self.cols_], self.data['direction'])
            for bar in range(self.lags, len(self.data[self.cols_])):
                date, price = self.get_date_price (bar)
                self.state = self.train_v[self.cols_].iloc[bar - self.lags:bar]
                self.state.values.reshape(1, self.lags, len(self.train_v.columns))
                self.order = np.where(self.model.predict(self.state.values)==1, 1, -1)
                if self.order == 1:
                    if self.order == -1:
                        self.place_buy_order(bar - 1, units=-self.units)
                    self.place_sell_order(bar - 1, amount = self.amount)
                    self.order = 1
                elif self.order == -1:
                    if self.order == 1:
                        self.place_sell_order (bar - 1, units = self.units)
                    self.place_sell_order (bar - 1, amount = self.amount)
                    self.order = -1
            print(f'{date}| model = {self.model}')
            self.close_out(bar)
            
    '''Test data''' 
    
    def fit_and_predict_test(self):
        self.units = 0
        self.trades = 0
        self.order = 0
        self.initial_amount = 10000
        self.amount = 10000
        self.prepare_data_features()
        self.data = self.test.copy()
        self.models = {
          'GaussianNB': GaussianNB(),
          'LogisticRegression': LogisticRegression(C=1, solver='lbfgs', max_iter=500),
          'DecisionTreeClassifier': DecisionTreeClassifier(max_depth=7),
          'SVM': SVC(C=1, gamma='auto', kernel='linear'),
          'MLPClassifier': MLPClassifier(hidden_layer_sizes=[64], shuffle=False,max_iter=5000)}          
        print(f'Test Data')
        for m in self.models:
            self.lags = 1
            self.model = self.models[m]
            self.model.fit(self.data[self.cols_], self.data['direction'])
            for bar in range(self.lags, len(self.data[self.cols_])):
                date, price = self.get_date_price (bar)
                self.state = self.test_v[self.cols_].iloc[bar - self.lags:bar]
                self.state.values.reshape(1, self.lags, len(self.test_v.columns))
                self.order = np.where(self.model.predict(self.state.values)==1, 1, -1)
                if self.order == 1:
                    if self.order == -1:
                        self.place_buy_order(bar - 1, units=-self.units)
                    self.place_sell_order(bar - 1, amount = self.amount)
                    self.order = 1
                elif self.order == -1:
                    if self.order == 1:
                        self.place_sell_order (bar - 1, units = self.units)
                    self.place_sell_order (bar - 1, amount = self.amount)
                    self.order = -1
          
            print(f'{date}| model = {self.model}')
            self.close_out(bar)
            

        

In [37]:
prediction = TestProject()
prediction.fit_and_predict_train()
prediction.fit_and_predict_test()

Train Data
2012-11-07| model = GaussianNB()
2012-11-07| initial_amount = 10000.00
2012-11-07| final_amount = 508.48
2012-11-07| net performance |%| = -94.9152
2012-11-07| number of trades |%| = 652
2012-11-07| model = LogisticRegression(C=1, max_iter=500)
2012-11-07| initial_amount = 10000.00
2012-11-07| final_amount = 508.48
2012-11-07| net performance |%| = -94.9152
2012-11-07| number of trades |%| = 1304
2012-11-07| model = DecisionTreeClassifier(max_depth=7)
2012-11-07| initial_amount = 10000.00
2012-11-07| final_amount = 508.48
2012-11-07| net performance |%| = -94.9152
2012-11-07| number of trades |%| = 1956
2012-11-07| model = SVC(C=1, gamma='auto', kernel='linear')
2012-11-07| initial_amount = 10000.00
2012-11-07| final_amount = 508.48
2012-11-07| net performance |%| = -94.9152
2012-11-07| number of trades |%| = 2608
2012-11-07| model = MLPClassifier(hidden_layer_sizes=[64], max_iter=5000, shuffle=False)
2012-11-07| initial_amount = 10000.00
2012-11-07| final_amount = 508.48
20