In [5]:
import backtrader as bt
#import matplotlib.pyplot as plt
import backtrader.indicators as btind

import numpy as np
from sklearn.metrics import confusion_matrix
from datetime import datetime, timedelta
from sklearn import svm
import pandas as pd
import math
from sklearn import preprocessing
from math import sqrt
from technicalSignals import momentum,SMA,inBBands


In [6]:
# Stock symbols processed by every cell below.
tickers = [
    'AAPL', 'AMZN', 'GOOGL', 'MSFT', 'FB',
    'INTC', 'CSCO', 'CMCSA', 'NVDA', 'NFLX',
]

# (init, finish) offsets, in rows of the price series, delimiting the trend to predict.
# Full list of candidate windows ...
TREND_WINDOWs = [
    (-48, 0), (-35, 0), (-28, 0), (-7, 0), (-1, 0),
    (1, 2), (1, 8), (1, 29), (1, 36), (1, 50),
]
# ... immediately narrowed down to the single window currently under study.
TREND_WINDOWs = [(1, 36)]

# Key of the news dataset to load (see the `types` mapping in DatasetManager.load_dataset).
kind_of_dataset = 'AffectiveSpace'

In [6]:
class DatasetManager:
    """Loads the news-sentiment and price CSVs of one ticker and builds trend datasets.

    Attributes:
        X_raw: min-max scaled feature matrix, one row per aligned timestamp
            (set by ``load_dataset``).
        Y_raw: price DataFrame aligned row-by-row with ``X_raw`` (set by ``load_dataset``).
        X, Y: features and binary labels produced by the last
            ``get_dataset_for_trend`` call.
    """

    def __init__(self):
        # Bind the attributes to the instance; plain local assignments would be
        # discarded as soon as __init__ returns.
        self.X_raw = None
        self.Y_raw = None
        self.Y = None
        self.X = None

    def load_dataset(self, ticker, kind, technicalFeatures=False):
        """Read, align and scale the data of ``ticker``; store it in ``X_raw`` / ``Y_raw``.

        Args:
            ticker: stock symbol, used to build the CSV file names.
            kind: key of the ``types`` mapping below selecting the news dataset folder.
            technicalFeatures: when True, momentum / SMA / Bollinger-band columns computed
                by the ``technicalSignals`` helpers are appended to the features.

        Raises:
            KeyError: if ``kind`` is not a key of ``types``.
            AssertionError: if news and price do not have the same number of rows after
                being restricted to the common date range.
        """
        types = {'Summary': '/home/andrea/Desktop/NLFF/intrinioDatasetUpdated/SentimentFullAggregatedHourly/',
            'AffectiveSpace': '/home/andrea/Desktop/NLFF/AffectiveSpace/Aggregated_AffectSummary_dataset/',
            'Title': '/home/andrea/Desktop/NLFF/intrinioDatasetUpdated/SentimentTitleAggregatedHourly/',
            'Senticnet':''}
        news =  pd.read_csv(types[kind]+ticker+'.csv')
        price = pd.read_csv('/home/andrea/Desktop/NLFF/indexes/indexes'+ticker+'.csv')
        price = price.rename(index=str, columns={"date": "DATE"})
        news = news.rename(index=str, columns={"initTime": "DATE"})
        news = news.drop(['Unnamed: 0'], axis=1)
        news['DATE'] = [datetime.strptime(row, '%Y-%m-%d %H:%M:%S') for row in news['DATE']]
        # This dataset is already GMT+0
        price['DATE'] = [datetime.strptime(row, '%Y-%m-%d %H:%M:%S') for row in price['DATE']]
        if technicalFeatures:
            # Same columns, created in the same order as before:
            # all momentum columns, then all SMA columns, then the band flag.
            for period in (30, 50, 100, 150):
                price['mom_'+str(period)] = momentum(price, period)
            for period in (30, 50, 100, 150):
                price['SMA_'+str(period)] = SMA(price, period)
            price['in_BBands'] = inBBands(price)

        # ALIGNMENT
        # Positional access: the index labels are strings after rename(index=str),
        # so an integer key must not be interpreted as a label.
        initDate = max(news['DATE'].iloc[0], datetime(2017, 5, 22, 0, 0, 0))
        finalDate = min(news['DATE'].iloc[-1], datetime(2018, 6, 20, 0, 0, 0))
        news = news[(news.DATE >= initDate) & (news.DATE <= finalDate)].reset_index(drop=True)
        price = price[(price.DATE >= initDate) & (price.DATE <= finalDate)].reset_index(drop=True)
        assert len(price) == len(news), 'news and price rows are not aligned'

        # FEATURES: raw sentiment columns plus their rolling means over several windows
        sentiment = news.drop(['DATE'], axis=1)
        X = sentiment
        for window in [5,10,15,20,30,50]:
            rolled = sentiment.rolling(window).mean().add_suffix('_'+str(window))
            X = pd.concat([X, rolled],axis=1)
        if technicalFeatures:
            technical_features = ['mom_30','mom_50','mom_100','mom_150','SMA_30','SMA_50','SMA_100','SMA_150','in_BBands']
            X = pd.concat([X, price[technical_features]],axis=1)

        # NORMALIZATION
        # NaNs (e.g. the first rows of each rolling mean) become 0 before scaling.
        # NOTE(review): the scaler is fitted on every row, i.e. before the train/test
        # split done in get_dataset_for_trend - confirm this is intended.
        min_max_scaler = preprocessing.MinMaxScaler()
        X = np.nan_to_num(np.asarray(X, dtype=float))
        X = np.asarray(min_max_scaler.fit_transform(X))
        self.X_raw = X
        self.Y_raw = price

    def get_dataset_for_trend(self, init, finish, perc_train = 0.7):
        """Label every usable row with the sign of the open-price trend and split in time.

        For row ``i`` the label is 0 when the relative change of ``open`` between rows
        ``i+init`` and ``i+finish`` is negative, 1 otherwise.

        Args:
            init: offset (in rows, may be negative) of the start of the trend window.
            finish: offset (in rows) of the end of the trend window.
            perc_train: fraction of the samples, taken from the beginning, returned as
                the train/validation part.

        Returns:
            ``(x_tv, y_tv), (x_test, y_test), dates_test`` where ``dates_test`` holds the
            ``DATE`` value of every test row.
        """
        y = list()
        x = list()
        dates = list()
        price = self.Y_raw
        # Column-wise access once, instead of materialising a row Series per lookup.
        opens = price['open'].to_numpy()
        row_dates = price['DATE']
        for i in range(abs(init),len(price)-finish):
            start_price = opens[i+init]
            cumulative_return = (opens[i+finish]-start_price)/start_price
            s = np.sign(cumulative_return)
            y.append(0 if s==-1 else 1)
            dates.append(row_dates.iloc[i])
            x.append(self.X_raw[i])

        y = np.array(y)
        x = np.array(x)
        self.X = x
        self.Y = y
        # Chronological split: the first `nt` samples are train/validation, the rest test.
        nt=math.ceil(len(x)*perc_train)
        x_tv = x[:nt]
        y_tv = y[:nt]
        x_test = x[nt:]
        y_test = y[nt:]
        dates_test = dates[nt:]
        return (x_tv,y_tv),(x_test,y_test),dates_test

In [7]:
def cv(x_tv,y_tv):
    """Grid-search ``C`` and ``gamma`` of an RBF SVC with expanding-window validation.

    The first half of the samples is the initial training set; the second half is
    cut into 4 consecutive validation slices. At each fold the model is trained on
    everything before the slice and scored on the slice with the Matthews correlation
    coefficient (MCC); the mean MCC over the folds ranks the (C, gamma) pair.

    Args:
        x_tv: chronologically ordered feature matrix.
        y_tv: matching labels, expected to be 0/1 as produced by
            ``DatasetManager.get_dataset_for_trend``.

    Returns:
        ``(best_c, best_g)``: the pair with the highest mean MCC.
    """
    n_folds = 4
    # The split geometry does not depend on the hyper-parameters: compute it once.
    first_trainpoint = math.floor(len(x_tv)*0.50)
    dimval = math.floor(first_trainpoint*0.25)

    best_mcc = -float(np.inf)
    best_c = 0
    best_g = 0
    for c in np.logspace(-3,4,10):
        for g in np.logspace(-3,4,10):
            print('.', end='')
            #Cross validation
            cvMCC = 0
            for fold in range(n_folds):
                trainpoint = first_trainpoint + fold*dimval
                endval = trainpoint + dimval
                x_train=x_tv[0:trainpoint]
                y_train=y_tv[0:trainpoint]
                x_val=x_tv[trainpoint:endval]
                y_val=y_tv[trainpoint:endval]
                svm_model = svm.SVC(kernel='rbf', C=c, gamma=g)
                svm_model.fit(x_train,y_train)
                y_pred = svm_model.predict(x_val)
                # labels=[0, 1] forces a 2x2 matrix even when a fold contains (or the
                # model predicts) a single class; otherwise the unpacking below fails.
                confmatrix = confusion_matrix(y_val, y_pred, labels=[0, 1])
                # Python ints: the four-factor product cannot overflow.
                tn, fp, fn, tp = (int(v) for v in confmatrix.ravel())
                denom = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)
                mcc = 0 if denom== 0 else (tp*tn -fp*fn)/sqrt(denom)
                # True mean over the folds (dividing by a different constant only
                # rescaled the score; the ranking of the pairs is unaffected).
                cvMCC += mcc/n_folds

            if(cvMCC > best_mcc):
                best_mcc = cvMCC
                best_c = c
                best_g = g
    return (best_c,best_g)

# def normalize2(values):
#     pos_val = []
#     for v in values:
#         if v>0:
#             pos_val.append(v)
#     m = min(pos_val)
#     M = max(pos_val)
#     pos_val(pos_val-m)/(M-m)
#     neg_val = []
#     for v in values:
#         if v<=0:
#             neg_val.append(-v)
#     m = min(neg_val)
#     M = max(neg_val)
#     neg_val(neg_val-m)/(M-m)
#     p = 0
#     n = 0
#     for i in 
def normalize(values):
    """Linearly rescale ``values`` so that the minimum maps to -1 and the maximum to 1.

    Args:
        values: array-like of numbers (list, tuple or numpy array).

    Returns:
        A float numpy array with the same shape as the input. An empty input is
        returned as an empty array; when all values are equal there is no range to
        stretch, so an array of zeros (the centre of [-1, 1]) is returned instead of
        dividing by zero.
    """
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        return values
    m = values.min()
    M = values.max()
    if M == m:
        return np.zeros_like(values)
    return 2*(values-m)/(M-m)-1

In [5]:
# ========== MAKE PREDICTIONS FILE ====================

# For every trend window: train one RBF SVC per ticker on the train/validation part,
# score the test part and store the scores of all tickers in a single CSV.
for (init, finish) in TREND_WINDOWs:
    print('\n\n\n====================  trend: ',init,' ',finish, ' ==================== \n\n')
    predictions = pd.DataFrame()
    for ticker in tickers:
        print('\n'+ticker)
        ds = DatasetManager()
        ds.load_dataset(ticker=ticker, kind=kind_of_dataset, technicalFeatures=False)
        (x_tv, y_tv), (x_test, y_test), dates_test = ds.get_dataset_for_trend(
            init, finish, perc_train=0.7)

        # Hyper-parameters chosen on the train/validation part only.
        best_c, best_g = cv(x_tv, y_tv)
        svm_model = svm.SVC(kernel='rbf', C=best_c, gamma=best_g)
        svm_model.fit(x_tv, y_tv)

        # Signed decision-function value of each test sample, not the predicted class.
        y_pred = svm_model.decision_function(x_test)

        # Every ticker after the first must cover exactly the same test dates.
        assert predictions.empty or list(predictions.index) == dates_test
        predictions[ticker] = normalize(y_pred)
        predictions[ticker+'_not_norm'] = y_pred
        predictions.index = dates_test

    predictions.to_csv(
        'testPredictions/onlyNews'+kind_of_dataset+'/AllTickers_'+str(init)+'_'+str(finish)+'.csv')








AAPL


NameError: name 'DatasetManager' is not defined

In [8]:
# ===========================================    Trading Library utilities

    
# class Sizer(bt.Sizer):
#     params = dict(stake=1)

#     def _getsizing(self, comminfo, cash, data, isbuy):
#         dt, i = self.strategy.datetime.date(), data._id
#         s = self.p.stake * (1 + (not isbuy))
#         print('{} Data {} OType {} Sizing to {}'.format(
#             dt, data._name, ('buy' * isbuy) or 'sell', s))

#         return s

    
class Strategy(bt.Strategy):
    """Trades one share per signal from precomputed SVM scores and closes it later.

    On every bar, for each data feed named ``<ticker>``:
      * the current raw score is read from column ``<ticker>_not_norm`` of
        ``predictions`` and the previous ``num_pred_over_threshold`` normalized scores
        from column ``<ticker>``;
      * a position is opened when ``abs(raw score) > pred_threshold`` AND all those
        previous normalized scores are above ``pred_threshold`` (long if the raw score
        is positive, short otherwise);
      * the opposite order is scheduled ``forecast_window`` rows later.

    NOTE(review): the current score is the non-normalized one while the history check
    uses the normalized column, and the history check is one-sided (``>``) even for
    short entries - confirm this asymmetry is intended.
    """
    params = dict(
        pred_threshold = None,        #buy if prediction more than this threshold (prediction values normalized 1: most secure prediction)
        num_pred_over_threshold = None, #buy if more than this number of prediction over threshold
        forecast_window = None,
        predictions = None,
        verbose = None
    )

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed by ``dt`` (defaults to the current bar's timestamp)."""
        dt = dt or self.datetime.datetime().strftime('%Y-%m-%d %H:%M:%S')
        print('%s, %s' % (dt, txt))

    def notify_order(self, order):
        """Log executed and failed orders; silent unless ``verbose`` is set."""
        if not self.verbose:
            return
        if order.status in [order.Submitted, order.Accepted]:
            return
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

    def __init__(self):
        # Planned closing orders: (ticker, row index at which to close) -> +1 to close a
        # long with a sell, -1 to close a short with a buy.
        self.planned_sell = {}
        self.forecast_window = self.params.forecast_window
        self.predictions = self.params.predictions
        self.pred_threshold = self.params.pred_threshold
        self.num_pred_over_threshold = self.params.num_pred_over_threshold
        self.verbose = self.params.verbose

    def next(self):
        """Open new positions from the current scores and execute the closes due now."""
        # Timestamp and row position are the same for every feed: look them up once.
        dt = self.datetime.datetime().strftime('%Y-%m-%d %H:%M:%S')
        time_idx = np.where(self.predictions.index==dt)[0][0]
        # Clamp at 0 so that a negative start cannot wrap around to the end of the
        # series during the first bars.
        history_start = max(0, time_idx - self.num_pred_over_threshold)

        positions = {}
        for d in self.datas:
            dn = d._name
            ticker_pred = self.predictions[dn+'_not_norm'].at[dt]
            previous_pred = self.predictions[dn].iloc[history_start:time_idx]
            # Number of previous predictions above the threshold
            previous_pred = sum(1 for p in previous_pred if p > self.pred_threshold)
            positions[dn] = self.getposition(d).size

            # Open only if the current score is strong enough AND every one of the
            # previous scores in the window was above the threshold.
            if abs(ticker_pred) > self.pred_threshold and previous_pred >= self.num_pred_over_threshold:
                idx_to_sell = time_idx + self.forecast_window
                if ticker_pred > 0:
                    # Go long now, sell back after the forecast window
                    self.buy(data = d, size=1)
                    self.planned_sell[(dn,idx_to_sell)] = 1
                else:
                    # Go short now, buy back after the forecast window
                    self.sell(data = d, size=1)
                    self.planned_sell[(dn,idx_to_sell)] = -1

            # Execute the closing order planned for this bar, if any
            amount = self.planned_sell.pop((dn,time_idx), None)
            if amount is not None:
                if amount > 0:
                    self.sell(data = d, size=amount)
                else:
                    # The stored amount is negative for shorts: pass an explicit
                    # positive size to the buy order.
                    self.buy(data = d, size=abs(amount))
        if self.verbose:
            print('{} Positions {}'.format(dt, positions))

                
                
                
def printTradeAnalysis(analyzer):
    """Print a two-row summary table of a trade analysis.

    Args:
        analyzer: object exposing ``total.open``, ``total.closed``, ``won.total``,
            ``lost.total``, ``streak.won.longest``, ``streak.lost.longest`` and
            ``pnl.net.total`` as attributes (the notebook passes the result of the
            ``TradeAnalyzer`` analyzer's ``get_analysis()``).

    The strike rate is the percentage of closed trades that were won; it is reported
    as 0.0 when no trade has been closed yet, instead of dividing by zero.
    """
    total_open = analyzer.total.open
    total_closed = analyzer.total.closed
    total_won = analyzer.won.total
    total_lost = analyzer.lost.total
    win_streak = analyzer.streak.won.longest
    lose_streak = analyzer.streak.lost.longest
    pnl_net = round(analyzer.pnl.net.total,2)
    strike_rate = (total_won / total_closed) * 100 if total_closed else 0.0
    h1 = ['Total Open', 'Total Closed', 'Total Won', 'Total Lost']
    h2 = ['Strike Rate','Win Streak', 'Losing Streak', 'PnL Net']
    r1 = [total_open, total_closed,total_won,total_lost]
    r2 = [strike_rate, win_streak, lose_streak, pnl_net]
    header_length = max(len(h1), len(h2))
    # One leading empty cell, then one 15-character left-aligned cell per column
    row_format ="{:<15}" * (header_length + 1)
    print("Trade Analysis Results:")
    for row in (h1, r1, h2, r2):
        print(row_format.format('',*row))

def printSQN(analyzer):
    """Print the ``sqn`` attribute of ``analyzer`` rounded to two decimals."""
    rounded_sqn = round(analyzer.sqn, 2)
    message = 'SQN: {}'.format(rounded_sqn)
    print(message)


In [9]:
pred_threshold = 0.5       #buy if prediction more than this threshold (prediction values normalized 1: most secure prediction)
num_pred_over_threshold = 2  #buy if more than this number of prediction over threshold
init_value = 100000.0

# Backtest the prediction-driven Strategy on the test period of every trend window.
for (init, finish) in TREND_WINDOWs:
    print('\n====================  trend: ',init,' ',finish, ' ==================== \n')
    # Only forward-looking windows are supported: `finish` is used as the holding period.
    # Checked first so that no data is loaded for an unsupported window.
    assert init == 1 #Up to now only future
    cerebro = bt.Cerebro(stdstats=False)
    cerebro.addobservermulti(bt.observers.BuySell)
    cerebro.broker.setcash(init_value)
    cerebro.broker.setcommission(commission=0.0001)
    # Read the file from the same location the prediction-generation cell writes it to
    # (it depends on `kind_of_dataset`), instead of a separately hard-coded folder.
    predictions_path = 'testPredictions/onlyNews'+kind_of_dataset+'/AllTickers_'+str(init)+'_'+str(finish)+'.csv'
    predictions = pd.read_csv(predictions_path, index_col = 0)
    dates_test = [datetime.strptime(row, '%Y-%m-%d %H:%M:%S') for row in predictions.index]
    evenPlot = True  # only used by the commented-out plotmaster logic below
    for ticker in tickers:
        # Hourly bars restricted to the period covered by the predictions
        data = bt.feeds.GenericCSVData(
                    dataname='/home/andrea/Desktop/NLFF/indexes/indexes'+ticker+'.csv',
                    name=ticker,
                    timeframe = bt.TimeFrame.Minutes, 
                    compression = 60,
                    datetime=1,open=2,high=3,low=4,close=5,volume=6,openinterest=-1,
                    fromdate=dates_test[0],
                    todate=dates_test[-1]+timedelta(minutes=1),
                    reverse=False)
#         if(evenPlot):
#             data0 = data
#             evenPlot = False
#         else:
#             data.plotinfo.plotmaster = data0
#             evenPlot = True
        cerebro.adddata(data)
        
    #cerebro.addsizer(Sizer)    
    cerebro.addstrategy(Strategy,
                        pred_threshold = pred_threshold,
                        num_pred_over_threshold = num_pred_over_threshold,
                        forecast_window=finish,
                        predictions = predictions,
                        verbose = True)
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name="ta")
    cerebro.addanalyzer(bt.analyzers.SQN, _name="sqn")
    print('Starting Portfolio Value: %.2f' % init_value)
    strategy = cerebro.run()[0]
    final_value = cerebro.broker.getvalue()
    printTradeAnalysis(strategy.analyzers.ta.get_analysis())
    printSQN(strategy.analyzers.sqn.get_analysis())
    # Gain is printed as a fraction of the initial cash (0.05 means +5%)
    print('Final Portfolio Value: %.2f \nGain: %.2f' % (final_value, final_value/init_value - 1))





FileNotFoundError: File b'testPredictions/onlyNewsAffectiveSpace/AllTickers_1_36.csv' does not exist

In [None]:
# Plot the `cerebro` instance left in the namespace by the last executed backtest loop
# (a cell defining `cerebro` must have run first). volume=False is forwarded to the
# plotter - presumably it hides the volume panel; confirm against the backtrader docs.
cerebro.plot(volume=False)

In [None]:
## MARKET PERFORMANCE:
class BuyAndHold(bt.Strategy):
    """Market benchmark: submits a one-share buy order for every data feed on every bar.

    NOTE(review): despite the name, an order is sent at each bar, not only at the
    first one - confirm this is the intended benchmark.
    """

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed by ``dt`` (defaults to the current bar's timestamp)."""
        if not dt:
            dt = self.datetime.datetime().strftime('%Y-%m-%d %H:%M:%S')
        print('%s, %s' % (dt, txt))

    def notify_order(self, order):
        """Log the execution price of completed orders; ignore every other status."""
        if order.status != order.Completed:
            return
        if order.isbuy():
            self.log('BUY EXECUTED, %.2f' % order.executed.price)
        elif order.issell():
            self.log('SELL EXECUTED, %.2f' % order.executed.price)

    def __init__(self):
        # No indicators or state to set up
        pass

    def next(self):
        """Send a market buy of one share for each data feed."""
        for feed in self.datas:
            self.buy(data=feed, size=1)


# Starting cash. The first value is the amount needed to buy one share of each stock at
# the beginning; it is immediately replaced by the cash used for the strategy backtest.
init_value = 3670.0 #Correct value to buy one stock a the beginning
init_value = 100000

# Run the BuyAndHold benchmark over the test period of every trend window.
for (init, finish) in TREND_WINDOWs:
    print('\n====================  trend: ',init,' ',finish, ' ==================== \n')

    cerebro = bt.Cerebro()
    cerebro.broker.setcash(init_value)
    cerebro.broker.setcommission(commission=0.0001)

    # The predictions file is only used to recover the first and last test timestamps.
    predictions = pd.read_csv(
        'testPredictions/AllTickers_'+str(init)+'_'+str(finish)+'.csv',
        index_col=0)
    dates_test = [datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S') for stamp in predictions.index]

    for ticker in tickers:
        # Hourly bars restricted to the test period
        data = bt.feeds.GenericCSVData(
            dataname='/home/andrea/Desktop/NLFF/indexes/indexes'+ticker+'.csv',
            name=ticker,
            timeframe=bt.TimeFrame.Minutes,
            compression=60,
            datetime=1, open=2, high=3, low=4, close=5, volume=6, openinterest=-1,
            fromdate=dates_test[0],
            todate=dates_test[-1]+timedelta(minutes=1),
            reverse=False)
        cerebro.adddata(data)

    cerebro.addstrategy(BuyAndHold)
    cerebro.addanalyzer(bt.analyzers.SQN, _name="sqn")
    print('Starting Portfolio Value: %.2f' % init_value)
    strategy = cerebro.run()[0]
    final_value = cerebro.broker.getvalue()
    # Gain is printed as a fraction of the initial cash
    print('Final Portfolio Value: %.2f \nGain: %.2f' % (final_value, final_value/init_value - 1))