In [40]:
import numpy as np
import pandas as pd
from keras.models import Sequential, Model, load_model
from keras.layers import Dense
from keras.layers import LSTM, GRU, Conv1D, MaxPooling1D, Flatten, TimeDistributed, Concatenate
from keras.layers import GaussianNoise, BatchNormalization, Dropout
from keras.layers import Activation, merge, Input, concatenate, Reshape, add
from keras.optimizers import Adam, SGD
from keras.regularizers import l1, l2
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.callbacks import Callback, LambdaCallback, TensorBoard, ReduceLROnPlateau, EarlyStopping, LearningRateScheduler
from keras.utils import np_utils, to_categorical
from keras.utils.vis_utils import plot_model 
import tensorflow as tf
import keras.backend as K
import matplotlib.pyplot as plt
import time
%matplotlib inline

In [3]:
def get_mean(df, window_size=260):
    """
    Return, for every row, the mean of the `window_size` rows that precede it.

    The current row is not part of its own window: the value at position
    ``i`` equals ``df.iloc[i-window_size:i].mean()``.  The first
    `window_size` rows have no complete history and are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table; rows are used in positional order.
    window_size : int, default 260
        Number of preceding rows to average; must be at least 1.

    Returns
    -------
    pandas.DataFrame
        Same columns as `df`, indexed by ``df.index[window_size:]``
        (empty when `df` has no more than `window_size` rows).

    Raises
    ------
    ValueError
        If `window_size` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # rolling(...).mean() at row i covers rows i-window_size+1..i; shifting by
    # one row makes the window end at i-1 instead.  min_periods=1 keeps the
    # NaN-skipping behaviour of DataFrame.mean() inside a window.
    trailing_mean = df.rolling(window_size, min_periods=1).mean().shift(1)
    return trailing_mean.iloc[window_size:, :]

def normalize_data(df, input_size=200):
    """
    Express every row as its relative deviation from the trailing mean.

    The value at position ``i`` is
    ``df.iloc[i] / df.iloc[i-input_size:i].mean() - 1``, i.e. the row
    divided by the mean of the `input_size` rows before it (the row itself
    excluded), minus one.  The first `input_size` rows have no complete
    history and are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table; rows are used in positional order.
    input_size : int, default 200
        Number of preceding rows that form the reference mean; must be at
        least 1.

    Returns
    -------
    pandas.DataFrame
        Same columns as `df`, indexed by ``df.index[input_size:]``
        (empty when `df` has no more than `input_size` rows).

    Raises
    ------
    ValueError
        If `input_size` is smaller than 1.
    """
    if input_size < 1:
        raise ValueError("input_size must be a positive integer")

    # Mean of the rows strictly before each row: a rolling mean ending at the
    # current row, moved down by one.  min_periods=1 keeps the NaN-skipping
    # behaviour of DataFrame.mean() inside a window.
    trailing_mean = df.rolling(input_size, min_periods=1).mean().shift(1)
    normalized = df / trailing_mean - 1
    return normalized.iloc[input_size:, :]

In [33]:
# Read the ^GSPC price file (first column becomes a parsed date index), keep
# its first four data columns, then derive the two feature tables from them.
PRICE_FILE = "^GSPC.csv"
N_PRICE_COLUMNS = 4

df = (
    pd.read_csv(PRICE_FILE, index_col=[0], parse_dates=[0])
    .iloc[:, :N_PRICE_COLUMNS]
)
mean_df = get_mean(df, window_size=260)        # trailing 260-row means
norm_df = normalize_data(df, input_size=200)   # deviation from trailing 200-row mean

In [39]:
# Build one table per date: the raw columns first, then the normalized
# columns (suffix "_norm"), then the trailing-mean columns (suffix "_mean").
# Inner joins keep only the dates present in every table.
derived_features = norm_df.join(
    mean_df, how="inner", lsuffix="_norm", rsuffix="_mean"
)
all_data = df.join(derived_features, how="inner", lsuffix="_raw")

In [236]:
class Strategy:
    """
    Toy trading rule that is always either fully in cash or fully invested.

    Every call to `update` receives one window of ``INPUT_SIZE + HORIZON``
    rows: the first ``INPUT_SIZE`` rows are history, the last ``HORIZON`` rows
    are the period in which a buy or a sell may be filled.  Columns are
    addressed by position:

    * column 2 / column 1 give the period's lowest / highest fill price
      (presumably Low and High of an OHLC table -- confirm against the CSV);
    * columns 4:8 are fed to the model;
    * columns 8:12 hold the means used to turn the model output back into
      prices.

    With ``is_optimal=True`` no model is loaded: the rule buys at the period
    minimum and sells at the period maximum, i.e. it acts with perfect
    foresight.  The portfolio starts with ``money == 1.0`` and no stock.
    """

    # Window layout expected by update(): INPUT_SIZE rows of history followed
    # by HORIZON rows in which the trade is evaluated.
    INPUT_SIZE = 200
    HORIZON = 10

    def __init__(self, confidence=0.5, min_return=1.05, is_optimal=False):
        """
        Parameters
        ----------
        confidence : float, default 0.5
            Weight of the model's second output (when buying) or first output
            (when selling) in the limit price; the remaining weight goes to
            the third output.
        min_return : float, default 1.05
            A sale is only attempted when the limit price exceeds
            ``last_price * min_return``.
        is_optimal : bool, default False
            If True, use the perfect-foresight rule; the model is not loaded
            and `confidence` / `min_return` are not stored.
        """
        if not is_optimal:
            self.indexer = load_model("indexer")
            self.confidence = confidence
            self.min_return = min_return
        self.n_stock = 0
        self.money = 1.0
        # Price paid at the most recent buy; None while nothing has been bought.
        self.last_price = None
        self.is_optimal = is_optimal

    def update(self, df):
        """
        Process one window and buy or sell if the rule's limit price is hit.

        Parameters
        ----------
        df : pandas.DataFrame
            At least ``INPUT_SIZE + HORIZON`` rows, laid out as described in
            the class docstring.

        Raises
        ------
        ValueError
            If the window is too short.  Without this check the period
            min/max would be NaN and silently turn the portfolio into NaN.
        """
        window_end = self.INPUT_SIZE + self.HORIZON
        if len(df) < window_end:
            raise ValueError(
                "update() needs a window of at least %d rows, got %d"
                % (window_end, len(df))
            )

        # set up conditions for whether our strategy can be executed
        future = df.iloc[self.INPUT_SIZE:window_end, :]
        overall_min = future.iloc[:, 2].min()
        overall_max = future.iloc[:, 1].max()

        if not self.is_optimal:
            # Means over the trading period, needed to recover prices from the
            # normalized model output.
            # NOTE(review): these rows are the same ones the trade is
            # evaluated on, so this path reads values dated after the
            # decision point -- confirm this is intended.
            tmp = future.iloc[:, 8:12].mean()
            future_means = [tmp.iloc[1], tmp.iloc[2], tmp.iloc[[1, 3]].mean()]
            # get output from model and convert back to real value
            # (assumes the model returns three outputs, each indexable as
            # [0, 0] -- TODO confirm against the saved "indexer" model)
            input_seq = df.iloc[0:self.INPUT_SIZE, 4:8]
            raw_output = self.indexer.predict(
                input_seq.values.reshape((1, self.INPUT_SIZE, 4))
            )
            output = [(1 + v) * future_means[i] for i, v in enumerate(raw_output)]

        if self.money > 0:  # then buy
            if self.is_optimal:
                self.n_stock = self.money / overall_min
                self.last_price = overall_min
                self.money = 0
            else:
                cut_point = (self.confidence*output[1] + (1-self.confidence)*output[2])[0, 0]
                if cut_point >= overall_min:  # the price reached our bid
                    self.n_stock = self.money / cut_point
                    self.money = 0
                    self.last_price = cut_point
        elif self.n_stock > 0:  # then sell
            if self.is_optimal:
                if overall_max > self.last_price:
                    self.money = self.n_stock * overall_max
                    self.n_stock = 0
            else:
                cut_point = (self.confidence*output[0] + (1-self.confidence)*output[2])[0, 0]
                # sell only at a sufficient profit and only if the price
                # actually reached our ask during the period
                if cut_point > self.last_price*self.min_return and cut_point <= overall_max:
                    self.money = self.n_stock * cut_point
                    self.n_stock = 0

    def end(self, start_price, end_price):
        """
        Liquidate at `end_price`, report the result and reset the portfolio.

        Parameters
        ----------
        start_price, end_price : float
            Prices used for the "null strategy" comparison, whose return is
            ``end_price / start_price - 1``; `end_price` is also the price at
            which any remaining stock is converted back to money.

        Returns
        -------
        tuple of (float, float)
            ``(strategy_return, null_return)``, the two rates that are also
            printed.
        """
        self.money = self.money + self.n_stock * end_price
        inflation = (1 / start_price) * end_price
        strategy_return = self.money - 1
        null_return = inflation - 1
        if self.is_optimal:
            print("optimal strategy return rate: ", strategy_return)
        else:
            print("our strategy return rate: ", strategy_return)
        print("null strategy return rate: ", null_return)
        # back to the initial portfolio so the instance can be reused
        self.money = 1.0
        self.n_stock = 0
        self.last_price = None
        return strategy_return, null_return

In [237]:
class BackTest:
    """
    Walk a strategy through the data in consecutive ten-row steps.

    Starting at `start_index`, each step hands the strategy the 200 rows
    before the current position plus the 10 rows from it onwards, then
    advances by 10 rows.  When the data is exhausted the strategy is asked to
    liquidate and report.
    """

    # Rows of history and rows of trading period handed to strategy.update().
    LOOKBACK = 200
    HORIZON = 10

    def __init__(self, all_data, strategy, start_index=None):
        """
        Parameters
        ----------
        all_data : pandas.DataFrame
            Table the windows are sliced from.
        strategy : object
            Must provide ``update(window)`` and ``end(start_price, end_price)``.
        start_index : int, optional
            Row position of the first trading period.  Defaults to 70% of
            the way through `all_data`.

        Raises
        ------
        ValueError
            If fewer than ``LOOKBACK`` rows precede `start_index`; the first
            window would otherwise be sliced with a negative start and come
            out wrong.
        """
        self.data = all_data
        self.strategy = strategy

        if start_index is None:
            start_index = int(0.7*len(all_data))
        if start_index < self.LOOKBACK:
            raise ValueError(
                "start_index must be at least %d, got %d"
                % (self.LOOKBACK, start_index)
            )
        # Set on every path: previously an explicit start_index was dropped
        # and back_test() failed with AttributeError.
        self.start_index = start_index
        print("back test start from: ", all_data.iloc[self.start_index, :].name)

    def back_test(self):
        """
        Run the strategy over the data and return whatever ``strategy.end``
        returns.

        The comparison prices passed to ``end`` are column 0 at `start_index`
        and column 3 at the start of the last window processed (presumably
        an opening and a closing price -- confirm against the CSV).
        """
        data = self.data  # use the table given to __init__, not a global
        # Falls back to start_index when there is not enough data for even
        # one window, so the final lookup below is always defined.
        last_start = self.start_index
        for last_start in range(self.start_index, len(data) - self.HORIZON, self.HORIZON):
            window = data.iloc[last_start - self.LOOKBACK:last_start + self.HORIZON, :]
            self.strategy.update(window)
        return self.strategy.end(
            data.iloc[self.start_index, 0], data.iloc[last_start, 3]
        )

In [239]:
# Run the same back test twice: first with the simple strategy driven by our
# model, then with the simple strategy driven by an optimal prediction model.
strategy_settings = (
    dict(confidence=1, min_return=1.05, is_optimal=False),
    dict(is_optimal=True),
)

for settings in strategy_settings:
    strategy = Strategy(**settings)
    bt = BackTest(all_data, strategy)
    bt.back_test()

back test start from:  1998-06-18 00:00:00
our strategy return rate:  2.09513230207
null strategy return rate:  1.63692872122
back test start from:  1998-06-18 00:00:00
optimal strategy return rate:  371538.391212
null strategy return rate:  1.63692872122


# If we had a perfect model that predicted with 100% accuracy, we could become billionaires in 10 years! Look at that unbelievable return rate of 371538!

# Our strategy also outperforms the null strategy (simply holding the index from start to end): a return rate of about 210% against 164%, which is fair but not the best