In [None]:
# import all necessary libraries
import pandas as pd
import numpy as np
from numpy.random import seed
import sklearn as sk
import matplotlib.pyplot as plt
import datetime as dt
import pandas_datareader as web
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.utils.vis_utils import plot_model
from keras import Sequential
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Adding seeds to ensure reproducible results.
# NumPy's seed alone does not cover TensorFlow's own random generator, which
# is a separate source of randomness, so both are seeded.
seed(1000)
tf.random.set_seed(1000)

In [None]:
# Symbol of the instrument to download
ticker = 'AF.PA'

# Sample period: the series starts on 2010/01/01 and ends on 2019/12/31
start = dt.datetime(year=2010, month=1, day=1)
end = dt.datetime(year=2019, month=12, day=31)

In [None]:
# load the price history of `ticker` between `start` and `end` from the 'yahoo' source
data = web.DataReader(ticker, 'yahoo', start, end)

We have to extract features from the data. For now, we use three technical indicators commonly used in trading: the Relative Strength Index (RSI), the Stochastic Oscillator and Bollinger Bands. All three indicators measure price momentum, i.e. the strength of the trend at a particular time. More information on the computation of the indicators is given in the PDF. The following function iterates through the entire dataframe and computes the various indicators.

In [None]:
def _rsi(prices, n):
    """Return the n-day Relative Strength Index for every row from index n on.

    For row p, the n day-over-day percentage changes ending at p are split
    into gains and losses. Both are averaged over n (days without a move
    count as zero) and combined as ``100 - 100 / (1 + u / d)``.
    """
    rsi = []

    for p in range(n, len(prices)):
        gains = []
        losses = []
        for i in range(p - n, p):
            change = 100 * ((prices[i + 1] - prices[i]) / prices[i])

            if change < 0:
                losses.append(abs(change))
            elif change > 0:
                gains.append(change)

        u = (1 / n) * sum(gains)
        d = (1 / n) * sum(losses)

        if d == 0:
            # u / d is undefined without any losing day: the formula tends to
            # 100 when there are gains; a completely flat window is reported
            # as the neutral value 50 instead of raising ZeroDivisionError.
            rsi.append(100.0 if u > 0 else 50.0)
        else:
            rsi.append(100 - (100 / (1 + (u / d))))

    return rsi


def _stochastic(prices, n, d):
    """Return the stochastic oscillator %K and its d-day moving average.

    %K for row p locates the close inside the [low, high] range of the n
    closes ending at p (inclusive). The moving average for position p of %K
    averages the d values *before* p, so it has d fewer entries than %K.
    """
    span = n - 1
    k_values = []

    for p in range(span, len(prices)):
        window = prices[p - span:p + 1]
        high = window.max()
        low = window.min()

        if high == low:
            # Flat window: the range is empty, use the mid-scale value
            # rather than propagating 0 / 0.
            k_values.append(50.0)
        else:
            k_values.append(((prices[p] - low) / (high - low)) * 100)

    k_average = [(1 / d) * sum(k_values[p - d:p])
                 for p in range(d, len(k_values))]

    return k_values, k_average


def _bollinger(prices, k, n):
    """Return the n-day moving average and the k-sigma Bollinger bands.

    The window for row p covers the n rows before p (p itself excluded) and
    the standard deviation is the population one (divides by n).
    """
    moving_average = []
    upper = []
    lower = []

    for p in range(n, len(prices)):
        window = prices[p - n:p]
        ma = (1 / n) * sum(window)
        var = sum((price - ma) ** 2 for price in window)
        sigma = np.sqrt((1 / n) * var)

        moving_average.append(ma)
        upper.append(ma + k * sigma)
        lower.append(ma - k * sigma)

    return moving_average, upper, lower


def indicators(data):
    """Compute the technical indicators from the 'Adj Close' column of data.

    Parameters:
        data: DataFrame with an 'Adj Close' column, in chronological order.

    Returns:
        (RSI, K, D, MA, boll_up, boll_dw), six lists that all end on the last
        row of data but start at different rows because of their warm-up:
        RSI has len(data) - 9 values, K len(data) - 13, D len(data) - 18 and
        the three Bollinger lists len(data) - 20.
    """
    # Work on a plain positional array: integer lookups on a Series with a
    # non-integer index rely on a deprecated label/position fallback.
    prices = data['Adj Close'].to_numpy(dtype=float)

    RSI = _rsi(prices, 9)  # 9-days RSI
    K, D = _stochastic(prices, 14, 5)  # 14-days SO & 5-days moving average
    MA, boll_up, boll_dw = _bollinger(prices, 2, 20)  # 20-days MA and 2-sd bollinger bands

    return RSI, K, D, MA, boll_up, boll_dw

We then split the data into a training set and a testing set using a short function we wrote. The function takes the data and a number between 0 and 1 as inputs and returns two dataframes. The data is split at the "slice" index: training runs from index 0 up to "slice", and testing from "slice" to the end. In this case "slice" is chosen so that the training set represents 70% of the data, leaving 30% for testing.

In [None]:
def test_train_split(data, train):
    """Split data chronologically into a training part and a testing part.

    Parameters:
        data: DataFrame to split; row order is preserved.
        train: fraction (between 0 and 1) of the rows assigned to training.

    Returns:
        (data_train, data_test): the first int(train * len(data)) rows and
        the remaining rows. Both are independent copies, so later in-place
        edits do not touch the original frame or trigger chained-assignment
        warnings.
    """
    split = int(train * len(data))  # position where the test set begins

    # Positional slicing gives the same result for any index type and also
    # handles the boundaries (train == 0 or train == 1) without IndexError.
    data_train = data.iloc[:split].copy()
    data_test = data.iloc[split:].copy()

    return data_train, data_test


# Keep the first 70% of the rows for training and the remaining rows for testing.
# From here on `data` refers to the training part only.
data, data_test = test_train_split(data, 0.7)

We need to transform the data so that it becomes a classification problem. We also need to coerce the data into the necessary format. We write an encode function that does all that. First it creates an additional column 'position' which takes the values "sell"/"buy"/"hold" depending on price movements. If the price in t-1 is below the price in t, then position(t) takes the value "buy", indicating that the right move in t-1 was to buy the asset. If the price in t-1 is greater than the price in t, then position(t) takes the value "sell", indicating that the right move in t-1 was to sell the asset. If the two prices are equal, position(t) takes the value "hold". The new 'position' column is the output variable the model will try to predict. The features are given by the indicators computed earlier. We apply the encode function to both datasets.

In [None]:
def encode(data):
    """Label every day with the price direction and build the model inputs.

    Parameters:
        data: DataFrame with the columns 'High', 'Low', 'Open', 'Close' and
            'Adj Close'. It is modified in place: those four price columns
            and the first row are dropped and a 'position' column is
            inserted.

    Returns:
        (data, X, y): the modified frame, a DataFrame of features
        (RSI, D, MA, boll_up, boll_dw) and the matching 'position' labels.
        Labels are 1 (price rose since the previous row), 2 (unchanged) and
        3 (price fell).

    Raises:
        ValueError: if a price comparison involves a missing (NaN) value.
    """
    RSI, K, D, MA, boll_up, boll_dw = indicators(data)  # computes indicators

    prices = data['Adj Close'].to_numpy(dtype=float)

    positions = []
    for previous, current in zip(prices[:-1], prices[1:]):
        if current > previous:
            positions.append(1)
        elif current == previous:
            positions.append(2)
        elif current < previous:
            positions.append(3)
        else:
            # Only reachable when one of the two prices is NaN. Fail here,
            # before the frame is modified, instead of printing and hitting
            # an unrelated length mismatch on insert.
            raise ValueError('error: cannot label a row with a missing Adj Close price')

    data.drop(['High', 'Low', 'Open', 'Close'], axis=1, inplace=True)
    data.drop(data.index[0], axis=0, inplace=True)

    data.insert(2, 'position', positions)

    # Every indicator list ends on the last row but has a different warm-up,
    # so keep the trailing values shared by all of them. With the windows
    # used in indicators() this equals the former RSI[11:], D[2:] and y[19:].
    n_samples = len(MA)
    RSI = RSI[len(RSI) - n_samples:]
    D = D[len(D) - n_samples:]
    features = {'RSI': RSI, 'D': D, 'MA': MA, 'boll_up': boll_up, 'boll_dw': boll_dw}
    X = pd.DataFrame(features)

    # NOTE(review): the RSI paired with a label is computed from a window
    # that ends on the labelled day itself - confirm this is intended.
    y = data['position']
    y = y.iloc[len(y) - n_samples:]

    return data, X, y


data, X, y = encode(data) # training set: labelled frame, feature DataFrame X and label Series y
data_test, X_test, y_test = encode(data_test) # testing set: same encoding applied to the held-out rows

First modelling attempt: we decide to use a Neural Network as our first model; for now the objective is to experiment with a generic model to ensure the feasibility of the project. We start with a 4-layer NN (1 input, 2 hidden and 1 output layer), with 3 neurons per layer and 'relu' activation functions, except for the last layer, which uses 4 neurons (number of classes + 1) and a softmax function. We choose Sparse Categorical Cross Entropy as the loss function. For convenience, the NN creation is condensed into a function.

In [None]:
def NeuralNet():
    """Build and compile the feed-forward classifier.

    The network stacks three Dense layers of 3 'relu' units and a 4-unit
    softmax output layer, and is compiled with the 'adam' optimizer, sparse
    categorical cross-entropy loss and sparse categorical accuracy.

    Returns:
        The compiled, untrained Keras model.
    """
    # Use the Sequential class from the same tensorflow.keras package as
    # `layers`, instead of mixing it with the standalone keras package.
    NN = keras.Sequential()

    NN.add(layers.Dense(3, activation='relu'))
    NN.add(layers.Dense(3, activation='relu'))
    NN.add(layers.Dense(3, activation='relu'))
    NN.add(layers.Dense(4, activation='softmax'))

    NN.compile(optimizer='adam',
               loss=keras.losses.SparseCategoricalCrossentropy(),
               # `metrics` is documented as a list of metrics
               metrics=[keras.metrics.SparseCategoricalAccuracy()],
               )

    return NN

We are now able to train the NN on the training data and predict the position for each day of the test data. We store the predictions in different formats for later use.

In [None]:
NN = NeuralNet() # creates Neural Net

NN.fit(X, y, epochs = 15) # fits the model

pred = NN.predict(X_test) # softmax output: one row of class probabilities per test sample
pred_class = pred.argmax(axis = -1) # Predicted class on test data

# Predicted probability of class 1 ("buy"). Taken from `pred` directly:
# `predict_proba` returned the same values as `predict` and has been removed
# from recent Keras versions.
pred_proba = pred[:, 1]

For readability, we also encode the predicted class as a string ("buy"/"hold"/"sell") and append this new output to the dataframe.

In [None]:
def output_encode(pred_class, data):
    """Attach the predictions to data, both as class codes and as strings.

    The last prediction is discarded and the first 20 rows of data are
    removed in place. The remaining predictions are then inserted as the
    columns 'pred_string' (position 3) and 'pred_pos' (position 4).
    Class 1 is written as 'buy', class 2 as 'hold' and any other value as
    'sell'.

    Returns the same (modified) data object.
    """
    kept = pred_class[0 : -1]  # every prediction except the last one

    names = ['buy' if code == 1 else 'hold' if code == 2 else 'sell'
             for code in kept]

    warm_up_rows = data.index[0 : 20]
    data.drop(warm_up_rows, axis = 0, inplace = True)

    data.insert(3, 'pred_string', names)
    data.insert(4, 'pred_pos', kept)

    return data


data_test = output_encode(pred_class, data_test) # adds the 'pred_string' and 'pred_pos' columns to the test frame (modified in place)

To assess the quality of our model, we use a confusion matrix and a classification report.

In [None]:
# Per-class precision / recall summary of the test predictions
report = classification_report(y_test, pred_class)
# Counts of true class (rows) versus predicted class (columns)
conf_mat = confusion_matrix(y_test, pred_class)
print(report, conf_mat)

To scale our results to real life, we create a function that computes the profits made by following the model. Every time the model is right (predicts the right direction) we generate a profit, and every time the model is wrong we generate a loss. The profits function does not take compound interest into account for now.

In [None]:
def profits(data, amount):
    """Compute the daily profit obtained by following the predictions.

    Parameters:
        data: DataFrame with the columns 'Adj Close', 'position' and
            'pred_pos' (codes 1 = buy, 2 = hold, 3 = sell).
        amount: amount invested on each day (profits are not compounded).

    Returns:
        A list with one profit per row of data; the first entry is always 0.
        For row p the return tx between rows p - 1 and p is earned when the
        prediction at p - 1 is 1, lost when it is 3, and ignored (profit 0)
        when either the prediction or the position at p - 1 is 2.
    """
    # Positional arrays: integer lookups on a date-indexed Series rely on a
    # deprecated label/position fallback.
    prices = data['Adj Close'].to_numpy(dtype=float)
    positions = data['position'].to_numpy()
    predictions = data['pred_pos'].to_numpy()

    known_codes = (1, 2, 3)
    results = [0]  # no profit can be computed for the first row

    # NOTE(review): position and pred_pos are read at p - 1 while tx is the
    # return from p - 1 to p - confirm this alignment is the intended one.
    for p in range(1, len(prices)):

        tx = (prices[p] - prices[p - 1]) / prices[p - 1]
        position = positions[p - 1]
        prediction = predictions[p - 1]

        if position not in known_codes or prediction not in known_codes:
            # E.g. class 0 from the 4-way softmax. Record no trade instead of
            # re-using the previous day's profit (or an undefined variable).
            print('error')
            profit = 0
        elif position == 2 or prediction == 2:
            profit = 0
        elif prediction == 1:
            profit = amount * tx
        else:
            profit = - (amount * tx)

        results.append(profit)

    return results


# Amount passed to profits(); every daily profit is computed on this same amount
amount = 1000

# NOTE(review): this rebinds the name `profits` from the function to the list it returns,
# so the cell cannot be executed a second time without redefining the function first.
profits = profits(data_test, amount) # computes profits made with specified initial investment

data_test.insert(3, 'profit', profits) # stores the daily profits as a new column at position 3

sum_profits = sum(data_test['profit']) # total profit over the test rows
print(round(sum_profits, 2))
percentage_gain = sum_profits * 100 / amount # total profit as a percentage of `amount`
cumsum = np.cumsum(profits) # running total of the daily profits (not used in this cell)
print(round(percentage_gain, 2),'%') # prints the % return