In [1]:
# load dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import random
import tensorflow as tf

Using TensorFlow backend.


In [2]:
def CNN(d,f,e, my_seed):
    """Build, compile and fit a small softmax classifier.

    Despite its name, the network contains only fully connected (Dense)
    layers: two 50-unit ReLU layers followed by a softmax output layer.

    Parameters
    ----------
    d : 2-D array
        Feature matrix; ``d.shape[1]`` fixes the input width of the network.
    f : array-like of int
        Integer class ids; they are one-hot encoded with ``to_categorical``
        and the number of one-hot columns sets the output width.
    e : int
        Number of training epochs.
    my_seed : int
        Seed given to NumPy, Python's ``random`` and TensorFlow before the
        model is created.

    Returns
    -------
    The fitted ``Sequential`` model.
    """
    # Seed every random source before any layer is created.
    for seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
        seed_fn(my_seed)

    one_hot = to_categorical(f)
    n_inputs = d.shape[1]
    n_outputs = one_hot.shape[1]

    net = Sequential()
    net.add(Dense(units=50, activation='relu', input_dim=n_inputs))
    net.add(Dense(units=50, activation='relu'))
    net.add(Dense(units=n_outputs, activation='softmax'))
    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    net.fit(d, one_hot, epochs=e, shuffle=True, verbose=0)
    return net

In [3]:
# Load the combined 2019 CSV and discard its 'sentiment' column.
df = pd.read_csv('Data/combined_2019.csv').drop(columns=['sentiment'])

# Positional column ranges (half-open, as used with iloc / df.columns[...]):
#   begin:end           -> columns sliced out as model inputs
#   begin_drop:end_drop -> columns removed once the ma* columns are added
begin, end = 67, 91
begin_drop, end_drop = 91, 101

In [4]:
# Add columns ma1..ma31. Each one is the row-wise SUM (not a mean) of a
# 10-column positional window; the window for ma<k> starts at column k + 7.
for k in range(1, 32):
    first = k + 7
    df['ma{}'.format(k)] = df.iloc[:, first:first + 10].sum(axis=1)

# Remove the columns sitting at positions begin_drop..end_drop-1.
df.drop(columns=df.columns[begin_drop:end_drop], inplace=True)

In [5]:
# Select training hour and test hour: keep only rows whose 'hour' is 6, 7 or 8.
# .copy() makes `data` an independent frame rather than a slice of `df`, so
# adding columns to it later does not raise SettingWithCopyWarning.
data = df[df['hour'].isin([6,7,8])].copy()
# The full frame is no longer needed; drop the reference to it.
del df

In [6]:
# Map a numeric value onto a three-way class label using a +/-5 threshold.
def setlabels(x):
    """Return 1 if ``x`` is above 5, -1 if it is below -5, otherwise 0.

    The thresholds are exclusive: exactly 5 and exactly -5 both give 0, as
    does any value for which neither comparison is true (e.g. NaN).
    """
    if x > 5:
        return 1
    if x < -5:
        return -1
    return 0

# Label every row from its 'ma31' value using setlabels (-1, 0 or 1).
data["labels"] = data["ma31"].map(setlabels)

In [7]:
# Walk-forward evaluation: fit on a window of rows, skip a gap, predict the
# next window, then slide everything forward by predict_int rows.
profit_tot = []
train_int = 1000     # rows in each training window
predict_int = 500    # rows in each prediction window (also the slide step)
gap = 100            # rows skipped between the training and prediction windows
                     # NOTE(review): presumably guards against label look-ahead - confirm
epochs = 1           # training epochs per cohort
seed = 49            # RNG seed passed to CNN for every cohort
count = int((data.shape[0] - train_int) / predict_int)

# Per-cohort frames are collected here and concatenated once after the loop.
# DataFrame.append was removed in pandas 2.0 and re-copies the accumulated
# frame on every iteration.
predicted_frames = []

for j in range(count):

    start = predict_int * j

    # Get fitting data
    train = data.iloc[start:start + train_int]
    X = train.iloc[:, begin:end].to_numpy()
    y = train['labels']
    # Map the labels present in this window onto 0..k-1 for to_categorical.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    cnn = CNN(X, y_encoded, epochs, seed)

    # Get predicting data
    pred_start = start + train_int + gap
    X_pred = data.iloc[pred_start:pred_start + predict_int].copy()
    X_predict = X_pred.iloc[:, begin:end].to_numpy()
    # Sequential.predict_classes no longer exists in tf.keras; taking the
    # argmax over the softmax outputs yields the same class ids.
    class_ids = np.argmax(cnn.predict(X_predict, verbose=0), axis=-1)
    X_pred['class'] = le.inverse_transform(class_ids)
    # Keep only rows with a non-zero predicted class. A boolean mask is used
    # instead of dropping by index label so duplicate labels cannot remove
    # extra rows.
    X_pred = X_pred[X_pred['class'] != 0]
    predicted_frames.append(X_pred)

    # Profit estimate: predicted class (+1 / -1) times the row's ma31 value.
    # Computed without adding a column, so the saved frames stay unchanged.
    profit = (X_pred['class'] * X_pred['ma31']).sum()
    print("Est Profit Cohort " + str(j) + ": " + str(profit))
    profit_tot.append(profit)

# pd.concat rejects an empty list, so fall back to an empty frame when no
# cohort was evaluated.
df_predict = pd.concat(predicted_frames) if predicted_frames else pd.DataFrame()

print("Total Profit " + " : " + str(sum(profit_tot)))

Est Profit Cohort 0: 31.499999999999982
Est Profit Cohort 1: 175.4
Est Profit Cohort 2: 156.29999999999995
Est Profit Cohort 3: -17.000000000000004
Est Profit Cohort 4: -40.49999999999999
Est Profit Cohort 5: 109.29999999999998
Est Profit Cohort 6: 1.9999999999999938
Est Profit Cohort 7: 81.69999999999999
Est Profit Cohort 8: -63.30000000000001
Est Profit Cohort 9: 40.99999999999999
Est Profit Cohort 10: 0.2000000000000055
Est Profit Cohort 11: -40.7
Est Profit Cohort 12: 15.500000000000005
Est Profit Cohort 13: -21.700000000000003
Est Profit Cohort 14: 0.8000000000000007
Est Profit Cohort 15: 0.0
Est Profit Cohort 16: -3.700000000000003
Est Profit Cohort 17: 21.30000000000002
Est Profit Cohort 18: -33.3
Est Profit Cohort 19: -11.9
Est Profit Cohort 20: -10.400000000000002
Est Profit Cohort 21: -10.7
Est Profit Cohort 22: 10.200000000000003
Est Profit Cohort 23: 5.9
Est Profit Cohort 24: -21.800000000000004
Est Profit Cohort 25: 1.400000000000002
Est Profit Cohort 26: 6.299999999999993

In [8]:
# Write the collected predictions to CSV, keeping the row index as a column.
df_predict.to_csv('2019_predict_5_cnn.csv', index=True)