In [1]:
# load dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

import random
from datetime import datetime

In [10]:
def SVM(d,f,e):
    """Fit and return a polynomial-kernel support vector classifier.

    d -- training feature matrix, handed to ``fit`` as X
    f -- training targets, handed to ``fit`` as y
    e -- value forwarded to ``SVC`` as its ``C`` (regularization) parameter
    """
    classifier = SVC(C=e, kernel='poly', gamma='auto')
    classifier.fit(d, f)
    return classifier

In [3]:
# Load the combined 2019 dataset and discard the 'sentiment' column
df = pd.read_csv('../Data/combined_2019.csv').drop(columns=['sentiment'])

# Positional column bounds consumed by later cells:
#   [begin_drop, end_drop) -> columns removed once the rolling sums have been added
#   [begin, end)           -> columns sliced out as the classifier's feature matrix
begin, end = 67, 91
begin_drop, end_drop = 91, 101

In [4]:
# Add 31 rolling-window columns ma1..ma31. Despite the "ma" prefix each one is
# the row-wise SUM (not the mean) of ten consecutive columns; the window for
# ma<k> covers column positions [k+7, k+17).
for k in range(1, 32):
    lo = k + 7
    df[f'ma{k}'] = df.iloc[:, lo:lo + 10].sum(axis=1)

# Remove the columns sitting at positions [begin_drop, end_drop)
df.drop(columns=df.columns[begin_drop:end_drop], inplace=True)

In [5]:
# Select training hour and test hour: keep only rows whose 'hour' is 6, 7 or 8.
# The explicit .copy() makes `data` an independent frame, so assigning new
# columns to it afterwards does not raise pandas' SettingWithCopyWarning.
data = df[df['hour'].isin([6, 7, 8])].copy()
del df

In [6]:
# Bucket a summed PIP value into a class label using a +/-5 threshold
def setlabels(x):
    """Return 1 when ``x`` is above 5, -1 when it is below -5, and 0 otherwise.

    The bounds are exclusive: exactly 5 or -5 maps to 0.
    """
    if x > 5:
        return 1
    if x < -5:
        return -1
    return 0

# Derive the class label of every row from its 'ma31' value
data["labels"] = data["ma31"].map(setlabels)

In [17]:
# Walk-forward evaluation: for each of 10 SVC `C` settings, repeatedly fit on a
# window of rows, predict a later window, and total the resulting profit.
train_int = 1000    # rows in each training window
predict_int = 500   # rows in each prediction window; also the step between windows
gap = 100           # rows skipped between the end of training and the start of prediction
# NOTE(review): the gap presumably keeps the training labels from overlapping
# the prediction window -- confirm.
count = int((data.shape[0] - train_int) / predict_int)  # windows per simulation

sims = []
for k in range(10):
    # The "Sim" value printed below is this offset; the C actually passed to
    # the classifier is 0.25 + offset.
    offset = 0.025 * (k + 1)
    profit_tot = []

    for j in range(count):
        start = predict_int * j
        train_stop = start + train_int

        # Fit on rows [start, train_stop), feature columns [begin, end)
        X = data.iloc[start:train_stop, begin:end].to_numpy()
        le = LabelEncoder()
        y_encoded = le.fit_transform(data['labels'].iloc[start:train_stop])
        svm = SVM(X, y_encoded, 0.25 + offset)

        # Predict the window that begins `gap` rows after the training rows end
        pred_start = train_stop + gap
        X_pred = data.iloc[pred_start:pred_start + predict_int]
        X_predict = X_pred.iloc[:, begin:end].to_numpy()
        signal = pd.Series(le.inverse_transform(svm.predict(X_predict)),
                           index=X_pred.index)

        # Only rows with a non-zero predicted class contribute:
        # profit = predicted class * 'ma31', summed over those rows.
        active = signal != 0
        profit = (signal[active] * X_pred.loc[active, 'ma31']).sum()
        profit_tot.append(profit)

    sim_total = sum(profit_tot)
    now = datetime.now()
    print(f"Profit for Sim {offset} : {sim_total} time {now.strftime('%H:%M:%S')}")
    sims.append(sim_total)

print(f"Total Sum  : {sum(sims)}")

Profit for Sim 0.025 : 250.30000000000035 time 09:05:57
Profit for Sim 0.05 : 274.5999999999999 time 09:06:58
Profit for Sim 0.07500000000000001 : 292.6999999999996 time 09:07:39
Profit for Sim 0.1 : 407.7 time 09:08:18
Profit for Sim 0.125 : 356.5999999999997 time 09:08:59
Profit for Sim 0.15000000000000002 : 236.19999999999976 time 09:09:38
Profit for Sim 0.17500000000000002 : 176.79999999999995 time 09:10:15
Profit for Sim 0.2 : 199.5999999999998 time 09:10:53
Profit for Sim 0.225 : 219.8000000000001 time 09:11:30
Profit for Sim 0.25 : 308.29999999999984 time 09:12:08
Total Sum  : 2722.599999999999


In [None]:
# Write the per-simulation profit totals to a CSV file in the working directory
df_sims = pd.DataFrame(data=sims)
df_sims.to_csv(path_or_buf='2019_predict_sims.csv')