In [1]:
# load dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

import random

In [2]:
def SVM(d, f, my_seed):
    """Fit a polynomial-kernel support vector classifier.

    NumPy's and Python's global random generators are both seeded with
    ``my_seed`` before the model is fitted.

    Parameters
    ----------
    d : samples handed to ``SVC.fit`` as the feature matrix.
    f : target labels handed to ``SVC.fit``.
    my_seed : seed applied to ``np.random`` and ``random``.

    Returns
    -------
    The fitted ``SVC`` (``kernel='poly'``, ``gamma='auto'``, ``C=0.25``).
    """
    # Seed both global RNGs with the same value.
    for seed_fn in (np.random.seed, random.seed):
        seed_fn(my_seed)

    classifier = SVC(C=0.25, gamma='auto', kernel='poly')
    classifier.fit(d, f)
    return classifier

In [3]:
# Load the combined 2019 data set and discard its 'sentiment' column.
df = pd.read_csv('Data/combined_2019.csv').drop(columns=['sentiment'])

# Positional column bounds used by later cells:
#   [begin_drop, end_drop) is removed once the ma* columns have been added,
#   [begin, end) is sliced out as the model's feature matrix.
begin_drop, end_drop = 91, 101
begin, end = 67, 91

In [4]:
# Build 31 rolling-window columns ma1 .. ma31. Column ma<k> is the row-wise
# SUM of the 10 columns at positions k+7 .. k+16 (a window sum: the code does
# not divide by the window length, despite the "ma" name).
window = 10
for first_col in range(8, 39):
    df[f'ma{first_col - 7}'] = df.iloc[:, first_col:first_col + window].sum(axis=1)

# Remove the columns at positions [begin_drop, end_drop), in place.
df.drop(columns=df.columns[begin_drop:end_drop], inplace=True)

In [5]:
# Select the training and test hours: keep only rows whose 'hour' is 6, 7 or 8.
# .copy() detaches the selection from df, so adding columns to `data` in later
# cells does not trigger pandas' SettingWithCopyWarning.
data = df[df['hour'].isin([6, 7, 8])].copy()
del df  # the unfiltered frame is not used after this point

In [6]:
# Categorize a value into three classes using a +/-5 threshold.
# (The original note describes the value as a number of PIPs over the
# selected range -- presumably the 'ma31' column it is applied to; confirm.)
def setlabels(x):
    """Return 1 if x > 5, -1 if x < -5, and 0 otherwise.

    The bounds are exclusive: exactly 5 and exactly -5 both map to 0, as does
    any value for which neither comparison is true (e.g. NaN).
    """
    if x > 5:
        return 1
    if x < -5:
        return -1
    return 0

# Label every row by passing its 'ma31' value through setlabels.
data["labels"] = data["ma31"].map(setlabels)

In [7]:
# Walk-forward evaluation: fit on a window of `train_int` rows, skip `gap`
# rows, predict the following `predict_int` rows, then slide everything
# forward by `predict_int` rows and repeat.
profit_tot = []
train_int = 1000
predict_int = 500
gap = 100   # rows skipped between the end of the training window and the first predicted row
seed = 49   # seed forwarded to SVM() for every cohort

# NOTE: `count` does not take `gap` into account, so the prediction window of
# the final cohort can contain fewer than `predict_int` rows.
count = int((data.shape[0] - train_int) / predict_int)

# Per-cohort prediction frames; concatenated once after the loop instead of
# growing a DataFrame with DataFrame.append (removed in pandas 2.0, and
# quadratic when called inside a loop).
cohort_frames = []

for j in range(count):

    start = predict_int * j
    train_stop = start + train_int

    # Get fitting data
    X_data = data.iloc[start:train_stop]
    X = X_data.iloc[:, begin:end].to_numpy()
    y = X_data['labels']
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    svm = SVM(X, y_encoded, seed)

    # Get predicting data
    pred_start = train_stop + gap
    X_pred = data.iloc[pred_start:pred_start + predict_int].copy()
    X_predict = X_pred.iloc[:, begin:end].to_numpy()
    X_pred['class'] = le.inverse_transform(svm.predict(X_predict))

    # Keep only rows with a non-zero predicted class. A boolean mask is used
    # rather than dropping by index label, so rows sharing an index label
    # with a class-0 row are not removed by accident.
    X_pred = X_pred[X_pred['class'] != 0]

    # Store the cohort before 'profit' is added, so the combined frame keeps
    # the same columns as before; assign() below returns a new frame and
    # leaves the stored one untouched.
    cohort_frames.append(X_pred)

    X_pred = X_pred.assign(profit=X_pred['class'] * X_pred['ma31'])

    profit = X_pred['profit'].sum()
    print(f"Est Profit Cohort {j}: {profit}")
    profit_tot.append(profit)

# pd.concat raises on an empty list, which happens when there are too few
# rows for even one cohort; fall back to an empty frame in that case.
df_predict = pd.concat(cohort_frames) if cohort_frames else pd.DataFrame()

print(f"Total Profit  : {sum(profit_tot)}")

Est Profit Cohort 0: 317.80000000000007
Est Profit Cohort 1: 65.6
Est Profit Cohort 2: 24.89999999999997
Est Profit Cohort 3: -247.20000000000005
Est Profit Cohort 4: 101.20000000000003
Est Profit Cohort 5: -55.60000000000002
Est Profit Cohort 6: -104.6
Est Profit Cohort 7: 119.49999999999997
Est Profit Cohort 8: 25.999999999999993
Est Profit Cohort 9: 63.4
Est Profit Cohort 10: 173.39999999999998
Est Profit Cohort 11: 19.000000000000004
Est Profit Cohort 12: 94.99999999999999
Est Profit Cohort 13: -96.39999999999998
Est Profit Cohort 14: 9.8
Est Profit Cohort 15: -80.80000000000001
Est Profit Cohort 16: -88.49999999999999
Est Profit Cohort 17: -2.9999999999999853
Est Profit Cohort 18: -61.80000000000002
Est Profit Cohort 19: -52.89999999999999
Est Profit Cohort 20: 4.099999999999994
Est Profit Cohort 21: -38.6
Est Profit Cohort 22: -81.19999999999999
Est Profit Cohort 23: 11.799999999999997
Est Profit Cohort 24: 77.60000000000002
Est Profit Cohort 25: 71.9
Est Profit Cohort 26: 75.899

In [8]:
# Write the accumulated predictions to a CSV file in the working directory.
output_path = '2019_predict_5_svm.csv'
df_predict.to_csv(output_path)