In [514]:
# load dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [515]:
# Load data and select trainging hour and test hour
trainhour = 5
testhour = 7
df = pd.read_csv('series.csv')
cols5 = ['21', '22', '23', '24', '25']
cols10 = ['21', '22', '23', '24', '25', '26', '27', '28', '29', '30']
#cols5 = ['31', '32', '33', '34', '35']
#cols10 = ['31', '32', '33', '34', '35', '36', '37', '38', '39', '40']
df['sum5'] = df[cols5].sum(axis=1)
df['sum10'] = df[cols10].sum(axis=1)
data = df[(df['hour'] == trainhour) & (df['month'] == 6)]
testdata = df[(df['hour'] == testhour) & (df['month'] == 6)]
del df

In [516]:
# Categorize the 'sum' column the number of PIPs over selected range
def setlabels(x):
    if x < -10:
        return -1
    elif x > 10:
        return 1
    else:
        return 0

data["labels"] = data["sum10"].apply(setlabels)

In [517]:
# Get X, y for model
X = data.iloc[:,8:28].to_numpy()

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
y = data['labels']
label_encoder = LabelEncoder()
label_encoder.fit(y)
y_encoded = label_encoder.transform(y)
y = to_categorical(y_encoded)

In [518]:
# Kmeans Model
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=20)
kmeans.fit(X)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=20, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [519]:
# Random forrest model
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=40)
rf = rf.fit(X, y_encoded)

In [520]:
# SVM Model
from sklearn.svm import SVC 
svm = SVC(kernel='poly',gamma='auto',C=.25)
svm.fit(X, y_encoded)

SVC(C=0.25, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [521]:
# Neural Network model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
cnn = Sequential()
cnn.add(Dense(units=50, activation='relu', input_dim=X.shape[1]))
cnn.add(Dense(units=50, activation='relu'))
cnn.add(Dense(units=y.shape[1], activation='softmax'))
cnn.summary()

cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
cnn.fit(X, y, epochs=5, shuffle=True, verbose=0)

Model: "sequential_36"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_108 (Dense)            (None, 50)                1050      
_________________________________________________________________
dense_109 (Dense)            (None, 50)                2550      
_________________________________________________________________
dense_110 (Dense)            (None, 3)                 153       
Total params: 3,753
Trainable params: 3,753
Non-trainable params: 0
_________________________________________________________________


<tensorflow.python.keras.callbacks.History at 0x20d2335e4e0>

In [522]:
# Model loss and Accuray
print(f"Random Forrest - Score: {rf.score(X, y_encoded)}")
print(f"Support Vector - Score: {svm.score(X, y_encoded)}")
cnn_loss, cnn_accuracy = cnn.evaluate(X, y, verbose=0)
print(f"Normal Neural Network - Loss: {cnn_loss}, Accuracy: {cnn_accuracy}")

Random Forrest - Score: 0.9992063492063492
Support Vector - Score: 0.9928571428571429
Normal Neural Network - Loss: 0.136023119757218, Accuracy: 0.970634937286377


In [523]:
# Set Kmeans groups on testdata
data["grp"] = kmeans.labels_
data_group = data.groupby('grp').agg(['sum','count', 'min', 'max'])
buygroup = data_group['labels'][['sum']].idxmax()[0]
sellgroup = data_group['labels'][['sum']].idxmin()[0]
def setkmeans(x):
    if x == sellgroup:
        return -1
    elif x == buygroup:
        return 1
    else:
        return 0

In [524]:
# Pull out validation data and make predicts
formodel = testdata.iloc[:,8:28].to_numpy()
testdata['grp'] = kmeans.fit_predict(formodel)
testdata["kmn"] = testdata["grp"].apply(setkmeans)
testdata['rf'] = label_encoder.inverse_transform(rf.predict(formodel))
testdata['svm'] = label_encoder.inverse_transform(svm.predict(formodel))
testdata['cnn'] = label_encoder.inverse_transform(cnn.predict_classes(formodel))
testdata['abs'] = testdata['kmn'].abs() + testdata['rf'].abs() + testdata['svm'].abs() + testdata['cnn'].abs()
testdata = testdata.drop(testdata[(testdata['abs'] == 0)].index)
testdata['tot'] = testdata['kmn'] + testdata['rf'] + testdata['svm'] + testdata['cnn']
testdata = testdata.drop(testdata[(testdata['tot'] == -1)].index)
testdata = testdata.drop(testdata[(testdata['tot'] == 0)].index)
testdata = testdata.drop(testdata[(testdata['tot'] == 1)].index)

In [525]:
totalrows = testdata.shape[0]
totalcols = testdata.shape[1]
profit = []
for i in range(totalrows):      
    if (testdata.iloc[i,totalcols-1] < -1):
        temp = -testdata['sum10'].iloc[i]
    else:
        temp = testdata['sum10'].iloc[i]
    
    profit.append(temp)

print("Est Profit: " + testdata.columns[totalcols-1] + " " + str(sum(profit)))

Est Profit: tot 32.8


In [526]:
totalrows = testdata.shape[0]
totalcols = testdata.shape[1]
profit = []
for i in range(totalrows):
    temp = 0
    for j in range(10):
        temp += testdata.iloc[i,28+j]
        if (testdata.iloc[i,totalcols-1] < -1):
            if(temp > 2):
              break  
        elif (testdata.iloc[i,totalcols-1] > 1):
            if(temp < -2):
              break
        else:
            temp = 0
            
    if (testdata.iloc[i,totalcols-1] < -1):
        temp = -temp
    profit.append(temp)

print("Est Profit: " + testdata.columns[totalcols-1] + " " + str(sum(profit)))

Est Profit: tot 32.8


In [528]:
#testdata.to_csv('test_models.csv')

In [529]:
profit

[32.8]