In [9]:
import pandas as pd
import pickle

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

Reading in our data and keeping only the rows where `outcome` equals 1.

In [10]:
# Load the positional dataset and keep only the rows whose "outcome" column is 1.
df = (
    pd.read_csv("../data/500k_positional_reinf.csv", index_col=False)
    .loc[lambda frame: frame["outcome"] == 1]
)
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,label,outcome
1,-1,0,0,0,-1,1,1,-1,1,4,1
3,0,0,0,0,-1,0,1,1,-1,6,1
4,0,-1,0,-1,1,1,0,0,0,9,1
5,0,-1,-1,-1,1,1,-1,1,1,1,1
6,1,0,0,-1,-1,1,0,-1,0,3,1
...,...,...,...,...,...,...,...,...,...,...,...
3813334,0,0,-1,0,1,1,-1,1,-1,4,1
3813335,0,0,0,0,0,1,0,0,0,8,1
3813339,0,0,0,0,0,-1,1,0,0,4,1
3813344,0,0,0,0,0,0,0,0,0,6,1


Splitting the data into features (board columns `1`–`9`) and labels (the `label` column), then converting both to NumPy arrays.

In [11]:
# The feature columns are the ones named "1" through "9"; "label" is the target.
cols = list(map(str, range(1, 10)))
# Pull both out of the frame as plain NumPy arrays.
X, y = df[cols].values, df["label"].values
X, y

(array([[-1,  0,  0, ...,  1, -1,  1],
        [ 0,  0,  0, ...,  1,  1, -1],
        [ 0, -1,  0, ...,  0,  0,  0],
        ...,
        [ 0,  0,  0, ...,  1,  0,  0],
        [ 0,  0,  0, ...,  0,  0,  0],
        [ 0, -1,  0, ..., -1,  0,  1]], dtype=int64),
 array([4, 6, 9, ..., 4, 6, 8], dtype=int64))

Splitting the data into training and validation sets.

In [12]:
# Hold out a validation set using train_test_split's default proportions;
# the fixed random_state keeps the shuffle reproducible between runs.
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    random_state=1,
)

In [13]:
# Multi-layer perceptron with six hidden layers of 36 units each, seeded for
# reproducibility and capped at 200 training iterations.
model = MLPClassifier(
    hidden_layer_sizes=(36,) * 6,
    max_iter=200,
    random_state=1,
)
# Fit on the training split; as the last expression this also displays the estimator.
model.fit(train_X, train_y)

In [14]:
# Predict on both splits so training and validation accuracy can be compared.
train_preds = model.predict(train_X)
predictions = model.predict(val_X)
# accuracy_score is documented as accuracy_score(y_true, y_pred): pass the
# ground-truth labels first. Accuracy is symmetric, so the numbers are the
# same as before, but the call now matches the API contract and remains
# correct if it is later swapped for an asymmetric metric.
# Displayed as (validation accuracy, training accuracy).
accuracy_score(val_y, predictions), accuracy_score(train_y, train_preds)

(0.2859370765478332, 0.28932681157754003)

In [15]:
# Display the loss values the fitted MLP recorded during training
# (scikit-learn's `loss_curve_` attribute), to check whether training has plateaued.
model.loss_curve_

[1.7252978503770395,
 1.6120758646647697,
 1.5988957668037302,
 1.5908806310500228,
 1.5867399357121361,
 1.5841118216535324,
 1.5821773975234994,
 1.5808983398844918,
 1.5798490140659966,
 1.5789463494977805,
 1.5782939422901234,
 1.5775620833725787,
 1.5768375049126384,
 1.575995037337626,
 1.5754411268605084,
 1.5749777300944894,
 1.5744973515264928,
 1.5740617711858444,
 1.5736731801275563,
 1.5733441750469013,
 1.5730147498085938,
 1.5729516409973465,
 1.5725343821111963,
 1.5722211738995913,
 1.5718819279013347,
 1.5717901112290804,
 1.5713489871753066,
 1.571260197168533,
 1.571083354669032,
 1.5710489516634685,
 1.5707926327107202,
 1.5708121272003117,
 1.570305464068077,
 1.5704728016614335,
 1.5702786591102125,
 1.5699941797199313,
 1.5698807556177479,
 1.5696810982681497,
 1.56970869877824,
 1.569588251627149,
 1.5694249682255235,
 1.5693340825625492,
 1.5693549787467629,
 1.5693183771912087,
 1.5693224904599572,
 1.569133855420282,
 1.5688996307630212,
 1.5690972101724994,


In [16]:
# Persist the fitted model to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling raises; the previous bare
# open(...) call left the handle open until garbage collection.
# NOTE: only load this file back with pickle from a trusted location, since
# unpickling can execute arbitrary code.
with open("../model_binaries/MLP_positional_classifier.sav", "wb") as model_file:
    pickle.dump(model, model_file)