In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [2]:
# Read datafile.csv (path is relative to the kernel's working directory) into a DataFrame.
data = pd.read_csv('datafile.csv')

In [3]:
# Display pandas' summary statistics for the loaded frame as a quick range check.
data.describe()

Unnamed: 0,RANK,FILE,SCORE,TIME,MOVES
count,47278.0,47278.0,47278.0,47278.0,47278.0
mean,3.840306,3.541309,-2.606667,754.948271,47.939549
std,2.143153,2.248801,11.123017,188.877364,9.602971
min,0.0,0.0,-37.0,-0.036504,20.0
25%,2.0,2.0,-7.0,709.242565,44.0
50%,4.0,4.0,-1.0,838.929584,49.0
75%,6.0,5.0,2.0,879.465136,59.0
max,7.0,7.0,37.0,899.999661,83.0


In [4]:
# Bucket the numeric SCORE column into three labelled bands (bounds inclusive):
#   -37 to -13  LOW
#   -12 to  12  MEDIUM
#    13 to  37  HIGH
#
# NOTE: this overwrites data["SCORE"] in place, so the cell must be run exactly
# once on the freshly loaded frame; re-running it would compare strings with ints.
score = data["SCORE"]
categories = [score.between(-37, -13), score.between(-12, 12), score.between(13, 37)]
values = ["LOW", "MEDIUM", "HIGH"]
# The fallback has to be a string like the choices: mixing an integer default
# with string choices is rejected by recent NumPy releases (no common dtype),
# and older releases silently turned the 0 into the string "0" anyway.
data["SCORE"] = np.select(categories, values, default="0")

In [5]:
# Features: every column except the SCORE label.
X = data.drop(columns=["SCORE"])

# Binary target: LOW becomes 0, while MEDIUM and HIGH both become 1.
score_to_target = {"LOW": 0, "MEDIUM": 1, "HIGH": 1}
Y = data["SCORE"].map(score_to_target)

# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)

# Logistic Regression

Logistic regression does not work especially well: it reaches about 86% accuracy on the test set, which leaves room for improvement.

In [6]:
# Build a logistic-regression classifier with default settings, then fit it on
# the training split (fit returns the estimator itself).
model = LogisticRegression()
model = model.fit(X_train, Y_train)



In [7]:
# Mean accuracy of the fitted classifier on the held-out test split.
model.score(X_test, Y_test)

0.8667478528393796

# Neural Network

Let's see whether a neural network performs any better.

In [8]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from math import floor, ceil
from pylab import rcParams

%matplotlib inline

In [9]:
def encode(series):
    """One-hot encode ``series`` after casting every value to ``str``.

    Returns the frame produced by ``pd.get_dummies``: one indicator column
    per distinct string value, with one row per input element.
    """
    as_text = series.astype(str)
    return pd.get_dummies(as_text)
# Features are all columns except SCORE; the label becomes a one-hot frame
# with one indicator column per SCORE category.
X = data.drop(columns=["SCORE"])
Y = encode(data["SCORE"])

In [10]:
# Hold out 10% of the rows for evaluating the network. The split is seeded
# (same seed as the logistic-regression split) so that re-running the notebook
# reproduces the same train/test partition and a comparable accuracy figure.
train_x, test_x, train_y, test_y = train_test_split(
    X, Y, test_size=0.1, random_state=42
)

In [11]:
def multilayer_perceptron(x, weights, biases, keep_prob):
    """Build a one-hidden-layer network and return its output-layer values.

    x         -- input tensor fed to the hidden layer.
    weights   -- mapping with entries 'h1' (hidden layer) and 'out' (output layer).
    biases    -- mapping with entries 'b1' (hidden layer) and 'out' (output layer).
    keep_prob -- value handed to ``tf.nn.dropout`` for the hidden activations.

    No softmax is applied here; the caller receives the raw output-layer sums.
    """
    # Hidden layer: affine transform -> ReLU -> dropout.
    hidden = tf.nn.dropout(
        tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1'])),
        keep_prob,
    )
    # Output layer: plain affine transform of the hidden activations.
    return tf.matmul(hidden, weights['out']) + biases['out']


# Layer sizes: input width and class count are taken from the training frames,
# the hidden width is fixed.
n_input = train_x.shape[1]
n_classes = train_y.shape[1]
n_hidden_1 = 10

# Trainable weight matrices, initialised from tf.random_normal.
weights = {}
weights['h1'] = tf.Variable(tf.random_normal([n_input, n_hidden_1]))
weights['out'] = tf.Variable(tf.random_normal([n_hidden_1, n_classes]))

# Trainable bias vectors, initialised the same way.
biases = {}
biases['b1'] = tf.Variable(tf.random_normal([n_hidden_1]))
biases['out'] = tf.Variable(tf.random_normal([n_classes]))

# Dropout keep-probability, supplied through feed_dict at run time.
keep_prob = tf.placeholder("float")



Instructions for updating:
Colocations handled automatically by placer.


In [31]:
# Training hyper-parameters.
training_epochs = 2000  # number of passes over the training data
batch_size = 300        # used to derive the number of mini-batches per epoch
display_step = 10       # print the average cost every this many epochs

# Graph inputs: a feature batch and its one-hot labels; the leading dimension
# is left open (None).
x = tf.placeholder("float", shape=[None, n_input])
y = tf.placeholder("float", shape=[None, n_classes])

In [32]:
# Network output for the input placeholder; these values are passed as logits below.
predictions = multilayer_perceptron(x, weights, biases, keep_prob)

# Cost: softmax cross-entropy against the one-hot labels, averaged with reduce_mean.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
    logits=predictions,
    labels=y,
)
cost = tf.reduce_mean(per_example_loss)

# Training op: Adam minimising the cost.
adam = tf.train.AdamOptimizer(learning_rate=0.0001)
optimizer = adam.minimize(cost)


In [33]:
# Train the network, printing the average cost every `display_step` epochs,
# then report accuracy on the held-out test split.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The mini-batch partition is identical in every epoch, so build it once
    # instead of re-splitting the frames on each pass. Clamp to at least one
    # batch: np.array_split rejects 0 sections, which would otherwise happen
    # whenever the training set is smaller than batch_size.
    total_batch = max(1, len(train_x) // batch_size)
    x_batches = np.array_split(train_x, total_batch)
    y_batches = np.array_split(train_y, total_batch)

    for epoch in range(training_epochs):
        avg_cost = 0.0
        for batch_x, batch_y in zip(x_batches, y_batches):
            # One optimisation step; dropout keeps 80% of hidden units while training.
            _, c = sess.run(
                [optimizer, cost],
                feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8},
            )
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Accuracy: share of rows whose arg-max output matches the arg-max label.
    # Evaluated inside this session (so the trained weights are used) with
    # dropout disabled (keep_prob = 1.0).
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: test_x, y: test_y, keep_prob: 1.0}))

Epoch: 0001 cost= 2162.639341969
Epoch: 0011 cost= 400.068775312
Epoch: 0021 cost= 136.788903970
Epoch: 0031 cost= 42.472406253
Epoch: 0041 cost= 2.776858679
Epoch: 0051 cost= 0.630116126
Epoch: 0061 cost= 0.627513955
Epoch: 0071 cost= 0.625356512
Epoch: 0081 cost= 0.623504923
Epoch: 0091 cost= 0.621772409
Epoch: 0101 cost= 0.619652261
Epoch: 0111 cost= 0.619107620
Epoch: 0121 cost= 0.607533794
Epoch: 0131 cost= 0.597433243
Epoch: 0141 cost= 0.592043585
Epoch: 0151 cost= 0.589367825
Epoch: 0161 cost= 0.589923530
Epoch: 0171 cost= 0.589479073
Epoch: 0181 cost= 0.587227967
Epoch: 0191 cost= 0.587640151
Epoch: 0201 cost= 0.586071797
Epoch: 0211 cost= 0.584592795
Epoch: 0221 cost= 0.585368422
Epoch: 0231 cost= 0.583860227
Epoch: 0241 cost= 0.582502192
Epoch: 0251 cost= 0.578363417
Epoch: 0261 cost= 0.571404827
Epoch: 0271 cost= 0.567534147
Epoch: 0281 cost= 0.556569130
Epoch: 0291 cost= 0.550654686
Epoch: 0301 cost= 0.542371515
Epoch: 0311 cost= 0.539735193
Epoch: 0321 cost= 0.536647181
Ep

In [None]:
# Re-evaluate test accuracy with dropout disabled (keep_prob = 1.0).
# NOTE(review): this opens a brand-new Session, which does not carry over the
# variable values learned in the training session above, and no initializer or
# checkpoint restore is run here. As written it is expected to fail with an
# uninitialized-variable error (or, if an initializer were added, to score
# untrained weights). Evaluate inside the training session or save/restore
# the weights first. TODO confirm and fix.
with tf.Session() as sess:
    print("Accuracy:", accuracy.eval({x: test_x, y: test_y, keep_prob: 1.0}))