In [39]:
%matplotlib inline
from fenparsev4 import *
from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import TanhLayer
import numpy as np
import random
from sklearn.metrics import confusion_matrix
import os
from __future__ import print_function
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils

In [38]:
#most important part
def fries_ready():
    """Signal that a long-running cell has finished.

    Hands the command line ``say your fries are done`` to the system shell
    through ``os.system``. The shell's exit status is ignored and nothing is
    returned.
    """
    announcement = 'say your fries are done'
    os.system(announcement)
    
def write(str):
    """Write ``str`` to stdout preceded by a carriage return, then flush.

    The leading ``'\\r'`` moves the cursor back to the start of the line, so
    consecutive calls overwrite each other (used for progress counters).
    No newline is appended.

    Note: the parameter name shadows the builtin ``str`` inside this function.
    """
    stream = sys.stdout
    stream.write('\r' + str)
    stream.flush()
    
def writeln(str):
    """Write ``str`` to stdout exactly as given, then flush.

    Despite the name, no newline is appended; callers include ``'\\n'`` in
    the message themselves when they want a line break.

    Note: the parameter name shadows the builtin ``str`` inside this function.
    """
    stream = sys.stdout
    stream.write(str)
    stream.flush()

# Input configuration: the CSV shards are named <filename_prefix><n> for
# n in 1..num_files.
num_files = 75
filename_prefix='/media/drive/storage/csv_input/2015-12-08_112mil'
filename_suffix_range=range(1,num_files + 1)
debug=True

#read in csv
writeln("Inizializing read of %d files...\n" % (num_files))
# Collect the shards in a list and concatenate once: appending to a DataFrame
# inside the loop copies all previously read rows on every iteration, and
# DataFrame.append no longer exists in pandas 2.x.
frames = []
for i in filename_suffix_range:
    if debug: write("Reading...%d/%d" % (i, num_files))
    frames.append(pd.read_csv(filename_prefix + str(i)))
df = pd.concat(frames, ignore_index=True)
write("Reading...done\n")
#clean columns
df['y'] = df['y'].astype(int)
if debug: writeln("Converting to list...")
# Each 'x' cell is a ', '-separated string; turn it into a list of 0/1 ints
# (anything other than the token '1' becomes 0).
df['x'] = df['x'].apply(lambda x: [1 if '1' == a else 0 for a in x.split(', ')])
length = df.shape[0]
# Make sure the index is a clean 0..length-1 range before shuffling.
df = df.reset_index(drop=True)
writeln("done\nShuffling data...")
# NOTE(review): the permutation is unseeded, so the train/test split differs
# on every run; call np.random.seed(...) beforehand if reproducibility matters.
df = df.reindex(np.random.permutation(df.index))
# End the line here and use writeln for the next message: write() starts with
# '\r' and would overwrite "Shuffling data...done" on the console.
writeln("done\n")
writeln("Splitting data...")
# Integer division keeps `split` usable as a slice bound on Python 3 as well.
split = df.shape[0] * 4 // 5
all_train = df.iloc[:split, :]
all_test = df.iloc[split:, :]
writeln("done\n")

Inizializing read of 75 files...
Reading...done
Converting to list...done
Splitting data...done


In [40]:
# Convert the train/test frames into the arrays used by the network.
# NOTE(review): build_dataset is defined in the cell *below* this one, so on a
# fresh kernel that cell has to be executed first.
X_train, Y_train, X_test, Y_test = build_dataset(all_train, all_test)

building y labels
converting X_train and X_test to nparrays
converting y labels to categorical


In [41]:
#takes in full dataframe and converts to usable dataset
def build_dataset(all_train, all_test, nb_classes=2, debug=True):
    """Turn the train/test frames into feature arrays and one-hot label matrices.

    Parameters
    ----------
    all_train, all_test : frames with an 'x' column (one feature sequence per
        row) and a 'y' column (label).
    nb_classes : number of columns requested from ``np_utils.to_categorical``.
        Labels are binarised first (``y == 1`` -> 1, anything else -> 0), so
        only classes 0 and 1 ever occur.
    debug : when true, print a progress line before each stage.

    Returns
    -------
    (X_train, Y_train, X_test, Y_test) : ``X_*`` are ``np.array`` conversions
        of the 'x' columns, ``Y_*`` are the outputs of ``to_categorical``.
    """
    X_train = list(all_train['x'])
    X_test = list(all_test['x'])
    if debug: print("building y labels")
    # Build both label vectors the same way, as flat 0/1 integer arrays.
    # (Previously y_train was a list of one-element lists while y_test was
    # flat, and both were built with Python-level loops.)
    y_train = (all_train['y'] == 1).astype(int).values
    y_test = (all_test['y'] == 1).astype(int).values
    if debug: print("converting X_train and X_test to nparrays")
    X_train = np.array(X_train)
    X_test = np.array(X_test)
    if debug: print("converting y labels to categorical")
    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return (X_train, Y_train, X_test, Y_test)

In [None]:
def buildMLP(activation='tanh', depth=3, hidden_units=512, input_dim=1536,
             dropout=0.2, nb_classes=2):
    """Build and compile a fully connected softmax classifier.

    Parameters
    ----------
    activation : activation passed to ``Activation`` after every hidden
        ``Dense`` layer. (It used to be ignored in favour of a hard-coded
        'tanh'.)
    depth : number of ``Dense`` layers including the output layer; values
        below 2 are raised to 2 (one hidden layer plus the output layer).
    hidden_units : width of every hidden ``Dense`` layer.
    input_dim : length of one input vector.
    dropout : rate passed to the ``Dropout`` layer after each hidden layer.
    nb_classes : width of the final softmax layer.

    Returns
    -------
    The ``Sequential`` model, compiled with categorical cross-entropy loss
    and a default-constructed ``RMSprop`` optimizer.
    """
    if isinstance(activation, int):
        # Legacy call style buildMLP(<depth>): the first positional argument
        # is `activation`, so a bare integer can only have been meant as the
        # depth. Honour that instead of handing a number to Activation().
        activation, depth = 'tanh', activation
    depth = max(depth, 2)

    model = Sequential()
    # The first hidden layer is the only one that declares the input shape.
    model.add(Dense(hidden_units, input_shape=(input_dim,)))
    model.add(Activation(activation))
    model.add(Dropout(dropout))
    for _ in range(depth - 2):
        model.add(Dense(hidden_units))
        model.add(Activation(activation))
        model.add(Dropout(dropout))

    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=RMSprop())
    writeln("Model with depth %d built..." % depth)
    return model

In [None]:
class KerasExperiment:
    """Bundle a model with its train/test arrays and run one fit/evaluate cycle."""

    def __init__(self, model, X_train, Y_train, X_test, Y_test, epochs=5, verbose=True):
        """Store the model, the data splits and the training settings.

        epochs  -- forwarded to ``model.fit`` as ``nb_epoch``.
        verbose -- when true (default) ``fit`` is called with ``verbose=2``,
                   otherwise with ``verbose=0``.
        """
        self.model = model
        self.X_train = X_train
        self.Y_train = Y_train
        self.X_test = X_test
        self.Y_test = Y_test
        self.nb_epoch = epochs
        self.verbose = verbose

    #adds specific piece confusion matrices to results dict d
    def add_piece_specifics(self, d):
        """Placeholder: not implemented yet, ``d`` is left untouched."""
        pass

    def run_experiment(self):
        """Fit the model on the training split and evaluate it on the test split.

        The test split is also passed to ``fit`` as ``validation_data``.
        Returns whatever ``model.evaluate`` returns for the test split.

        NOTE(review): ``nb_epoch`` / ``show_accuracy`` are passed through
        unchanged; presumably they match the Keras version this notebook was
        written for -- confirm before upgrading.
        """
        fit_verbosity = 2 if self.verbose else 0
        # Use the instance's own test split, not same-named notebook globals.
        self.model.fit(self.X_train, self.Y_train, nb_epoch=self.nb_epoch,
                       show_accuracy=True, verbose=fit_verbosity,
                       validation_data=(self.X_test, self.Y_test))
        score = self.model.evaluate(self.X_test, self.Y_test,
                                    show_accuracy=True, verbose=0)
        # TODO: build a results frame (train_size, train_white_count,
        # confusion_matrix, accuracy) once predictions are collected here.
        return score

In [None]:
# Train and evaluate one network per depth and keep every result, keyed by
# depth (previously each iteration overwrote the previous one's result).
results_by_depth = {}
for i in range(2, 6):
    writeln("Building net of depth %d...\n" % i)
    # Pass the depth by keyword: buildMLP's first positional parameter is
    # `activation`, so buildMLP(i) did not set the depth.
    net = buildMLP(depth=i)
    writeln("Running experiment:")
    e = KerasExperiment(net, X_train, Y_train, X_test, Y_test)
    results_df = e.run_experiment()
    results_by_depth[i] = results_df
fries_ready()

Building net of depth 2...Train on 584268 samples, validate on 146068 samples
Epoch 1/5
163s - loss: 0.6103 - acc: 0.6500 - val_loss: 0.6068 - val_acc: 0.6658


In [48]:
# Display the value stored by the most recent `results_df = ...` assignment.
# NOTE(review): the saved output below has the columns accuracy /
# confusion_matrix / train_size / train_white_count; it was presumably produced
# by an earlier version of the experiment code -- re-run to confirm.
results_df

Unnamed: 0,accuracy,confusion_matrix,train_size,train_white_count
0,0.491254,"[[674, 3], [695, 0]]",24584,24584


In [10]:
# Tabulate true labels against predictions, with row/column totals.
# NOTE(review): neither y_test nor y_pred is assigned at top level in the part
# of the notebook visible here (y_test only exists inside build_dataset), so
# this cell relies on state left in the kernel by cells that are not shown.
y_true, y_pred = pd.Series(y_test), pd.Series(y_pred)
pd.crosstab(
    y_true,
    y_pred,
    rownames=['True'],
    colnames=['Predicted'],
    margins=True,
)

Predicted,0,1,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,125,119,244
1,119,137,256
All,244,256,500


In [15]:
def collect_results(test_df, predicted_y):
    """Summarise prediction quality overall and per moved piece.

    Parameters
    ----------
    test_df : frame with a 'y' column (true 0/1 label) and a 'piece_moved'
        column (one of the characters in "pPrRnNbBqQkK"). It is not modified.
    predicted_y : predicted 0/1 labels, one per row of ``test_df`` and in the
        same row order; matched by position, not by index.

    Returns
    -------
    DataFrame with one row for all data (piece == 'all') followed by one row
    per piece in "pPrRnNbBqQkK", with the columns

    * piece            -- 'all' or the piece character
    * instances        -- number of test rows in that group
    * confusion_matrix -- 2x2 nested list, rows = true label, cols = predicted
    * accuracy         -- (TN + TP) / total, NaN for an empty group
    """
    labels = [0, 1]
    # Work on a positionally indexed copy so the predictions line up with the
    # rows whatever index the (shuffled) test frame carries.
    scored = test_df.reset_index(drop=True).assign(
        predicted=np.asarray(predicted_y).ravel())

    groups = [('all', scored)]
    groups.extend((p, scored[scored['piece_moved'] == p]) for p in "pPrRnNbBqQkK")

    records = []
    for piece, frame in groups:
        if len(frame):
            # Fixing `labels` keeps the matrix 2x2 even when a group contains
            # only one of the two classes.
            cm = np.asarray(
                confusion_matrix(frame['y'], frame['predicted'], labels=labels))
        else:
            cm = np.zeros((2, 2), dtype=int)
        total = int(cm.sum())
        records.append({
            'piece': piece,
            'instances': len(frame),
            'confusion_matrix': cm.tolist(),
            'accuracy': float(np.trace(cm)) / total if total else float('nan'),
        })

    return pd.DataFrame(
        records, columns=['piece', 'instances', 'confusion_matrix', 'accuracy'])

p
[[36 20]
 [29 16]]
0.356435643564
P
[[13 23]
 [16 23]]
0.173333333333
r
[[ 5  7]
 [ 8 10]]
0.166666666667
R
[[14  6]
 [12 12]]
0.318181818182
n
[[17  4]
 [11  6]]
0.447368421053
N
[[ 4 11]
 [ 4 12]]
0.129032258065
b
[[6 3]
 [8 8]]
0.24
B
[[ 5 11]
 [ 2 16]]
0.147058823529
q
[[11 10]
 [ 6 11]]
0.289473684211
Q
[[ 9  8]
 [ 7 10]]
0.264705882353
k
[[2 9]
 [8 6]]
0.08
K
[[3 7]
 [8 7]]
0.12
