In [1]:
# Imports
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense,Dropout
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
import ast

In [2]:
# Loading the data set.
gameDf = pd.read_csv('gameDf.csv')

In [3]:
# The csv has the list of chat messages as a single string. This function recovers the python list.
gameDf['chatsclean'] = gameDf['chatsclean'].apply(ast.literal_eval)

In [4]:
# BoW encoding, text has already been processed.
vectorizer = CountVectorizer(tokenizer=lambda x: x, preprocessor=lambda x: x,lowercase=False)

In [5]:
# Creating machine learning matricies and performing train-test split for validation.
X = vectorizer.fit_transform(gameDf['chatsclean'].tolist()).toarray()
y = gameDf['result'].to_numpy()
XTrain, XTest, yTrain, yTest = train_test_split(X,y,random_state=42)

In [6]:
# Building the neural network layers, one layer with 50% dropout.
model = Sequential()
model.add(Dense(16, input_dim=X.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [7]:
# Compiling the model, I included accuracy to get a quick glance at model performance when fitting.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [8]:
# A callback function to stop training when validation set loss begins to increase to prevent overfitting.
es = EarlyStopping(monitor='val_loss',min_delta=0,patience=2,verbose=0, mode='auto')

In [9]:
# Fitting the neural network using the same train/test data as the other models.
model.fit(XTrain, yTrain, validation_data=(XTest,yTest), epochs=50, batch_size=32, callbacks=es)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50


<tensorflow.python.keras.callbacks.History at 0x7f62a803a7f0>

In [10]:
# Classification report to compare f1-score to other models.
print(classification_report(yTest, np.round(model.predict(XTest)))) 

              precision    recall  f1-score   support

           0       0.59      0.45      0.51        42
           1       0.63      0.75      0.68        52

    accuracy                           0.62        94
   macro avg       0.61      0.60      0.60        94
weighted avg       0.61      0.62      0.61        94



In [12]:
# Fraction of "win" predictions.
sum(np.round(model.predict(XTest)))/len(np.round(model.predict(XTest)))

array([0.65957445], dtype=float32)