In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras import Sequential, layers, optimizers, models
from random import randint

#* for small network, using GPU will not provide a lot of speed improvement -> the following line limits the machine to use only CPU instead
tf.config.set_visible_devices([], 'GPU')

# Data Processing

data from UCI Machine Learning Repositories: https://archive.ics.uci.edu/ml/datasets/SkillCraft1+Master+Table+Dataset

source: Thompson JJ, Blair MR, Chen L, Henrey AJ (2013) Video Game Telemetry as a Critical Tool in the Study of Complex Skill Learning. PLoS ONE 8(9): e75129.

In [2]:
data = pd.read_csv('../data/SkillCraft1_Dataset.csv', na_values='?')
data = data.drop(['GameID'], axis=1)

filtered_data = data[data[data.columns].notnull().all(1)] # filter out any row that contains missing value
filtered_data

Unnamed: 0,LeagueIndex,Age,HoursPerWeek,TotalHours,APM,SelectByHotkeys,AssignToHotkeys,UniqueHotkeys,MinimapAttacks,MinimapRightClicks,NumberOfPACs,GapBetweenPACs,ActionLatency,ActionsInPAC,TotalMapExplored,WorkersMade,UniqueUnitsMade,ComplexUnitsMade,ComplexAbilitiesUsed
0,5,27.0,10.0,3000.0,143.7180,0.003515,0.000220,7,0.000110,0.000392,0.004849,32.6677,40.8673,4.7508,28,0.001397,6,0.0,0.000000
1,5,23.0,10.0,5000.0,129.2322,0.003304,0.000259,4,0.000294,0.000432,0.004307,32.9194,42.3454,4.8434,22,0.001193,5,0.0,0.000208
2,4,30.0,10.0,200.0,69.9612,0.001101,0.000336,4,0.000294,0.000461,0.002926,44.6475,75.3548,4.0430,22,0.000745,6,0.0,0.000189
3,3,19.0,20.0,400.0,107.6016,0.001034,0.000213,1,0.000053,0.000543,0.003783,29.2203,53.7352,4.9155,19,0.000426,7,0.0,0.000384
4,3,32.0,10.0,500.0,122.8908,0.001136,0.000327,2,0.000000,0.001329,0.002368,22.6885,62.0813,9.3740,15,0.001174,4,0.0,0.000019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3334,4,20.0,8.0,400.0,158.1390,0.013829,0.000504,7,0.000217,0.000313,0.003583,36.3990,66.2718,4.5097,30,0.001035,7,0.0,0.000287
3335,5,16.0,56.0,1500.0,186.1320,0.006951,0.000360,6,0.000083,0.000166,0.005414,22.8615,34.7417,4.9309,38,0.001343,7,0.0,0.000388
3336,4,21.0,8.0,100.0,121.6992,0.002956,0.000241,8,0.000055,0.000208,0.003690,35.5833,57.9585,5.4154,23,0.002014,7,0.0,0.000000
3337,3,20.0,28.0,400.0,134.2848,0.005424,0.000182,5,0.000000,0.000480,0.003205,18.2927,62.4615,6.0202,18,0.000934,5,0.0,0.000000


# Model Construction

In [3]:
predict = 'LeagueIndex'
rank: dict = {1: 'Bronze', 2: 'Silver', 3: 'Gold', 4: 'Platinum', 5: 'Diamond', 6: 'Master', 7: 'GrandMaster', 8: 'Professional'}

x = np.array(filtered_data.drop([predict], axis=1))
y = np.array(filtered_data[predict])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

In [4]:
model = Sequential()
model.add(layers.Dense(16, input_shape=(18,), activation='sigmoid'))
model.add(layers.Dense(8, activation='sigmoid'))
# model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(8, activation='softmax')) #? Dense(8) works
model.summary()

#! using optimizer = 'adam' does not work for M1 architecture
model.compile(optimizer=optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(x_train, y_train, epochs=10)

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc}')

model.save('../model/8LeagueSkills_NeuralNetwork.h5')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 16)                304       
                                                                 
 dense_1 (Dense)             (None, 8)                 136       
                                                                 
 dense_2 (Dense)             (None, 8)                 72        
                                                                 
Total params: 512
Trainable params: 512
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10


2023-05-06 16:20:40.741782: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.36227545142173767


# Prediction

In [5]:
model: Sequential = models.load_model('../model/8LeagueSkills_NeuralNetwork.h5')
predictions = model.predict(x_train)

for _ in range(10):
    i = randint(0, len(predictions))
    print(f'Prediction: {rank[np.argmax(predictions[i])]}, Actual: {rank[y_train[i]]}')

Prediction: Silver, Actual: Bronze
Prediction: Silver, Actual: Bronze
Prediction: Platinum, Actual: Platinum
Prediction: Platinum, Actual: Bronze
Prediction: Diamond, Actual: Diamond
Prediction: Diamond, Actual: Platinum
Prediction: Diamond, Actual: Diamond
Prediction: Diamond, Actual: Master
Prediction: Platinum, Actual: Gold
Prediction: Platinum, Actual: Platinum


## 4 Leagues Prediction

In [6]:
fourLeague_data = filtered_data.copy()

rank: dict = {1: 'Bronze-Silver', 2: 'Gold-Platinum', 3: 'Diamond-Master', 4: 'GrandMaster-Professional'}
fourLeague_data.loc[fourLeague_data['LeagueIndex'] == 2, 'LeagueIndex'] = 1
fourLeague_data.loc[fourLeague_data['LeagueIndex'] == 3, 'LeagueIndex'] = 2
fourLeague_data.loc[fourLeague_data['LeagueIndex'] == 4, 'LeagueIndex'] = 2
fourLeague_data.loc[fourLeague_data['LeagueIndex'] == 5, 'LeagueIndex'] = 3
fourLeague_data.loc[fourLeague_data['LeagueIndex'] == 6, 'LeagueIndex'] = 4
fourLeague_data.loc[fourLeague_data['LeagueIndex'] == 7, 'LeagueIndex'] = 4
fourLeague_data.loc[fourLeague_data['LeagueIndex'] == 8, 'LeagueIndex'] = 4

predict = 'LeagueIndex'

x = np.array(fourLeague_data.drop([predict], axis=1))
y = np.array(fourLeague_data[predict])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

model = Sequential()
model.add(layers.Dense(16, input_shape=(18,), activation='sigmoid'))
model.add(layers.Dense(8, activation='sigmoid'))
# model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(5, activation='softmax')) #? Dense(4) doesn't work
model.summary()

#! using optimizer = 'adam' does not work for M1 architecture
model.compile(optimizer=optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(x_train, y_train, epochs=10)

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc}')

model.save('../model/4LeagueSkills_NeuralNetwork.h5')


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 16)                304       
                                                                 
 dense_4 (Dense)             (None, 8)                 136       
                                                                 
 dense_5 (Dense)             (None, 5)                 45        
                                                                 
Total params: 485
Trainable params: 485
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.473053902387619


In [7]:
predictions = model.predict(x_train)

for _ in range(10):
    i = randint(0, len(predictions))
    print(f'Prediction: {rank[np.argmax(predictions[i])]}, Actual: {rank[y_train[i]]}')

Prediction: GrandMaster-Professional, Actual: GrandMaster-Professional
Prediction: Gold-Platinum, Actual: Gold-Platinum
Prediction: Gold-Platinum, Actual: Gold-Platinum
Prediction: Diamond-Master, Actual: Gold-Platinum
Prediction: Gold-Platinum, Actual: Diamond-Master
Prediction: Gold-Platinum, Actual: Gold-Platinum
Prediction: GrandMaster-Professional, Actual: GrandMaster-Professional
Prediction: Gold-Platinum, Actual: Gold-Platinum
Prediction: Diamond-Master, Actual: GrandMaster-Professional
Prediction: GrandMaster-Professional, Actual: Diamond-Master


## 3 Leagues Prediction

In [8]:
threeLeague_data = filtered_data.copy()

rank: dict = {1: 'Bronze-Silver-Gold', 2: 'Platinum-Diamond-Master', 3: 'GrandMaster-Professional'}
threeLeague_data.loc[threeLeague_data['LeagueIndex'] == 2, 'LeagueIndex'] = 1
threeLeague_data.loc[threeLeague_data['LeagueIndex'] == 3, 'LeagueIndex'] = 1
threeLeague_data.loc[threeLeague_data['LeagueIndex'] == 4, 'LeagueIndex'] = 2
threeLeague_data.loc[threeLeague_data['LeagueIndex'] == 5, 'LeagueIndex'] = 2
threeLeague_data.loc[threeLeague_data['LeagueIndex'] == 6, 'LeagueIndex'] = 2
threeLeague_data.loc[threeLeague_data['LeagueIndex'] == 7, 'LeagueIndex'] = 3
threeLeague_data.loc[threeLeague_data['LeagueIndex'] == 8, 'LeagueIndex'] = 3

predict = 'LeagueIndex'

x = np.array(threeLeague_data.drop([predict], axis=1))
y = np.array(threeLeague_data[predict])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

model = Sequential()
model.add(layers.Dense(16, input_shape=(18,), activation='sigmoid'))
model.add(layers.Dense(8, activation='sigmoid'))
# model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(4, activation='softmax')) #? Dense(3) doesn't work
model.summary()

#! using optimizer = 'adam' does not work for M1 architecture
model.compile(optimizer=optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(x_train, y_train, epochs=10)

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc}')

model.save('../model/3LeagueSkills_NeuralNetwork.h5')

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 16)                304       
                                                                 
 dense_7 (Dense)             (None, 8)                 136       
                                                                 
 dense_8 (Dense)             (None, 4)                 36        
                                                                 
Total params: 476
Trainable params: 476
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.796407163143158


In [9]:
predictions = model.predict(x_train)

for _ in range(10):
    i = randint(0, len(predictions))
    print(f'Prediction: {rank[np.argmax(predictions[i])]}, Actual: {rank[y_train[i]]}')

Prediction: Platinum-Diamond-Master, Actual: Platinum-Diamond-Master
Prediction: Bronze-Silver-Gold, Actual: Bronze-Silver-Gold
Prediction: Platinum-Diamond-Master, Actual: Platinum-Diamond-Master
Prediction: Bronze-Silver-Gold, Actual: Bronze-Silver-Gold
Prediction: Bronze-Silver-Gold, Actual: Bronze-Silver-Gold
Prediction: Platinum-Diamond-Master, Actual: Platinum-Diamond-Master
Prediction: Bronze-Silver-Gold, Actual: Bronze-Silver-Gold
Prediction: Platinum-Diamond-Master, Actual: Platinum-Diamond-Master
Prediction: Platinum-Diamond-Master, Actual: Platinum-Diamond-Master
Prediction: Platinum-Diamond-Master, Actual: Platinum-Diamond-Master


## 2 Leagues Prediction

In [10]:
twoLeague_data = filtered_data.copy()

rank: dict = {1: 'Bronze-Silver-Gold-Platinum', 2: 'Diamond-Master-GrandMaster-Professional'}
twoLeague_data.loc[twoLeague_data['LeagueIndex'] == 2, 'LeagueIndex'] = 1
twoLeague_data.loc[twoLeague_data['LeagueIndex'] == 3, 'LeagueIndex'] = 1
twoLeague_data.loc[twoLeague_data['LeagueIndex'] == 4, 'LeagueIndex'] = 1
twoLeague_data.loc[twoLeague_data['LeagueIndex'] == 5, 'LeagueIndex'] = 2
twoLeague_data.loc[twoLeague_data['LeagueIndex'] == 6, 'LeagueIndex'] = 2
twoLeague_data.loc[twoLeague_data['LeagueIndex'] == 7, 'LeagueIndex'] = 2
twoLeague_data.loc[twoLeague_data['LeagueIndex'] == 8, 'LeagueIndex'] = 2

predict = 'LeagueIndex'

x = np.array(twoLeague_data.drop([predict], axis=1))
y = np.array(twoLeague_data[predict])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

model = Sequential()
model.add(layers.Dense(16, input_shape=(18,), activation='sigmoid'))
model.add(layers.Dense(8, activation='sigmoid'))
# model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(3, activation='softmax')) #? Dense(2) doesn't work
model.summary()

#! using optimizer = 'adam' does not work for M1 architecture
model.compile(optimizer=optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(x_train, y_train, epochs=10)

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc}')

model.save('../model/2LeagueSkills_NeuralNetwork.h5')

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 16)                304       
                                                                 
 dense_10 (Dense)            (None, 8)                 136       
                                                                 
 dense_11 (Dense)            (None, 3)                 27        
                                                                 
Total params: 467
Trainable params: 467
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.7694610953330994


In [11]:
predictions = model.predict(x_train)

for _ in range(10):
    i = randint(0, len(predictions))
    print(f'Prediction: {rank[np.argmax(predictions[i])]}, Actual: {rank[y_train[i]]}')

Prediction: Bronze-Silver-Gold-Platinum, Actual: Bronze-Silver-Gold-Platinum
Prediction: Bronze-Silver-Gold-Platinum, Actual: Bronze-Silver-Gold-Platinum
Prediction: Diamond-Master-GrandMaster-Professional, Actual: Diamond-Master-GrandMaster-Professional
Prediction: Diamond-Master-GrandMaster-Professional, Actual: Diamond-Master-GrandMaster-Professional
Prediction: Diamond-Master-GrandMaster-Professional, Actual: Diamond-Master-GrandMaster-Professional
Prediction: Diamond-Master-GrandMaster-Professional, Actual: Diamond-Master-GrandMaster-Professional
Prediction: Bronze-Silver-Gold-Platinum, Actual: Bronze-Silver-Gold-Platinum
Prediction: Bronze-Silver-Gold-Platinum, Actual: Bronze-Silver-Gold-Platinum
Prediction: Bronze-Silver-Gold-Platinum, Actual: Bronze-Silver-Gold-Platinum
Prediction: Bronze-Silver-Gold-Platinum, Actual: Bronze-Silver-Gold-Platinum
