In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import pickle

import plotly.express as px
from jupyter_dash import JupyterDash
from dash import html, dcc, Input, Output

Citation: Pedregosa et al., Scikit-learn: Machine Learning in Python, JMLR 12, pp. 2825-2830, 2011.

# Data Processing

Data from the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/SkillCraft1+Master+Table+Dataset

Source: Thompson JJ, Blair MR, Chen L, Henrey AJ (2013) Video Game Telemetry as a Critical Tool in the Study of Complex Skill Learning. PLoS ONE 8(9): e75129.

In [None]:
# Read the SkillCraft1 table, treating '?' cells as missing, and discard the
# GameID column in the same expression.
data = pd.read_csv('../data/SkillCraft1_Dataset.csv', na_values='?').drop(columns=['GameID'])

# Keep only the rows in which every column holds a value.
filtered_data = data.dropna()
filtered_data

# Model Construction

In [None]:
predict = 'LeagueIndex'

# Features are every column except the target; the target is the league index.
x = np.array(filtered_data.drop([predict], axis=1))
y = np.array(filtered_data[predict])

print('Model Construction\n------------------')
# One seeded generator is shared by every split: each call still draws a
# different partition, but a fresh Run All reproduces the same sequence.
split_rng = np.random.RandomState(42)
best_acc = 0
best_model = None
best_split = None
# Ten rounds over odd k in 3..13, each on a fresh 90/10 split; the best scorer is kept.
# NOTE: the winner is picked by its score on its own hold-out split, so
# `best_acc` is an optimistic figure rather than an unbiased test accuracy.
for _ in range(10):
    for k in range(3, 14, 2):
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.1, random_state=split_rng)

        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(x_train, y_train)
        acc = model.score(x_test, y_test)
        if acc > best_acc:
            best_acc = acc
            best_model = model
            best_split = (x_train, x_test, y_train, y_test)
            with open('../model/8LeagueSkills_KNearestNeighborModel.pickle', 'wb') as f:
                pickle.dump(model, f)
            print(f'New most accurate model ({best_acc}) using {k} neighbors is saved!')

# Leave the saved model and the split it was scored on in scope. Otherwise the
# names would hold whatever the final iteration produced, and later cells would
# evaluate the saved model on rows it may have been trained on.
if best_model is not None:
    model = best_model
    x_train, x_test, y_train, y_test = best_split

# Prediction

In [None]:
# Reload the 8-league classifier from the pickle file.
# The `with` block closes the file handle, which the bare open() call left open.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open('../model/8LeagueSkills_KNearestNeighborModel.pickle', 'rb') as f:
    model: KNeighborsClassifier = pickle.load(f)

In [None]:
print('\nPrediction\n----------')
# Human-readable name for each league index.
rank: dict = {1: 'Bronze', 2: 'Silver', 3: 'Gold', 4: 'Platinum', 5: 'Diamond', 6: 'Master', 7: 'GrandMaster', 8: 'Professional'}

predictions = model.predict(x_test)

# Show the first ten predictions next to the true labels. dict.get falls back
# to 'Unknown' for either value, so an unexpected index cannot raise KeyError.
for prediction, actual in zip(predictions[:10], y_test[:10]):
    print(f"Prediction: {rank.get(prediction, 'Unknown')}, Actual: {rank.get(actual, 'Unknown')}")

## 4 Leagues Categorization

In [None]:
fourLeague_data = filtered_data.copy()

rank: dict = {1: 'Bronze-Silver', 2: 'Gold-Platinum', 3: 'Diamond-Master', 4: 'GrandMaster-Professional'}
# Original league index -> merged league index, applied in a single pass.
# League 6 (Master) goes to group 3 so that the grouping agrees with `rank`.
league_groups = {1: 1, 2: 1, 3: 2, 4: 2, 5: 3, 6: 3, 7: 4, 8: 4}
fourLeague_data['LeagueIndex'] = fourLeague_data['LeagueIndex'].map(league_groups)
# map() yields NaN for any value missing from the table; fail here rather than in fit().
assert fourLeague_data['LeagueIndex'].notna().all(), 'LeagueIndex value outside 1-8'

predict = 'LeagueIndex'

# Features are every column except the target; the target is the merged league index.
x = np.array(fourLeague_data.drop([predict], axis=1))
y = np.array(fourLeague_data[predict])

print('Model Construction\n------------------')
# One seeded generator is shared by every split: each call still draws a
# different partition, but a fresh Run All reproduces the same sequence.
split_rng = np.random.RandomState(42)
best_acc = 0
best_model = None
best_split = None
# Ten rounds over odd k in 3..13, each on a fresh 90/10 split; the best scorer is kept.
# NOTE: the winner is picked by its score on its own hold-out split, so
# `best_acc` is an optimistic figure rather than an unbiased test accuracy.
for _ in range(10):
    for k in range(3, 14, 2):
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.1, random_state=split_rng)

        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(x_train, y_train)
        acc = model.score(x_test, y_test)
        if acc > best_acc:
            best_acc = acc
            best_model = model
            best_split = (x_train, x_test, y_train, y_test)
            with open('../model/4LeagueSkills_KNearestNeighborModel.pickle', 'wb') as f:
                pickle.dump(model, f)
            print(f'New most accurate model ({best_acc}) using {k} neighbors is saved!')

# Predict with the saved model on the split it was scored on, not with
# whatever model and split the final loop iteration happened to leave behind.
if best_model is not None:
    model = best_model
    x_train, x_test, y_train, y_test = best_split

print('\nPrediction\n----------')
predictions = model.predict(x_test)

# Show the first ten predictions next to the true labels. dict.get falls back
# to 'Unknown' for either value, so an unexpected index cannot raise KeyError.
for prediction, actual in zip(predictions[:10], y_test[:10]):
    print(f"Prediction: {rank.get(prediction, 'Unknown')}, Actual: {rank.get(actual, 'Unknown')}")

## 3 Leagues Categorization

In [None]:
threeLeague_data = filtered_data.copy()

rank: dict = {1: 'Bronze-Silver-Gold', 2: 'Platinum-Diamond-Master', 3: 'GrandMaster-Professional'}
# Original league index -> merged league index, applied in a single pass.
league_groups = {1: 1, 2: 1, 3: 1, 4: 2, 5: 2, 6: 2, 7: 3, 8: 3}
threeLeague_data['LeagueIndex'] = threeLeague_data['LeagueIndex'].map(league_groups)
# map() yields NaN for any value missing from the table; fail here rather than in fit().
assert threeLeague_data['LeagueIndex'].notna().all(), 'LeagueIndex value outside 1-8'

predict = 'LeagueIndex'

# Features are every column except the target; the target is the merged league index.
x = np.array(threeLeague_data.drop([predict], axis=1))
y = np.array(threeLeague_data[predict])

print('Model Construction\n------------------')
# One seeded generator is shared by every split: each call still draws a
# different partition, but a fresh Run All reproduces the same sequence.
split_rng = np.random.RandomState(42)
best_acc = 0
best_model = None
best_split = None
# Ten rounds over odd k in 3..13, each on a fresh 90/10 split; the best scorer is kept.
# NOTE: the winner is picked by its score on its own hold-out split, so
# `best_acc` is an optimistic figure rather than an unbiased test accuracy.
for _ in range(10):
    for k in range(3, 14, 2):
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.1, random_state=split_rng)

        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(x_train, y_train)
        acc = model.score(x_test, y_test)
        if acc > best_acc:
            best_acc = acc
            best_model = model
            best_split = (x_train, x_test, y_train, y_test)
            with open('../model/3LeagueSkills_KNearestNeighborModel.pickle', 'wb') as f:
                pickle.dump(model, f)
            print(f'New most accurate model ({best_acc}) using {k} neighbors is saved!')

# Predict with the saved model on the split it was scored on, not with
# whatever model and split the final loop iteration happened to leave behind.
if best_model is not None:
    model = best_model
    x_train, x_test, y_train, y_test = best_split

print('\nPrediction\n----------')
predictions = model.predict(x_test)

# Show the first ten predictions next to the true labels. dict.get falls back
# to 'Unknown' for either value, so an unexpected index cannot raise KeyError.
for prediction, actual in zip(predictions[:10], y_test[:10]):
    print(f"Prediction: {rank.get(prediction, 'Unknown')}, Actual: {rank.get(actual, 'Unknown')}")

## 2 Leagues Categorization

In [None]:
twoLeague_data = filtered_data.copy()

rank: dict = {1: 'Bronze-Silver-Gold-Platinum', 2: 'Diamond-Master-GrandMaster-Professional'}
# Original league index -> merged league index, applied in a single pass.
league_groups = {1: 1, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2, 7: 2, 8: 2}
twoLeague_data['LeagueIndex'] = twoLeague_data['LeagueIndex'].map(league_groups)
# map() yields NaN for any value missing from the table; fail here rather than in fit().
assert twoLeague_data['LeagueIndex'].notna().all(), 'LeagueIndex value outside 1-8'

predict = 'LeagueIndex'

# Features are every column except the target; the target is the merged league index.
x = np.array(twoLeague_data.drop([predict], axis=1))
y = np.array(twoLeague_data[predict])

print('Model Construction\n------------------')
# One seeded generator is shared by every split: each call still draws a
# different partition, but a fresh Run All reproduces the same sequence.
split_rng = np.random.RandomState(42)
best_acc = 0
best_model = None
best_split = None
# Ten rounds over odd k in 3..13, each on a fresh 90/10 split; the best scorer is kept.
# NOTE: the winner is picked by its score on its own hold-out split, so
# `best_acc` is an optimistic figure rather than an unbiased test accuracy.
for _ in range(10):
    for k in range(3, 14, 2):
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.1, random_state=split_rng)

        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(x_train, y_train)
        acc = model.score(x_test, y_test)
        if acc > best_acc:
            best_acc = acc
            best_model = model
            best_split = (x_train, x_test, y_train, y_test)
            # The 2-league model gets its own file; writing to the 3-league
            # path would overwrite the model saved by the 3-league cell.
            with open('../model/2LeagueSkills_KNearestNeighborModel.pickle', 'wb') as f:
                pickle.dump(model, f)
            print(f'New most accurate model ({best_acc}) using {k} neighbors is saved!')

# Predict with the saved model on the split it was scored on, not with
# whatever model and split the final loop iteration happened to leave behind.
if best_model is not None:
    model = best_model
    x_train, x_test, y_train, y_test = best_split

print('\nPrediction\n----------')
predictions = model.predict(x_test)

# Show the first ten predictions next to the true labels. dict.get falls back
# to 'Unknown' for either value, so an unexpected index cannot raise KeyError.
for prediction, actual in zip(predictions[:10], y_test[:10]):
    print(f"Prediction: {rank.get(prediction, 'Unknown')}, Actual: {rank.get(actual, 'Unknown')}")

# Visualization Tool

In [None]:
app = JupyterDash(__name__)

# Both axis selectors offer the same columns and the same two scale choices.
column_options = list(filtered_data.columns)
scale_options = ['linear', 'log']

# Left half of the control row: column and scale for the y axis.
y_axis_controls = html.Div(
    [
        dcc.Dropdown(options=column_options, value='LeagueIndex', id='y-axis'),
        dcc.RadioItems(options=scale_options, value='linear', id='y-axis-type', inline=True),
    ],
    style={'width': '48%', 'display': 'inline-block'},
)

# Right half of the control row: column and scale for the x axis.
x_axis_controls = html.Div(
    [
        dcc.Dropdown(options=column_options, value='HoursPerWeek', id='x-axis'),
        dcc.RadioItems(options=scale_options, value='linear', id='x-axis-type', inline=True),
    ],
    style={'width': '48%', 'float': 'right', 'display': 'inline-block'},
)

# Page: the control row on top, followed by the graph the callback fills in.
app.layout = html.Div([
    html.Div([y_axis_controls, x_axis_controls]),
    dcc.Graph(id='graph', figure={}),
])

@app.callback(
    Output('graph', 'figure'),
    Input('y-axis', 'value'),
    Input('y-axis-type', 'value'),
    Input('x-axis', 'value'),
    Input('x-axis-type', 'value'))
def update_graph(y_axis, y_axis_type, x_axis, x_axis_type):
    """Build the scatter plot for the currently selected columns and scales.

    Args:
        y_axis: Column of `filtered_data` to plot on the y axis.
        y_axis_type: 'linear' for a linear y axis; any other value selects a log axis.
        x_axis: Column of `filtered_data` to plot on the x axis.
        x_axis_type: 'linear' for a linear x axis; any other value selects a log axis.

    Returns:
        A scatter figure coloured by 'LeagueIndex', or an empty figure dict
        when either column selection is empty.
    """
    # A cleared dropdown passes None; there is nothing to plot in that case.
    if not x_axis or not y_axis:
        return {}

    # Scale the axes instead of transforming the data with log10: tick labels
    # stay in the column's own units and zero values do not turn into -inf.
    return px.scatter(
        filtered_data,
        y=y_axis,
        x=x_axis,
        log_y=(y_axis_type != 'linear'),
        log_x=(x_axis_type != 'linear'),
        color='LeagueIndex',
        color_continuous_scale=px.colors.sequential.Rainbow,
        title="Custom Graph Generator")

# Start the Dash app defined above; mode='inline' requests that the result be
# displayed inline in the notebook.
app.run_server(mode='inline')