In [8]:
# Import libraries
import numpy as np
import pandas as pd
#For plotting
import matplotlib.pyplot as plt
import seaborn as sns
# For clustering
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer

# For modelling
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix

In [9]:
#read val-matches-cleaned.csv
val_matches = pd.read_csv('./Data files/val-matches-cleaned.csv')


In [10]:
def player_id_list(results, team_number):
    list_of_player_ids = []

    for index, row in results.iterrows():
        player_ids = row['Team ' + team_number + ' Player Ids']
        player_ids = player_ids[1:-1]
        player_ids = player_ids.split(',')
        player_ids = [int(i) for i in player_ids]
        list_of_player_ids.append(player_ids)
    return list_of_player_ids

In [11]:
val_matches['Team 1 Player Ids'] = player_id_list(val_matches, '1')
val_matches['Team 2 Player Ids'] = player_id_list(val_matches, '2')

In [12]:
from pathlib import Path  
acs = pd.read_csv('./Data files/acs.csv')
agent_wr = pd.read_csv('./Data files/agent-win-rates.csv')
agent_picks = pd.read_csv('./Data files/agent-picks.csv')
aim = pd.read_csv('./Data files/aim.csv')
assists = pd.read_csv('./Data files/assists.csv')
clutches = pd.read_csv('./Data files/clutches.csv')
damage = pd.read_csv('./Data files/damage.csv')
defusals = pd.read_csv('./Data files/defusals.csv')
headshots = pd.read_csv('./Data files/headshots.csv')
kd = pd.read_csv('./Data files/kd.csv')
multikill = pd.read_csv('./Data files/multi-kills.csv')
plants = pd.read_csv('./Data files/plants.csv')
spend = pd.read_csv('./Data files/spend.csv')
playernames = pd.read_csv('./Data files/player-names.csv')
playernames.drop(['Team'], axis=1, inplace=True)
#weapon_picks = pd.read_csv('./Data files/weapon-picks.csv')


# Merge the dataframes
df = pd.merge(acs, aim, on=['Ign', 'Team'])
#df = pd.merge(df, agent_picks, on=['Ign', 'Team'])
#df = pd.merge(df, aim, on=['Ign', 'Team'])
df = pd.merge(df, clutches, on=['Ign', 'Team'])
df = pd.merge(df, damage, on=['Ign', 'Team'])
df = pd.merge(df, defusals, on=['Ign', 'Team'])
df = pd.merge(df, headshots, on=['Ign', 'Team'])
df = pd.merge(df, kd, on=['Ign', 'Team'])
df = pd.merge(df, multikill, on=['Ign', 'Team'])
df = pd.merge(df, plants, on=['Ign', 'Team'])
df = pd.merge(df, spend, on=['Ign', 'Team'])
df = pd.merge(df, playernames, on='Ign', how='inner')
#df = pd.merge(df, weapon_picks, on=['Ign', 'Team'])

df.drop_duplicates(subset=['Ign', 'Team'], inplace=True)

filepath = Path('./Data files/merged.csv')  
filepath.parent.mkdir(parents=True, exist_ok=True)  
df.to_csv(filepath)  
df.head(5)

Unnamed: 0,Ign,Team,Acs,Matches_x,Headshot %,Bodyshot %,Legshot %,Matches_y,Opponents,Clutches,...,Defusals,Headshots,Kills,Deaths,Assists,Kd,Multikills,Plants,Spend,Player Id
0,valyn,The Guard (ex),399,55,24.24,69.53,6.21,55,,12,...,18,605,795,783,403,1.02,190,220,2891900,1804
1,Chronicle,M3 Champions (ex),394,47,22.23,72.46,5.3,47,,11,...,13,527,573,501,185,1.14,356,56,2519150,274
65,Chronicle,Gambit Esports (ex),284,5,24.84,71.01,4.14,5,,4,...,3,78,117,75,57,1.56,33,4,293850,274
66,nAts,M3 Champions (ex),392,47,31.02,64.4,4.57,47,,21,...,13,702,834,674,290,1.24,370,48,2517500,1081
130,nAts,Gambit Esports (ex),146,5,26.59,69.14,4.25,5,,2,...,3,50,61,80,31,0.76,15,18,315200,1081


In [13]:
# Create an elo system for each player
# Create a dictionary of players and their elo
elo_dict = {}
for index, row in df.iterrows():
    if row['Player Id'] not in elo_dict:
        elo_dict[row['Player Id']] = 1000

# Players with higher kd should have higher elo
# Players with higher acs should have higher elo

# Create a dictionary of players and their acs
acs_dict = {}
for index, row in df.iterrows():
    if row['Player Id'] not in acs_dict:
        acs_dict[row['Player Id']] = row['Acs']

# Create a dictionary of players and their kd
kd_dict = {}
for index, row in df.iterrows():
    if row['Player Id'] not in kd_dict:
        kd_dict[row['Player Id']] = row['Kd']
print(val_matches['Team 1 Player Ids'])


0       [1403, 17788, 1690, 2056, 10211]
1       [2056, 17788, 1403, 1690, 10211]
2         [1826, 2377, 2594, 2769, 2940]
3         [1826, 2377, 2769, 2594, 2940]
4         [1826, 2377, 2940, 2769, 2594]
                      ...               
1583       [3211, 2841, 398, 2483, 2716]
1584       [2483, 2841, 3211, 398, 2716]
1585       [2841, 398, 2483, 2716, 3211]
1586       [2483, 2841, 398, 2716, 3211]
1587       [2841, 3211, 398, 2716, 2483]
Name: Team 1 Player Ids, Length: 1588, dtype: object


In [14]:
# Modify elo based on win rate kd and acs
for index, row in val_matches.iterrows():
    if row['Winner'] == 1:
        for player in row['Team 1 Player Ids']:
            elo_dict[player] += 100 * kd_dict[player]
            elo_dict[player] += 100 * acs_dict[player]
        for player in row['Team 2 Player Ids']:
            elo_dict[player] -= 100 * kd_dict[player]
            elo_dict[player] -= 100 * acs_dict[player]
    else:
        for player in row['Team 2 Player Ids']:
            elo_dict[player] += 100 * kd_dict[player]
            elo_dict[player] += 100 * acs_dict[player]
        for player in row['Team 1 Player Ids']:
            elo_dict[player] -= 100 * kd_dict[player]
            elo_dict[player] -= 100 * acs_dict[player]

# Add total elo of each team to the val matches dataframe
team1_elo_list = []
team2_elo_list = []
for index, row in val_matches.iterrows():
    team1_elo = 0
    team2_elo = 0
    for player in row['Team ' + 1 + ' Player Ids']:
        team1_elo += elo_dict[player]
    for player in row['Team ' + 2 + ' Player Ids']:
        team2_elo += elo_dict[player]
    team1_elo_list.append(team1_elo)
    team2_elo_list.append(team2_elo)

val_matches['Team 1 Elo'] = team1_elo_list
val_matches['Team 2 Elo'] = team2_elo_list


KeyError: 2391

In [None]:
cat_encoder = OneHotEncoder()# instantiate a class
# code the feature called default which takes yes, no value
results_encoded = pd.get_dummies(val_matches, columns=[
    'Map Name',
    'Team 1 Name',
    'Team 2 Name',
])

In [None]:
mlb = MultiLabelBinarizer()
results_encoded = results_encoded.join(pd.DataFrame(mlb.fit_transform(results_encoded.pop('Team 1 Agents')),
                          columns=mlb.classes_,
                          index=val_matches.index))
results_encoded = results_encoded.join(pd.DataFrame(mlb.fit_transform(results_encoded.pop('Team 2 Agents')),
                          columns=mlb.classes_,
                          index=val_matches.index))

In [None]:
Y = results_encoded['Winner']
X = results_encoded.drop("Winner", axis=1)
X.head(3)

In [None]:
#Split testing into 20%
#Set shuffle to false to make sure past data is not used for testing
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1904703, shuffle=False)

In [None]:
#Create an instance of RandomForestClassifier
rf=RandomForestClassifier(random_state=1904703, n_estimators=100)
gb= GradientBoostingClassifier(random_state=1904703, n_estimators=100)
dt= DecisionTreeClassifier(random_state=1904703)
et= ExtraTreeClassifier(random_state=1904703)
lr= LogisticRegression(random_state=1904703)
svc= SVC(random_state=1904703)

#Fit model with training data
rf.fit(X_train, y_train)
gb.fit(X_train, y_train)
dt.fit(X_train, y_train)
et.fit(X_train, y_train)
lr.fit(X_train, y_train)
svc.fit(X_train, y_train)

In [None]:
# Prediction
rf_predictions = rf.predict(X_test)
gb_predictions = gb.predict(X_test)
dt_predictions = dt.predict(X_test)
et_predictions = et.predict(X_test)
lr_predictions = lr.predict(X_test)
svc_predictions = svc.predict(X_test)
#Results
accuracy_rf = accuracy_score(y_test, rf_predictions)
accuracy_gb = accuracy_score(y_test, gb_predictions)
accuracy_dt = accuracy_score(y_test, dt_predictions)
accuracy_et = accuracy_score(y_test, et_predictions)
accuracy_lr = accuracy_score(y_test, lr_predictions)
accuracy_svc = accuracy_score(y_test, svc_predictions)
print(f'The overall accuracy of RF is {np.round(accuracy_rf*100,2)}%')
print(f'The overall accuracy of GB is {np.round(accuracy_gb*100,2)}%')
print(f'The overall accuracy of DT is {np.round(accuracy_dt*100,2)}%')
print(f'The overall accuracy of ET is {np.round(accuracy_et*100,2)}%')
print(f'The overall accuracy of LR is {np.round(accuracy_lr*100,2)}%')
print(f'The overall accuracy of SVC is {np.round(accuracy_svc*100,2)}%')