## Package Imports, Data Load, and Verification

In [1]:
import pandas as pd
print("pandas version:", pd.__version__)
import numpy as np
print("numpy version:", np.__version__)

pandas version: 1.1.1
numpy version: 1.19.1


In [2]:
initial = pd.read_csv("base_stats.csv",
                      dtype = {"team":np.int8,
                               "name":object,
                               "dname":object,
                               "kd":np.float32,
                               "kda":np.float32,
                               "most_killed":np.float32,
                               "win_perc":np.float32}
                     )

In [3]:
initial.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   team         24 non-null     int8   
 1   name         24 non-null     object 
 2   dname        24 non-null     object 
 3   kd           24 non-null     float32
 4   kda          24 non-null     float32
 5   most_killed  24 non-null     float32
 6   win_perc     24 non-null     float32
dtypes: float32(4), int8(1), object(2)
memory usage: 920.0+ bytes


In [4]:
initial.head()

Unnamed: 0,team,name,dname,kd,kda,most_killed,win_perc
0,1,iron,IRONKNIGHT INC,1.08,1.42,35.0,0.4427
1,1,mrspikey,AussieCommander,0.83,1.15,29.0,0.5399
2,1,suffa,SuFFa,0.63,0.93,30.0,0.3934
3,2,saxonj23,Saxonj23,1.15,1.51,35.0,0.5292
4,2,dluith,dluith,0.9,1.23,32.0,0.4802


## Feature Generation and Verification

In [5]:
initial['norm_kd'] = initial.kd/initial.kd.max()
initial['log_norm_mk'] = np.log(initial.most_killed)/np.log(initial.most_killed.max())
initial['assists'] = initial.kda-initial.kd
initial['norm_assists'] = initial.assists/initial.assists.max()
initial['combined'] = (initial.norm_kd+initial.log_norm_mk+initial.win_perc+initial.norm_assists)/4
initial['norm_combined'] = initial.combined/initial.combined.max()

In [6]:
initial.head()

Unnamed: 0,team,name,dname,kd,kda,most_killed,win_perc,norm_kd,log_norm_mk,assists,norm_assists,combined,norm_combined
0,1,iron,IRONKNIGHT INC,1.08,1.42,35.0,0.4427,0.675,0.617705,0.34,0.73913,0.618634,0.765427
1,1,mrspikey,AussieCommander,0.83,1.15,29.0,0.5399,0.51875,0.585032,0.32,0.695652,0.584834,0.723606
2,1,suffa,SuFFa,0.63,0.93,30.0,0.3934,0.39375,0.590922,0.3,0.652174,0.507562,0.627999
3,2,saxonj23,Saxonj23,1.15,1.51,35.0,0.5292,0.71875,0.617705,0.36,0.782609,0.662066,0.819165
4,2,dluith,dluith,0.9,1.23,32.0,0.4802,0.5625,0.602135,0.33,0.717391,0.590557,0.730687


In [7]:
"""
Ratings/elo mapping based on normalized combined score averages

Based on FIDE https://en.wikipedia.org/wiki/Elo_rating_system#Performance_rating
"""

bins = [1.0,.99,.9,.8,.7,.6,.5,.4,.3,.2,.01,0.0,np.NINF]
ratings = [800,677,366,240,149,72,0,-72,-142,-240,-366,-677]
bins.sort()
ratings.sort()
initial['elo'] = pd.cut(initial.norm_combined.values,
                        bins=bins,
                        labels=ratings
                  ).astype(int)

In [8]:
"""
K Factor for updating ELO 
* Based on where you fall within the normalized combined ratings
* 4 different k-values for within the quantiles
* top 25% : 10, 75-50% : 20, 50-25% : 30, 25% below : 40

This uses a modified k-value from FIDE https://en.wikipedia.org/wiki/Elo_rating_system#Most_accurate_K-factor

The idea is:
* 'veteran' players should see less change with wins/loses
* 'novice' players should see more change with wins/loses
"""

quantiles = initial.norm_combined.quantile([0.0,.25,.5,.55,1.0]).values
quantiles[0]=np.NINF
k_value = [5,10,15,20]
initial['k'] = pd.cut(initial.norm_combined.values,
                      bins=quantiles,
                      labels=k_value
                ).astype(int)

In [9]:
"""
Add the team elo calculation back into the initial data set and view
"""
df = (initial.merge(initial.groupby('team')
                           .agg({"elo":'sum'})
                           .reset_index()
                           .rename({'elo':'team_elo'},
                                   axis=1),
                    on='team', 
                    how='left'))
df.head(24)

Unnamed: 0,team,name,dname,kd,kda,most_killed,win_perc,norm_kd,log_norm_mk,assists,norm_assists,combined,norm_combined,elo,k,team_elo
0,1,iron,IRONKNIGHT INC,1.08,1.42,35.0,0.4427,0.675,0.617705,0.34,0.73913,0.618634,0.765427,240,10,629
1,1,mrspikey,AussieCommander,0.83,1.15,29.0,0.5399,0.51875,0.585032,0.32,0.695652,0.584834,0.723606,240,10,629
2,1,suffa,SuFFa,0.63,0.93,30.0,0.3934,0.39375,0.590922,0.3,0.652174,0.507562,0.627999,149,5,629
3,2,saxonj23,Saxonj23,1.15,1.51,35.0,0.5292,0.71875,0.617705,0.36,0.782609,0.662066,0.819165,366,20,755
4,2,dluith,dluith,0.9,1.23,32.0,0.4802,0.5625,0.602135,0.33,0.717391,0.590557,0.730687,240,10,755
5,2,kry lxxvi,Kry LXXXVI,0.63,0.99,21.0,0.4686,0.39375,0.528954,0.36,0.782609,0.543478,0.672438,149,5,755
6,3,capt_TT,Capt_TT,1.25,1.67,43.0,0.5395,0.78125,0.653469,0.42,0.913043,0.721816,0.893092,366,20,846
7,3,divi,Divi8882,0.81,1.14,34.0,0.461,0.50625,0.612668,0.33,0.717391,0.574327,0.710607,240,10,846
8,3,nephew of jimi,taybon,0.76,1.13,25.0,0.4392,0.475,0.559246,0.37,0.804348,0.569448,0.70457,240,5,846
9,4,haydos,Haydos2207,1.23,1.59,42.0,0.5236,0.76875,0.649381,0.36,0.782609,0.681085,0.842697,366,20,755


### Write to .csv for full stats

In [10]:
df.to_csv("full_stats.csv",index=False)

In [11]:
def team_elo_update(df:pd.DataFrame):
    """
    Summary
    -------
    Takes a pandas DataFrame then 
    groups the DataFrame by team and
    aggregates elo using sum
    
    Parameters
    ----------
    df : pandas DataFrame containing columns [team, elo]
    
    Returns
    -------
    A new pandas DataFrame with columns [team, team_elo]
    """
    return (df.groupby('team')
              .agg({"elo":'sum'})
              .reset_index()
              .rename({'elo':'team_elo'},
                      axis=1))

In [12]:
team_elo = team_elo_update(df)
team_elo.head()

Unnamed: 0,team,team_elo
0,1,629
1,2,755
2,3,846
3,4,755
4,5,846


This will eventually display the probability function used below

*Place Holder Cell and Personal Reminder* 

In [13]:
def prob(teamA:int, teamB:int) -> tuple:
    """
    Takes two team numbers and calculates the probabilities for each team winning.
    teamA is the 'home team' per match schedule
    teamB is the 'away team' per match schedule
    * Schedule provided within discord
    
    Parameters
    ----------
    teamA : integer
        The team number found within 'team' column
    
    teamB : integer
        The team number found within 'team' column
        
    Returns
    -------
    Tuple containing the results of each teams probability result.
    """
    ea = (1/(1+10**(
        (team_elo[team_elo.team==teamB].team_elo.values[0] -
         team_elo[team_elo.team==teamA].team_elo.values[0])/400))
         ).round(3)
    eb = (1/(1+10**(
        (team_elo[team_elo.team==teamA].team_elo.values[0] -
         team_elo[team_elo.team==teamB].team_elo.values[0])/400))
         ).round(3)
    return(ea,eb)

In [14]:
def prob_print(teamA:int, teamB:int):
    """
    Summary
    -------
    Takes two team numbers and calculates the probabilities for each team winning.
    teamA is the 'home team' per match schedule
    teamB is the 'away team' per match schedule
    * Schedule provided within discord
    
    Parameters
    ----------
        teamA : integer
        The team number found within 'team' column
    
    teamB : integer
        The team number found within 'team' column
     
    Returns
    -------
    Prints the teams probabilities.
    """
    ea = (1/(1+10**(
        (team_elo[team_elo.team==teamB].team_elo.values[0] -
         team_elo[team_elo.team==teamA].team_elo.values[0])/400))
         ).round(3)
    eb = (1/(1+10**(
        (team_elo[team_elo.team==teamA].team_elo.values[0] -
         team_elo[team_elo.team==teamB].team_elo.values[0])/400))
         ).round(3)
    print("Team",teamA,"has a:",ea,"% chance and Team",teamB,"has a:",eb,"% chance of winning")

## Round 1-7 Initial
### This is done for all 7 initial rounds to show what the probabilities would be without any update to elo
### Subsequent updates for each round will include a reference to these to show the changes

In [15]:
print("Round 1")
# Team 7 v Team 5
prob_print(7,5)
# Team 4 v Team 2
prob_print(4,2)
# Team 1 v Team 3
prob_print(1,3)
# Team 8 v Team 6
prob_print(8,6)

Round 1
Team 7 has a: 0.857 % chance and Team 5 has a: 0.143 % chance of winning
Team 4 has a: 0.5 % chance and Team 2 has a: 0.5 % chance of winning
Team 1 has a: 0.223 % chance and Team 3 has a: 0.777 % chance of winning
Team 8 has a: 0.67 % chance and Team 6 has a: 0.33 % chance of winning


In [16]:
print("Round 2")
# Team 3 v Team 4
prob_print(3,4)
# Team 2 v Team 8
prob_print(2,8)
# Team 5 v Team 1
prob_print(5,7)
# Team 6 v Team 7
prob_print(6,7)

Round 2
Team 3 has a: 0.628 % chance and Team 4 has a: 0.372 % chance of winning
Team 2 has a: 0.038 % chance and Team 8 has a: 0.962 % chance of winning
Team 5 has a: 0.143 % chance and Team 7 has a: 0.857 % chance of winning
Team 6 has a: 0.55 % chance and Team 7 has a: 0.45 % chance of winning


In [17]:
print("Round 3")
# Team 4 v Team 5
prob_print(4,5)
# Team 1 v Team 6
prob_print(1,6)
# Team 8 v Team 7
prob_print(8,7)
# Team 2 v Team 3
prob_print(2,3)

Round 3
Team 4 has a: 0.372 % chance and Team 5 has a: 0.628 % chance of winning
Team 1 has a: 0.038 % chance and Team 6 has a: 0.962 % chance of winning
Team 8 has a: 0.713 % chance and Team 7 has a: 0.287 % chance of winning
Team 2 has a: 0.372 % chance and Team 3 has a: 0.628 % chance of winning


In [18]:
print("Round 4")
# Team 7 v Team 1
prob_print(7,1)
# Team 5 v Team 2
prob_print(5,2)
# Team 6 v Team 4
prob_print(6,4)
# Team 3 v Team 8
prob_print(3,8)

Round 4
Team 7 has a: 0.954 % chance and Team 1 has a: 0.046 % chance of winning
Team 5 has a: 0.628 % chance and Team 2 has a: 0.372 % chance of winning
Team 6 has a: 0.925 % chance and Team 4 has a: 0.075 % chance of winning
Team 3 has a: 0.063 % chance and Team 8 has a: 0.937 % chance of winning


In [19]:
print("Round 5")
# Team 3 v Team 5
prob_print(3,5)
# Team 4 v Team 7
prob_print(4,7)
# Team 2 v Team 6
prob_print(2,6)
# Team 8 v Team 1
prob_print(8,1)

Round 5
Team 3 has a: 0.5 % chance and Team 5 has a: 0.5 % chance of winning
Team 4 has a: 0.09 % chance and Team 7 has a: 0.91 % chance of winning
Team 2 has a: 0.075 % chance and Team 6 has a: 0.925 % chance of winning
Team 8 has a: 0.981 % chance and Team 1 has a: 0.019 % chance of winning


In [20]:
print("Round 6")
# Team 7 v Team 2
prob_print(7,2)
# Team 6 v Team 3
prob_print(6,3)
# Team 5 v Team 8
prob_print(5,8)
# Team 1 v Team 4
prob_print(1,4)

Round 6
Team 7 has a: 0.91 % chance and Team 2 has a: 0.09 % chance of winning
Team 6 has a: 0.88 % chance and Team 3 has a: 0.12 % chance of winning
Team 5 has a: 0.063 % chance and Team 8 has a: 0.937 % chance of winning
Team 1 has a: 0.326 % chance and Team 4 has a: 0.674 % chance of winning


In [21]:
print("Round 7")
# Team 2 v Team 1
prob_print(2,1)
# Team 8 v Team 4
prob_print(8,4)
# Team 3 v Team 7
prob_print(3,7)
# Team 5 v Team 6
prob_print(5,6)

Round 7
Team 2 has a: 0.674 % chance and Team 1 has a: 0.326 % chance of winning
Team 8 has a: 0.962 % chance and Team 4 has a: 0.038 % chance of winning
Team 3 has a: 0.143 % chance and Team 7 has a: 0.857 % chance of winning
Team 5 has a: 0.12 % chance and Team 6 has a: 0.88 % chance of winning


In [22]:
def elo_update(df:pd.DataFrame,teamA:int,teamB:int,result:int) -> pd.DataFrame:
    """
    Summary
    -------
    Takes a team number as a winner and provides the new elo scores
    for the winning and losing team.
    
    Parameters
    ----------
    df : pandas DataFrame
     The dataset containing the full set of stats
     
    teamA : integer
        The team number found within 'team' column
    
    teamB : integer
        The team number found within 'team' column
        
    result : integer
        The team number found within 'team' column that won the match
    
    Returns
    -------
    A pandas DataFrame with updated elo and team elo calculations
    """
    df.drop(['team_elo'], axis=1, inplace=True) # Remove the old team elo
    ea,eb = prob(teamA,teamB)
    if teamA == result:
        for member in df[df.team==teamA].index:
            df.iloc[member,13]=df.iloc[member,13]+df.iloc[member,14]*(1-ea)
        for member in df[df.team==teamB].index:
            df.iloc[member,13]=df.iloc[member,13]+df.iloc[member,14]*(0-eb)
    else:
        for member in df[df.team==teamA].index:
            df.iloc[member,13]=df.iloc[member,13]+df.iloc[member,14]*(0-ea)
        for member in df[df.team==teamB].index:
            df.iloc[member,13]=df.iloc[member,13]+df.iloc[member,14]*(1-eb)
    return (df.merge(df.groupby('team')
                       .agg({"elo":'sum'})
                       .reset_index()
                       .rename({'elo':'team_elo'},
                               axis=1),
                     on='team', 
                     how='left'))

## Round 1 Initial

In [38]:
print("Round 1")
# Team 7 v Team 5
prob_print(7,5)
# Team 4 v Team 2
prob_print(4,2)
# Team 1 v Team 3
prob_print(1,3)
# Team 8 v Team 6
prob_print(8,6)

Round 1
Team 7 has a: 0.904 % chance and Team 5 has a: 0.096 % chance of winning
Team 4 has a: 0.594 % chance and Team 2 has a: 0.406 % chance of winning
Team 1 has a: 0.276 % chance and Team 3 has a: 0.724 % chance of winning
Team 8 has a: 0.745 % chance and Team 6 has a: 0.255 % chance of winning


## Round 1 Results

In [23]:
# Round 2 ELO update from round 1 results
round2 = df.copy()
round2 = elo_update(round2,7,5,7) # 'right'
round2 = elo_update(round2,4,2,4) # 'right'
round2 = elo_update(round2,1,3,3) # 'right'
round2 = elo_update(round2,8,6,8) # 'right'

## Round 2 Updated

### Initial Probabilities

In [24]:
team_elo = team_elo_update(initial)
print("Round 2")
# Team 3 v Team 4
prob_print(3,4)
# Team 2 v Team 8
prob_print(2,8)
# Team 2 v Team 6
prob_print(5,1)
# Team 8 v Team 1
prob_print(6,7)

Round 2
Team 3 has a: 0.628 % chance and Team 4 has a: 0.372 % chance of winning
Team 2 has a: 0.038 % chance and Team 8 has a: 0.962 % chance of winning
Team 5 has a: 0.777 % chance and Team 1 has a: 0.223 % chance of winning
Team 6 has a: 0.55 % chance and Team 7 has a: 0.45 % chance of winning


### Post Round 1 Match Results Probabilities

In [25]:
team_elo = team_elo_update(round2)
print("Round 2")
# Team 3 v Team 4
prob_print(3,4)
# Team 2 v Team 8
prob_print(2,8)
# Team 2 v Team 6
prob_print(5,1)
# Team 8 v Team 1
prob_print(6,7)

Round 2
Team 3 has a: 0.615 % chance and Team 4 has a: 0.385 % chance of winning
Team 2 has a: 0.032 % chance and Team 8 has a: 0.968 % chance of winning
Team 5 has a: 0.775 % chance and Team 1 has a: 0.225 % chance of winning
Team 6 has a: 0.519 % chance and Team 7 has a: 0.481 % chance of winning


## Round 2 Results

In [26]:
# Round 3 ELO update from round 2 results
round3 = round2.copy()
round3 = elo_update(round3,3,4,4) # 'wrong'/'upset'
round3 = elo_update(round3,2,8,8) # 'right'
round3 = elo_update(round3,5,1,5) # 'right'
round3 = elo_update(round3,6,7,7) # 'wrong'

## Round 3 Probabilities

### Initial Probabilities

In [27]:
team_elo = team_elo_update(initial)
print("Round 3")
# Team 4 v Team 5
prob_print(4,5)
# Team 1 v Team 6
prob_print(1,6)
# Team 8 v Team 7
prob_print(8,7)
# Team 2 v Team 3
prob_print(2,3)

Round 3
Team 4 has a: 0.372 % chance and Team 5 has a: 0.628 % chance of winning
Team 1 has a: 0.038 % chance and Team 6 has a: 0.962 % chance of winning
Team 8 has a: 0.713 % chance and Team 7 has a: 0.287 % chance of winning
Team 2 has a: 0.372 % chance and Team 3 has a: 0.628 % chance of winning


### Post Round 2 Result Probabilities

In [28]:
team_elo = team_elo_update(round3)
print("Round 3")
# Team 4 v Team 5
prob_print(4,5)
# Team 1 v Team 6
prob_print(1,6)
# Team 8 v Team 7
prob_print(8,7)
# Team 2 v Team 3
prob_print(2,3)

Round 3
Team 4 has a: 0.419 % chance and Team 5 has a: 0.581 % chance of winning
Team 1 has a: 0.044 % chance and Team 6 has a: 0.956 % chance of winning
Team 8 has a: 0.693 % chance and Team 7 has a: 0.307 % chance of winning
Team 2 has a: 0.365 % chance and Team 3 has a: 0.635 % chance of winning


## Round 3 Results

In [29]:
# Round 4 ELO update from round 3 results
round4 = round3.copy()
round4 = elo_update(round4,4,5,4) # 'wrong'
round4 = elo_update(round4,1,6,6) # 'right'
round4 = elo_update(round4,8,7,7) # 'wrong'/'upset'
round4 = elo_update(round4,2,3,2) # 'wrong'/'upset'

## Round 4 Probabilites

### Initial Probabilities

In [30]:
team_elo = team_elo_update(initial)
print("Round 4")
# Team 7 v Team 1
prob_print(7,1)
# Team 5 v Team 2
prob_print(5,2)
# Team 6 v Team 4
prob_print(6,4)
# Team 3 v Team 8
prob_print(3,8)

Round 4
Team 7 has a: 0.954 % chance and Team 1 has a: 0.046 % chance of winning
Team 5 has a: 0.628 % chance and Team 2 has a: 0.372 % chance of winning
Team 6 has a: 0.925 % chance and Team 4 has a: 0.075 % chance of winning
Team 3 has a: 0.063 % chance and Team 8 has a: 0.937 % chance of winning


### Post Round 3 Probabilties

In [31]:
team_elo = team_elo_update(round4)
print("Round 4")
# Team 7 v Team 1
prob_print(7,1)
# Team 5 v Team 2
prob_print(5,2)
# Team 6 v Team 4
prob_print(6,4)
# Team 3 v Team 8
prob_print(3,8)

Round 4
Team 7 has a: 0.971 % chance and Team 1 has a: 0.029 % chance of winning
Team 5 has a: 0.585 % chance and Team 2 has a: 0.415 % chance of winning
Team 6 has a: 0.877 % chance and Team 4 has a: 0.123 % chance of winning
Team 3 has a: 0.056 % chance and Team 8 has a: 0.944 % chance of winning


## Round 4 Results

In [32]:
round5 = round4.copy()
round5 = elo_update(round5,7,1,1) # 'wrong'/'upset'
round5 = elo_update(round5,5,2,2) # 'wrong'
round5 = elo_update(round5,6,4,4) # 'wrong'/'upset'
round5 = elo_update(round5,3,8,8) # 'right'

## Round 5 Probabilities

### Initial Probabilities

In [33]:
team_elo = team_elo_update(initial)
print("Round 5")
# Team 3 v Team 5
prob_print(3,5)
# Team 4 v Team 7
prob_print(4,7)
# Team 2 v Team 6
prob_print(2,6)
# Team 8 v Team 1
prob_print(8,1)

Round 5
Team 3 has a: 0.5 % chance and Team 5 has a: 0.5 % chance of winning
Team 4 has a: 0.09 % chance and Team 7 has a: 0.91 % chance of winning
Team 2 has a: 0.075 % chance and Team 6 has a: 0.925 % chance of winning
Team 8 has a: 0.981 % chance and Team 1 has a: 0.019 % chance of winning


### Post Round 4 Result probabilities

In [34]:
team_elo = team_elo_update(round5)
print("Round 5")
# Team 3 v Team 5
prob_print(3,5)
# Team 4 v Team 7
prob_print(4,7)
# Team 2 v Team 6
prob_print(2,6)
# Team 8 v Team 1
prob_print(8,1)

Round 5
Team 3 has a: 0.531 % chance and Team 5 has a: 0.469 % chance of winning
Team 4 has a: 0.129 % chance and Team 7 has a: 0.871 % chance of winning
Team 2 has a: 0.126 % chance and Team 6 has a: 0.874 % chance of winning
Team 8 has a: 0.978 % chance and Team 1 has a: 0.022 % chance of winning


## Round 5 Results

## Round 6 Probabilities

### Initial Probabilities

### Post Round 5 Result probabilities

## Round 6 Results

## Round 7 Probabilities

### Initial Probabilities

### Post Round 6 Result probabilities

## Elo Change From Start to Current Round

In [35]:
delta_elo = (pd.DataFrame(round3.elo - df.elo)
               .rename({'elo':'delta_elo'}
                       ,axis=1))
delta_team_elo = (pd.DataFrame(round3.team_elo - df.team_elo)
                    .rename({'team_elo':'delta_team_elo'}
                            ,axis=1))
change = pd.DataFrame(df.dname).join(delta_elo).join(delta_team_elo)
change['perc_of_delta'] =  ((change.delta_elo/change.delta_team_elo)*100).round(2)
change.head(24)

Unnamed: 0,dname,delta_elo,delta_team_elo,perc_of_delta
0,IRONKNIGHT INC,-4.48,-11.2,40.0
1,AussieCommander,-4.48,-11.2,40.0
2,SuFFa,-2.24,-11.2,20.0
3,Saxonj23,-10.64,-18.62,57.14
4,dluith,-5.32,-18.62,28.57
5,Kry LXXXVI,-2.66,-18.62,14.29
6,Capt_TT,-7.84,-13.72,57.14
7,Divi8882,-3.92,-13.72,28.57
8,taybon,-1.96,-13.72,14.29
9,Haydos2207,22.3,39.025,57.14


# Future Possible Team Balancing

In [36]:
# Player average ELO
print(df.elo.mean(), "average player ELO")

# Average Team ELO based on average player elo
print(df.elo.mean() * 3, "average team ELO")

312.2916666666667 average player ELO
936.875 average team ELO


In [37]:
# Players (3) ELO combined should be as closed to possible to Mean Player ELO * Team Size