## Making a Predictor for the last hero of a team

Using the trained fNN, which predicts the probability that a set of 5 heroes will win, this notebook runs through the remaining available heroes and recommends the 5 choices with the best probability of winning.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import CountVectorizer

from tensorflow.keras.models import load_model

### The cell below initializes the count vectorizers and the scaler, which are needed when running the recommender

In [3]:
# importing the cleaned match data
match_df = pd.read_csv("../data/complete_match_df.csv")


def _vocab_columns(cvec):
    """Return the feature names of a fitted CountVectorizer as a list.

    scikit-learn removed `get_feature_names` in 1.2 in favour of
    `get_feature_names_out`, so prefer the new method and only fall back to
    the old one on releases that do not have it yet.
    """
    if hasattr(cvec, "get_feature_names_out"):
        return list(cvec.get_feature_names_out())
    return list(cvec.get_feature_names())


# one vectorizer per side: each turns that side's hero string into per-hero count columns
rad_cvec = CountVectorizer()
dire_cvec = CountVectorizer()
rad_df = pd.DataFrame(rad_cvec.fit_transform(match_df['radiant']).toarray(), columns=_vocab_columns(rad_cvec))
dire_df = pd.DataFrame(dire_cvec.fit_transform(match_df['dire']).toarray(), columns=_vocab_columns(dire_cvec))

# radiant columns first, then dire columns; the later cells must build features in the same order
match_hero_df = pd.concat([rad_df, dire_df], axis=1)

X = match_hero_df
# target: 1 when radiant won the match, 0 when dire won
y = match_df['winner'].map({'radiant': 1, 'dire': 0})

# fixed random_state so the scaler below is always fitted on the same training rows
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=2020)

# the scaler is fitted on the training split only and reused for every prediction further down
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

In [4]:
# load the saved, already-trained neural network used to generate the recommendations
NN_model = load_model(filepath="../data/NN_model/NN_model")

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


## Below is the code that predicts which team will win a complete 5v5 Dota 2 match, and with what probability

In [16]:
# creating a list of hero names to run the model on
hero_data = {
    'dire_hero_1': 'tidehunter',
    'dire_hero_2': 'lion',
    'dire_hero_3': 'sniper',
    'dire_hero_4': 'sven',
    'dire_hero_5': 'vengefulspirit',
    'radiant_hero_1': 'juggernaut',
    'radiant_hero_2': 'drow_ranger',
    'radiant_hero_3': 'dragon_knight',
    'radiant_hero_4': 'witch_doctor',
    'radiant_hero_5': 'axe'
}

# keep only the slots that actually have a hero picked (empty selections are dropped)
picked = {slot: hero for slot, hero in hero_data.items() if not (pd.isna(hero) or hero == "")}

# making a df to store the heroes and teams (one row, one column per filled slot)
test_game_df = pd.DataFrame([picked])

# creating the list of heroes on each team with r_ or d_ for whether the hero was on radiant or dire;
# any slot whose name does not contain "radiant" is treated as a dire slot
rad_heroes = ''.join(' r_' + hero for slot, hero in picked.items() if "radiant" in slot)
dire_heroes = ''.join(' d_' + hero for slot, hero in picked.items() if "radiant" not in slot)

# adding the hero lists to the dataframe with all the individual hero information
test_game_df['radiant'] = rad_heroes
test_game_df['dire'] = dire_heroes

# running count vectorizer on the hero lists for radiant and dire,
# radiant columns first to match the layout the scaler was fitted on
team_counts = np.hstack([
    rad_cvec.transform([rad_heroes]).toarray(),
    dire_cvec.transform([dire_heroes]).toarray(),
])

# joining the radiant and dire teams to create the final feature dataframe;
# the training column labels are reused so the names match what the scaler saw when it was fitted
temp_match_hero_df = pd.DataFrame(team_counts, columns=X_train.columns)

# scaling the dataframe
X_team_sc = sc.transform(temp_match_hero_df)

# generating the probability that radiant wins; a single predict call replaces the
# predict_proba / predict_classes pair, which newer tf.keras releases no longer provide
probab = NN_model.predict(X_team_sc)[0][0]

# printing the probability of which team will win (radiant is the predicted class above 0.5)
if probab > 0.5:
    print(f'Radiant is predicted to win with a {np.round(probab*100,4)}% chance')
else:
    print(f'Dire is predicted to win with a {np.round((1-probab)*100,4)}% chance')

Radiant is predicted to win with a 64.48% chance


## Below is the code that recommends the 5 heroes that provide the highest win probability

In [17]:
# creating a list of hero names to run the model on; leave exactly one slot as '' to get recommendations for it
data = {
    'dire_hero_1': 'tidehunter',
    'dire_hero_2': 'lion',
    'dire_hero_3': 'sniper',
    'dire_hero_4': 'sven',
    'dire_hero_5': '',
    'radiant_hero_1': 'juggernaut',
    'radiant_hero_2': 'drow_ranger',
    'radiant_hero_3': 'dragon_knight',
    'radiant_hero_4': 'witch_doctor',
    'radiant_hero_5': 'axe'
}

# how many of the best-scoring heroes to recommend
N_RECOMMENDATIONS = 5


def radiant_win_probabilities(radiant_teams, dire_teams):
    """Return the model's radiant-win probability for each paired line-up.

    Parameters
    ----------
    radiant_teams, dire_teams : list of str
        Equal-length lists of whitespace-separated hero tokens
        ('r_<hero>' for radiant, 'd_<hero>' for dire). Entry i of each list
        together describes match i.

    Returns
    -------
    numpy.ndarray
        One value per match, taken from the first output column of the model.
    """
    # radiant columns first, then dire, matching the layout the scaler was fitted on
    features = np.hstack([
        rad_cvec.transform(radiant_teams).toarray(),
        dire_cvec.transform(dire_teams).toarray(),
    ])
    # reuse the training column labels so the names match what the scaler saw when it was fitted
    scaled = sc.transform(pd.DataFrame(features, columns=X_train.columns))
    # one batched predict call for all candidate line-ups instead of one call per hero
    return NN_model.predict(scaled)[:, 0]


# keep only the slots that actually have a hero picked, so the open slot disappears
picked = {slot: hero for slot, hero in data.items() if not (pd.isna(hero) or hero == "")}

# making a df to store the heroes and teams (one row, one column per filled slot)
test_game_df = pd.DataFrame([picked])

# any slot whose name does not contain "radiant" is treated as a dire slot
rad_picks = [hero for slot, hero in picked.items() if "radiant" in slot]
dire_picks = [hero for slot, hero in picked.items() if "radiant" not in slot]

# creating the list of heroes on each team with r_ or d_ for whether the hero was on radiant or dire
rad_heroes = ''.join(' r_' + hero for hero in rad_picks)
dire_heroes = ''.join(' d_' + hero for hero in dire_picks)

test_game_df['radiant'] = rad_heroes
test_game_df['dire'] = dire_heroes

# creating a list of heroes that weren't on either team to run all possible teams;
# the vectorizers were built with default settings, which lowercase tokens, so compare lowercased names
taken = {hero.lower() for hero in rad_picks + dire_picks}
unused_radiant_heroes_list = [token for token in rad_cvec.vocabulary_ if token[2:] not in taken]
unused_dire_heroes_list = [token for token in dire_cvec.vocabulary_ if token[2:] not in taken]

# creating a df to store the probabilities for each hero so they can be sorted later
probabilities_df = pd.DataFrame()

n_rad = len(rad_heroes.split())
n_dire = len(dire_heroes.split())

if (n_rad == 4) and (n_dire == 5):
    # radiant team hero missing: try every unused hero in the open radiant slot
    candidates = unused_radiant_heroes_list
    probs = radiant_win_probabilities(
        [rad_heroes + ' ' + hero for hero in candidates],
        [dire_heroes] * len(candidates),
    )
elif (n_rad == 5) and (n_dire == 4):
    # dire team hero missing: the model scores radiant, so dire's chance is the complement
    candidates = unused_dire_heroes_list
    probs = 1 - radiant_win_probabilities(
        [rad_heroes] * len(candidates),
        [dire_heroes + ' ' + hero for hero in candidates],
    )
else:
    candidates = None

if candidates is None:
    pred = "Please only have 1 missing hero"
else:
    # one 'prob' row with a column per candidate hero, built in a single step
    probabilities_df = pd.DataFrame(
        np.asarray(probs, dtype=float).reshape(1, -1),
        index=['prob'],
        columns=candidates,
    )
    # sort the heroes by highest probability and keep the best ones to display later
    pred = probabilities_df.T.sort_values(by='prob', ascending=False).head(N_RECOMMENDATIONS)

In [18]:
# show the recommended heroes with their win probability
# (or the message string when the draft does not have exactly one open slot)
pred

Unnamed: 0,prob
d_abyssal_underlord,0.43319
d_broodmother,0.419198
d_clinkz,0.416517
d_dark_seer,0.41072
d_chaos_knight,0.403618
