# Mission statement #

The goal is to build a neural network that predicts the outcome of a match from the hero matchup alone (5 Radiant heroes versus 5 Dire heroes).

In [2]:
import requests
import pprint
import json
import pandas as pd
import numpy as np
#import tensorflow
import math
import time
#from tensorflow import keras
#from tqdm import tqdm_notebook
import pickle

In [3]:
import matplotlib.pyplot as plt

Get the hero dictionary:

In [4]:
# Download the hero catalogue from OpenDota. The row index of `hero_df` is what
# the encoding helpers below use as the position of each hero in a feature vector.
h = requests.get("https://api.opendota.com/api/heroes")
# Fail loudly on an HTTP error instead of building a DataFrame from an error payload.
h.raise_for_status()
hero_df = pd.DataFrame(json.loads(h.text))

# Mine the required data #

We want high-MMR matches. The OpenDota API returns a random sample of 100 recent public matches per call, so we work iteratively: for every call, filter the 100 matches, then request 100 older matches, and repeat.

In [5]:
def pickaxe(below_match_id = None):
    """Fetch one page of public matches from OpenDota and keep the high-MMR ones.

    Parameters
    ----------
    below_match_id : int or None
        When given, it is sent as the ``less_than_match_id`` query parameter so
        that only older matches are requested. ``None`` requests the default page.

    Returns
    -------
    over4k : list of dict
        The matches of the page whose ``game_mode`` equals 22 and whose
        ``avg_mmr`` is above 4000, in the order they were received.
    lowest_match_id : int or None
        The smallest ``match_id`` found on the page, to be used as the cursor of
        the next call. When the response is unusable (not a list, an empty list,
        or a malformed entry is met) the cursor that was passed in is returned
        unchanged, together with whatever matches were collected so far.
    """
    url = 'https://api.opendota.com/api/publicMatches'
    if below_match_id is not None:
        url += '?less_than_match_id=' + str(below_match_id)
    matches = requests.get(url).json()

    # An error payload (e.g. a dict) or an empty page carries nothing to mine;
    # keep the cursor where it was instead of crashing on matches[-1].
    if not isinstance(matches, list) or not matches:
        return [], below_match_id

    over4k = []
    for match in matches:
        try:
            avg_mmr = match.get('avg_mmr', None)
            # game_mode 22 is presumably ranked all-pick -- TODO confirm against
            # the OpenDota constants.
            if avg_mmr is not None and match['game_mode'] == 22 and avg_mmr > 4000:
                over4k.append(match)
        except (AttributeError, KeyError, TypeError):
            # Best effort: a malformed entry ends the page early and leaves the
            # cursor untouched, but real programming errors are no longer hidden
            # by a bare ``except``.
            return over4k, below_match_id

    # Use the true minimum rather than the last element so the cursor is right
    # even if the page is not sorted by match id.
    match_ids = [match['match_id'] for match in matches if 'match_id' in match]
    if not match_ids:
        return over4k, below_match_id
    lowest_match_id = min(match_ids)
    return over4k, lowest_match_id

def picked_heroes(hero_string):
    """Encode a comma-separated string of hero ids as a per-hero count vector.

    The vector has one slot per row of the global ``hero_df``; the slot of a
    hero is the index label of the first row whose ``id`` column equals the
    hero id. Each occurrence of an id adds 1 to its slot.
    """
    hero_ids = list(map(int, hero_string.split(',')))
    encoding = np.zeros(len(hero_df))
    for hero_id in hero_ids:
        matching_rows = hero_df.index[(hero_df['id'] == hero_id).values]
        slot = int(matching_rows[0])
        encoding[slot] += 1
    return encoding

def process_the_pickaxe(game_list):
    """Turn a list of match dicts into a feature matrix and a target column.

    Parameters
    ----------
    game_list : iterable of dict
        Each match must provide ``'radiant_team'`` and ``'dire_team'`` (strings
        accepted by ``picked_heroes``) and ``'radiant_win'``.

    Returns
    -------
    Xt : numpy.ndarray or None
        One row per match: the radiant encoding followed by the dire encoding.
        The width follows whatever ``picked_heroes`` returns instead of being
        hard-coded.
    yt : numpy.ndarray or None
        Column vector with ``radiant_win * 1`` for every match.

    Both values are ``None`` when ``game_list`` is empty. This is now decided
    explicitly rather than through the shape left behind by ``np.delete`` on a
    placeholder row.
    """
    features = []
    labels = []
    for item in game_list:
        radiant = picked_heroes(item['radiant_team'])
        dire = picked_heroes(item['dire_team'])
        features.append(np.hstack((radiant, dire)))
        labels.append(item['radiant_win'] * 1)

    if not features:
        return None, None

    # Stack once at the end; growing the arrays inside the loop is quadratic.
    Xt = np.vstack(features)
    yt = np.array(labels).reshape(-1, 1)
    return Xt, yt

def single_call(last_match_id = None):
    """Fetch one page of matches and convert it to arrays.

    Parameters
    ----------
    last_match_id : int or None
        Cursor forwarded to ``pickaxe``; ``None`` requests the default page.

    Returns
    -------
    tuple
        ``(X_4, y_4, c)``: the two values returned by ``process_the_pickaxe``
        for the kept matches, followed by the cursor returned by ``pickaxe``.
    """
    # pickaxe already treats None as "no cursor", so no separate branch (and no
    # `== None` comparison) is needed.
    a, c = pickaxe(last_match_id)
    X_4, y_4 = process_the_pickaxe(a)
    return X_4, y_4, c

def multiple_calls(call_number = 60, last_match_id = None):
    """Run ``single_call`` repeatedly and stack everything it returns.

    Parameters
    ----------
    call_number : int
        Number of pages to request.
    last_match_id : int or None
        Cursor for the first call; every later call uses the cursor returned by
        the previous one.

    Returns
    -------
    Total_X_4 : numpy.ndarray
        All feature rows, stacked vertically.
    Total_y_4 : numpy.ndarray
        All target rows, stacked vertically.
    last_match_id
        Cursor returned by the final call.

    When no call produced data, empty arrays of shape ``(0, 2 * len(hero_df))``
    and ``(0, 1)`` are returned.
    """
    # The notebook's tqdm import is commented out, so tqdm_notebook was an
    # undefined name on a fresh kernel. Import locally and fall back to a plain
    # iterator when tqdm is not installed.
    try:
        from tqdm.auto import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    X_chunks = []
    y_chunks = []
    for i in progress(range(call_number)):
        # Pause 70 s after every 60 calls -- presumably to stay under the API's
        # rate limit; TODO confirm the current limit.
        if i % 60 == 0 and i > 0:
            time.sleep(70)
        X_4, y_4, last_match_id = single_call(last_match_id = last_match_id)
        if X_4 is not None:
            X_chunks.append(X_4)
            y_chunks.append(y_4)

    if not X_chunks:
        width = 2 * len(hero_df)
        return np.zeros((0, width)), np.zeros((0, 1), dtype=int), last_match_id

    # Stack once at the end; growing the arrays inside the loop is quadratic.
    Total_X_4 = np.vstack(X_chunks)
    Total_y_4 = np.vstack(y_chunks)
    return Total_X_4, Total_y_4, last_match_id

def final_processing(X,y, test_ratio = 0.1):
    """Split ``X`` and ``y`` positionally into a train part and a test part.

    The first ``ceil(len(X) * test_ratio)`` items form the test part and the
    remainder forms the train part. Nothing is shuffled.

    Returns ``(X_train, y_train, X_test, y_test)``.
    """
    n_test = int(np.ceil(test_ratio * len(X)))
    test_rows = slice(None, n_test)
    train_rows = slice(n_test, None)
    return X[train_rows], y[train_rows], X[test_rows], y[test_rows]

In [6]:
# Mine 6000 pages of public matches. This is long-running: multiple_calls sleeps
# 70 s after every 60 calls. The third return value (the last match-id cursor)
# is bound to `_`.
X4, y4, _ = multiple_calls(call_number = 6000)
print('Data and target sizes:', X4.shape, y4.shape)

HBox(children=(IntProgress(value=0, max=6000), HTML(value='')))



Data and target sizes: (44899, 232) (44899, 1)


In [10]:
# Hold out the first 10% of the rows as the test set (final_processing's default
# test_ratio). The split is positional; no shuffling is applied.
X_train, y_train, X_test, y_test = final_processing(X4,y4)

Save to pickle files:

In [7]:
# Persist the feature matrix. The context manager closes the file even if
# pickle.dump raises.
filename = 'X4'
with open(filename,'wb') as outfile:
    pickle.dump(X4,outfile)

In [8]:
# Persist the target column. The context manager closes the file even if
# pickle.dump raises.
filename = 'y4'
with open(filename,'wb') as outfile:
    pickle.dump(y4,outfile)

In [17]:
# Shows the value bound to `_` by the mining cell, i.e. the last match-id cursor
# returned by multiple_calls.
# NOTE(review): this relies on `_` still holding that value; IPython also uses
# `_` for the previous cell output, so a named variable would be safer.
_

4439774104

Open the pickled files:

In [4]:
# Reload the feature matrix from the 'X4' pickle file. The context manager
# closes the file even if loading fails.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open('X4','rb') as infile:
    XX4 = pickle.load(infile)

In [5]:
# Reload the target column from the 'y4' pickle file. The context manager
# closes the file even if loading fails.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open('y4','rb') as infile:
    yy4 = pickle.load(infile)

In [6]:
# Expose the reloaded arrays under the names the rest of the notebook uses.
X4, y4 = XX4, yy4

Handy tool to convert hero names into a NumPy array of length 232:

In [7]:
def string_to_array(radiant,dire):
    """Encode two lists of hero names as a single model input vector.

    Parameters
    ----------
    radiant, dire : iterable of str
        Hero names as they appear in the ``localized_name`` column of the
        global ``hero_df``.

    Returns
    -------
    numpy.ndarray
        Vector of length ``2 * len(hero_df)``: the first half counts the radiant
        picks and the second half the dire picks. The sizes are derived from
        ``hero_df`` (as ``picked_heroes`` does) instead of the fixed 232/116, so
        a hero can no longer land in the wrong half when the hero table grows.

    Raises
    ------
    IndexError
        If a name is not present in ``hero_df``; the message names the hero.
    """
    n_heroes = len(hero_df)
    names = hero_df['localized_name']

    def _slot(hero_name):
        # Index label of the first row whose localized name matches.
        hits = hero_df.index[(names == hero_name).values]
        if len(hits) == 0:
            raise IndexError('Unknown hero name: {!r}'.format(hero_name))
        return hits[0]

    X = np.zeros(2 * n_heroes)
    for item in radiant:
        X[_slot(item)] += 1
    for item in dire:
        X[_slot(item) + n_heroes] += 1
    return X

In [31]:
hero_df.head()

Unnamed: 0,attack_type,id,legs,localized_name,name,primary_attr,roles
0,Melee,1,2,Anti-Mage,npc_dota_hero_antimage,agi,"[Carry, Escape, Nuker]"
1,Melee,2,2,Axe,npc_dota_hero_axe,str,"[Initiator, Durable, Disabler, Jungler]"
2,Ranged,3,4,Bane,npc_dota_hero_bane,int,"[Support, Disabler, Nuker, Durable]"
3,Melee,4,2,Bloodseeker,npc_dota_hero_bloodseeker,agi,"[Carry, Disabler, Jungler, Nuker, Initiator]"
4,Ranged,5,2,Crystal Maiden,npc_dota_hero_crystal_maiden,int,"[Support, Disabler, Nuker, Jungler]"


In [79]:
hero_df[hero_df['localized_name']=='Lycan']

Unnamed: 0,attack_type,id,legs,localized_name,name,primary_attr,roles
75,Melee,77,2,Lycan,npc_dota_hero_lycan,str,"[Carry, Pusher, Jungler, Durable, Escape]"


# Neural network #

In [29]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

In [None]:
# Small fully connected classifier: three tanh hidden layers with dropout and a
# sigmoid output giving the probability of a radiant win.
model = Sequential([
    # Take the input width from the training data rather than hard-coding 232,
    # so the network keeps matching the features if the hero table changes size.
    Dense(64, input_dim = X_train.shape[1], activation = 'tanh'),
    Dropout(0.2),
    Dense(32, activation = 'tanh'),
    Dropout(0.2),
    Dense(8, activation = 'tanh'),
    Dropout(0.2),
    Dense(1, activation = 'sigmoid')
])

model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
model.summary()
# 10% of the training rows are set aside by Keras for validation.
history = model.fit(X_train,y_train, validation_split = 0.1 ,batch_size = 10, epochs=5)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_138 (Dense)            (None, 64)                14912     
_________________________________________________________________
dropout_81 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_139 (Dense)            (None, 32)                2080      
_________________________________________________________________
dropout_82 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_140 (Dense)            (None, 8)                 264       
_________________________________________________________________
dropout_83 (Dropout)         (None, 8)                 0         
_________________________________________________________________
dense_141 (Dense)            (None, 1)                 9         
Total para

In [None]:
# The model was compiled with metrics=['accuracy'], so evaluate() returns
# [loss, accuracy]; index 1 is the accuracy.
print('Accuracy on the test set:', model.evaluate(X_test,y_test)[1])

In [None]:
# Plot training vs. validation accuracy per epoch.
# Keras logs the metric under 'acc' in older releases and under 'accuracy' in
# newer ones; use whichever key this run produced instead of raising KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [94]:
# Toy matchup: an empty radiant side against a single dire hero, encoded and
# reshaped into a one-row batch for the model.
radiant, dire = [], ['Invoker']
X_toy = string_to_array(radiant, dire).reshape((1, -1))
print('Radiant win probability is', model.predict(X_toy)[0][0])

Radiant win probability is 0.5870339


In [90]:
X_toy.sum()

10.0

# Make an easy-to-input hero list: (later..) #

In [56]:
# def hero_to_id(string):

#### Playground: ####

## Random Forest Classifier ##

In [100]:
from sklearn.ensemble import RandomForestClassifier

In [101]:
# Baseline: random forest with default hyper-parameters.
# The targets are column vectors of shape (n, 1); scikit-learn expects 1-D
# targets, so flatten them to avoid the column-vector conversion warning.
rf = RandomForestClassifier()
rf.fit(X_train, y_train.ravel())
print('Testing score:', rf.score(X_test, y_test.ravel()))
print('Training score:', rf.score(X_train, y_train.ravel()))

  


Testing score: 0.5287305122494432
Training score: 0.9873543022593977


## Gradient Boosting Machine ##

In [98]:
from sklearn.ensemble import GradientBoostingClassifier
# Baseline: gradient boosting with default hyper-parameters.
# Flatten the (n, 1) targets to 1-D so scikit-learn does not emit the
# column_or_1d conversion warning. The print labels now say "set", not "test".
gbm = GradientBoostingClassifier()
gbm.fit(X_train, y_train.ravel())
print('Score on training set:', gbm.score(X_train, y_train.ravel()))
print('Score on test set:', gbm.score(X_test, y_test.ravel()))

  y = column_or_1d(y, warn=True)


Score on training test: 0.592838229107377
Score on test test: 0.5761692650334076


In [95]:
from sklearn.linear_model import LogisticRegressionCV

In [97]:
# Baseline: cross-validated logistic regression over 10 regularisation strengths
# spaced logarithmically between 1e-2 and 1e5.
# Flatten the (n, 1) targets to 1-D so scikit-learn does not emit the
# column_or_1d conversion warning.
Cs = np.logspace(-2,5,10)
lr = LogisticRegressionCV(Cs=Cs)
lr.fit(X_train, y_train.ravel())
print('Testing score:', lr.score(X_test, y_test.ravel()))
print('Training score:', lr.score(X_train, y_train.ravel()))

  y = column_or_1d(y, warn=True)


Testing score: 0.5953229398663697
Training score: 0.5880373184191641


# Playground #

In [6]:
hero_df.head()

Unnamed: 0,attack_type,id,legs,localized_name,name,primary_attr,roles
0,Melee,1,2,Anti-Mage,npc_dota_hero_antimage,agi,"[Carry, Escape, Nuker]"
1,Melee,2,2,Axe,npc_dota_hero_axe,str,"[Initiator, Durable, Disabler, Jungler]"
2,Ranged,3,4,Bane,npc_dota_hero_bane,int,"[Support, Disabler, Nuker, Durable]"
3,Melee,4,2,Bloodseeker,npc_dota_hero_bloodseeker,agi,"[Carry, Disabler, Jungler, Nuker, Initiator]"
4,Ranged,5,2,Crystal Maiden,npc_dota_hero_crystal_maiden,int,"[Support, Disabler, Nuker, Jungler]"


In [9]:
from sklearn.feature_extraction.text import CountVectorizer

In [10]:
cv = CountVectorizer()

In [23]:
cv.fit(item for item in hero_df['roles'][0])

CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
        tokenizer=None, vocabulary=None)

In [24]:
cv.vocabulary_

{'carry': 0, 'escape': 1, 'nuker': 2}

In [30]:
# Fits the vectorizer once per hero on that hero's role list.
# NOTE(review): each fit() call presumably re-learns the vocabulary from scratch,
# so only the last hero's roles would survive this loop -- confirm. A later cell
# fits once on the concatenated list of all roles instead.
for i in range(len(hero_df)):
    cv.fit(hero_df['roles'][i])

In [32]:
# NOTE(review): `vocabulary` is the constructor argument (the repr of `cv` shows
# vocabulary=None), which is why this cell displays nothing; the fitted mapping
# is `cv.vocabulary_`.
cv.vocabulary

In [49]:
# Flatten the per-hero role lists into one list, keeping duplicates and the
# hero order of hero_df.
roles = []
for i in range(len(hero_df)):
    roles += hero_df['roles'][i]

In [50]:
cv.fit(roles)

CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
        tokenizer=None, vocabulary=None)

In [51]:
cv.vocabulary_

{'carry': 0,
 'escape': 3,
 'nuker': 6,
 'initiator': 4,
 'durable': 2,
 'disabler': 1,
 'jungler': 5,
 'support': 8,
 'pusher': 7}

In [54]:
cv.transform(['carry'])

<1x9 sparse matrix of type '<class 'numpy.int64'>'
	with 1 stored elements in Compressed Sparse Row format>