# Mission statement #

Want to build a neural network predicting the outcome of a match, given the hero matchup (5 radiant heroes and 5 dire)

In [1]:
import requests
import pprint
import json
import pandas as pd
import numpy as np
import tensorflow
from tensorflow import keras

Get the hero dictionary:

In [2]:
h = requests.get("https://api.opendota.com/api/heroes")
heroes = json.loads(h.text)
hero_df = pd.DataFrame(heroes)

# Mine the required data. #

We want to get high-mmr matches, using OpenDota API which gives a random sample of 100 recent games. Work iteratively - per every call, sort out the 100 matches, get 100 older matches and repeat.

In [3]:
def pickaxe(below_match_id = None):
    
    # Pick out relevant matches from a sample of 100
    
    if below_match_id == None:
        string = 'https://api.opendota.com/api/publicMatches'
    else:
        string = 'https://api.opendota.com/api/publicMatches?less_than_match_id=' + str(below_match_id)
    a = requests.get(string)
    A = json.loads(a.text)
    over4k = []
    over5k = []
    for i in range(len(A)):
        if A[i]['avg_mmr'] is not None:
            if A[i]['game_mode'] == 22 and A[i]['avg_mmr'] > 4000 and A[i]['avg_mmr'] < 5000:
                over4k.append(A[i])
            elif A[i]['game_mode'] == 2 and A[i]['avg_mmr'] > 5000:
                over5k.append(A[i]) 
    lowest_match_id = A[-1]['match_id']
    return over4k, over5k, lowest_match_id

def picked_heroes(hero_string):
    
    # Convert the string from pickaxe() to a length 116 array with 1s on picked heroes, 0 elsewhere.
    
    team_input = np.zeros(len(hero_df))
    y = [int(i) for i in hero_string.split(',')]
    for item in y:
        idx = int(hero_df[hero_df['id'] == item].index[0])
        team_input[idx] += 1
    return team_input

def process_the_pickaxe(game_list):
    Xt = np.zeros(232)
    yt = np.array(0)
    for item in game_list:
        X = np.hstack((picked_heroes(item['radiant_team']), picked_heroes(item['dire_team'])))
        y = np.array(0)
        y += item['radiant_win']*1
        Xt = np.vstack((Xt,X))
        yt = np.vstack((yt,y))
    Xt = np.delete(Xt, 0, 0)
    yt = np.delete(yt, 0, 0)
    if Xt.shape == (231,):
        Xt = None
        yt = None
    return Xt, yt

def single_call(last_match_id = None):
    
    # Perform a call for 100 matches and process it.
    
    if last_match_id == None:
        a,b,c = pickaxe()
    else:
        a,b,c = pickaxe(last_match_id)
    X_4, y_4 = process_the_pickaxe(a)
    X_5, y_5 = process_the_pickaxe(b)
    return X_4, y_4, X_5, y_5, c

def multiple_calls(call_number = 60, last_match_id = None):
    Total_X_4 = np.zeros(232)
    Total_X_5 = np.zeros(232)
    Total_y_4 = np.array(0)
    Total_y_5 = np.array(0)
    for i in range(call_number):
        X_4, y_4, X_5, y_5, last_match_id = single_call(last_match_id = last_match_id)
        if X_4 is not None:
            Total_X_4 = np.vstack((Total_X_4, X_4))
            Total_y_4 = np.vstack((Total_y_4, y_4))
        if X_5 is not None:
            Total_X_5 = np.vstack((Total_X_5, X_5))
            Total_y_5 = np.vstack((Total_y_5, y_5))
            continue
    Total_X_4 = np.delete(Total_X_4, 0, 0)
    Total_X_5 = np.delete(Total_X_5, 0, 0)
    Total_y_4 = np.delete(Total_y_4, 0, 0)
    Total_y_5 = np.delete(Total_y_5, 0, 0)
    return Total_X_4, Total_y_4, Total_X_5, Total_y_5, last_match_id

# Neural network #

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [10]:
model = Sequential()
model.add(Dense(100, input_dim = 232, activation = 'relu'))
model.add(Dense(50, activation = 'relu'))
model.add(Dense(1, activation = 'softmax'))
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
model.summary()
model.fit(X_train,y_train,batch_size = 10, epochs=5)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               23300     
_________________________________________________________________
dense_4 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 51        
Total params: 28,401
Trainable params: 28,401
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x20dfb55cef0>

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x205010bb978>

In [58]:
y_train.shape

(421, 232)

In [59]:
X_train.shape

(421, 232)

#### Playground:####

In [6]:
X4, y4, X5, y5, _ = multiple_calls()



In [7]:
print(X4.shape, y4.shape, X5.shape, y5.shape)

(411, 232) (411, 1) (231,) ()


In [11]:
a,b,c = pickaxe()

In [53]:
X4[:100].shape

(100, 232)

In [13]:
len(b)

0

In [8]:
def final_processing(X,y, N=50):
    X_test = X[:N]
    y_test = y[:N]
    X_train = X[N:]
    y_train = y[N:]
    return X_train, y_train, X_test, y_test

In [9]:
X_train, y_train, X_test, y_test = final_processing(X4,y4)

In [70]:
from sklearn.ensemble import RandomForestClassifier

In [71]:
rf = RandomForestClassifier()

In [72]:
rf.fit(X_train,y_train)

  """Entry point for launching an IPython kernel.


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [73]:
rf.score(X_test,y_test)

0.46

In [74]:
from sklearn.linear_model import LogisticRegression

In [81]:
lr = LogisticRegression()

In [82]:
lr.fit(X_train,y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [83]:
lr.score(X_test,y_test)

0.6