# DOTA2 - Predicting which team will win using only hero selection

Problem Statement - Every game of DOTA 2 starts with picking heroes. The 5 players on one team and the 5 players on the other team must each pick a unique hero from a roster of 119 choices. These heroes all have unique special abilities and perform better or worse against different opponent heroes. Because hero matchups have strengths and weaknesses, we can leverage this information to predict which team of 5 will win based solely on which heroes were picked.


## Using d2api python wrapper to get match data from Steam Web API

In [1]:
# installed the d2api 1.0.0 python module 
# pip install d2api

In [2]:
#https://d2api.readthedocs.io/en/latest/tutorial.html

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import time

import d2api
from d2api.src import entities

In [2]:
# Open the local text file that stores the Steam Web API key.
# The handle is left open on purpose: the next cell reads the key from it.
api_key_path = r'C:/Users/Erik/Desktop/steam_web_API.txt'
APIKey_file = open(api_key_path, 'r')

In [3]:
# Build the API wrapper from the key stored on the first line of the key file.
# overrides the environment variable key
# readline() keeps the trailing newline (if any), so strip it before use,
# then close the handle so the file is not left open for the whole session.
api_key = APIKey_file.readline().strip()
APIKey_file.close()
api = d2api.APIWrapper(api_key)

### Testing the API to make sure the most recent game data is displayed

In [4]:
# Smoke test: request only the single most recent match (10 players, skill=3)
# and print the 'results_remaining' field followed by each returned match id.
test_match = api.get_match_history(
    min_players=10,
    skill=3,
    matches_requested=1,
)
results_remaining = test_match['results_remaining']
print(results_remaining)
for match in test_match['matches']:
    most_recent_id = match['match_id']
    print(most_recent_id)

499
5630598815


In [8]:
# Load the previously collected match table from disk.
collected_csv = '../data/df_match_and_details.csv'
df_match_and_details = pd.read_csv(collected_csv)

In [11]:
# Display the loaded match table (rendered as the notebook cell output).
df_match_and_details

Unnamed: 0,match,start_time,game_mode,lobby_type,duration,winner,radient_player_1,radient_player_2,radient_player_3,radient_player_4,radient_player_5,dire_player_1,dire_player_2,dire_player_3,dire_player_4,dire_player_5
0,5617764354,1600297049,22,7,1038,dire,Hero(hero_id = 8),Hero(hero_id = 105),Hero(hero_id = 86),Hero(hero_id = 11),Hero(hero_id = 98),Hero(hero_id = 110),Hero(hero_id = 46),Hero(hero_id = 19),Hero(hero_id = 44),Hero(hero_id = 38)
1,5617764355,1600297049,23,0,1518,dire,Hero(hero_id = 40),Hero(hero_id = 19),Hero(hero_id = 54),Hero(hero_id = 69),Hero(hero_id = 105),Hero(hero_id = 70),Hero(hero_id = 93),Hero(hero_id = 51),Hero(hero_id = 86),Hero(hero_id = 58)
2,5617756164,1600296132,22,7,1997,radiant,Hero(hero_id = 5),Hero(hero_id = 36),Hero(hero_id = 13),Hero(hero_id = 14),Hero(hero_id = 41),Hero(hero_id = 126),Hero(hero_id = 84),Hero(hero_id = 10),Hero(hero_id = 100),Hero(hero_id = 21)
3,5617754116,1600295927,22,0,1926,radiant,Hero(hero_id = 60),Hero(hero_id = 15),Hero(hero_id = 34),Hero(hero_id = 104),Hero(hero_id = 107),Hero(hero_id = 65),Hero(hero_id = 70),Hero(hero_id = 126),Hero(hero_id = 31),Hero(hero_id = 108)
4,5617754119,1600295927,22,7,2074,dire,Hero(hero_id = 82),Hero(hero_id = 39),Hero(hero_id = 112),Hero(hero_id = 10),Hero(hero_id = 18),Hero(hero_id = 104),Hero(hero_id = 62),Hero(hero_id = 67),Hero(hero_id = 84),Hero(hero_id = 21)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74320,5629943532,1601038353,18,0,1612,radiant,Hero(hero_id = 81),Hero(hero_id = 106),Hero(hero_id = 78),Hero(hero_id = 79),Hero(hero_id = 56),Hero(hero_id = 73),Hero(hero_id = 90),Hero(hero_id = 70),Hero(hero_id = 59),Hero(hero_id = 37)
74321,5629943992,1601038374,23,0,1702,dire,Hero(hero_id = 84),Hero(hero_id = 6),Hero(hero_id = 14),Hero(hero_id = 20),Hero(hero_id = 31),Hero(hero_id = 19),Hero(hero_id = 112),Hero(hero_id = 22),Hero(hero_id = 83),Hero(hero_id = 42)
74322,5629944649,1601038399,4,0,1922,dire,Hero(hero_id = 38),Hero(hero_id = 107),Hero(hero_id = 109),Hero(hero_id = 46),Hero(hero_id = 67),Hero(hero_id = 85),Hero(hero_id = 62),Hero(hero_id = 6),Hero(hero_id = 40),Hero(hero_id = 41)
74323,5629947649,1601038530,3,7,1477,radiant,Hero(hero_id = 14),Hero(hero_id = 70),Hero(hero_id = 119),Hero(hero_id = 57),Hero(hero_id = 97),Hero(hero_id = 100),Hero(hero_id = 91),Hero(hero_id = 95),Hero(hero_id = 19),Hero(hero_id = 106)


## Step 0 - Get more data by running 500 sample gathers one per second for X loops

Each loop takes about 20 minutes, collects 500 complete match data points, and adds them to the complete data frame.

In [20]:
# Reload the saved match table, then seed the running list of known match ids
# from its 'match' column. The last line shows how many ids we start with.
match_table_path = '../data/df_match_and_details.csv'
df_match_and_details = pd.read_csv(match_table_path)
known_match_column = df_match_and_details['match']
complete_match_list = known_match_column.tolist()
len(complete_match_list)

96925

In [21]:
# Collect more match data. Each outer loop:
#   1. asks the API for the most recent match id and walks the match history
#      from there (via start_at_match_id) to gather match ids,
#   2. fetches full details for every gathered id that is not yet in
#      df_match_and_details,
#   3. rewrites the CSV on disk.
# Loop number determines how many matches are collected, 500 matches per loop
HISTORY_PAUSE_SEC = 20   # pause between match-history requests
DETAILS_PAUSE_SEC = 2    # pause between match-detail requests
ERROR_PAUSE_SEC = 600    # back-off after a failed request (10 min)
LOOP_PAUSE_SEC = 60      # pause at the end of every outer loop

for loop in range(0, 15):
    # get the match id for the most recent game of Dota2 with 10 players and high skill level
    match_1 = api.get_match_history(min_players=10, skill=3, matches_requested=1)

    # setting the starting match id
    start_num = match_1['matches'][0]['match_id']

    # Request match ids in batches of 10. This prevents hitting the SteamAPI requests limit.
    # NOTE(review): the original comment said 50 cycles, but range(0, 51)
    # issues 51 requests; the count is left unchanged here.
    for cycle in range(0, 51):
        # errors are caught so the loop can run for many hours undisturbed
        try:
            match_history = api.get_match_history(
                min_players=10,
                skill=3,
                matches_requested=10,
                start_at_match_id=start_num,
            )
            batch_ids = [entry['match_id'] for entry in match_history['matches']]
            complete_match_list.extend(batch_ids)

            # Continue from the last id of THIS batch. Reading
            # complete_match_list[-1] instead would pick an arbitrary id
            # whenever a batch comes back empty, because the list is rebuilt
            # from a set (unordered) at the end of every outer loop.
            if batch_ids:
                start_num = batch_ids[-1]

        # `except Exception` (not a bare except) keeps Ctrl-C working;
        # report the error, then wait 10 min before the next request
        except Exception as err:
            print(f'match history request failed: {err!r}')
            time.sleep(ERROR_PAUSE_SEC)

        # wait 20 sec between requests
        time.sleep(HISTORY_PAUSE_SEC)

    # make a set first so each match id appears only once in the complete list
    complete_match_list = list(set(complete_match_list))

    # create a list of matches to get complete data for; the set of already
    # detailed ids is built once instead of once per candidate id
    detailed_ids = set(df_match_and_details['match'].tolist())
    matches_need_to_get_details = [
        match_id for match_id in complete_match_list if match_id not in detailed_ids
    ]

    # Rows are gathered in a list and appended to the data frame in one concat;
    # concatenating once per match copies the whole frame every time.
    new_rows = []
    try:
        # Looping through the matches which need details collected
        for match in matches_need_to_get_details:
            # errors are caught so the loop can run for many hours undisturbed
            try:
                match_details = api.get_match_details(match)

                # from the API add the data into a custom dictionary (one row of the df)
                details_dict = {
                    'match': match_details['match_id'],
                    'start_time': match_details['start_time'],
                    'game_mode': match_details['game_mode'],
                    'lobby_type': match_details['lobby_type'],
                    'duration': match_details['duration'],
                    'winner': match_details['winner'],
                }

                # The first five players are stored as the radiant side and the
                # next five as dire -- assumes the API orders players that way
                # (TODO confirm). Column names keep the existing CSV spelling.
                players = match_details['players_minimal']
                for player in range(0, 5):
                    details_dict[f'radient_player_{player+1}'] = str(players[player]['hero'])
                for player in range(5, 10):
                    details_dict[f'dire_player_{player-4}'] = str(players[player]['hero'])

                new_rows.append(details_dict)

                # wait 2 sec between API calls
                time.sleep(DETAILS_PAUSE_SEC)

            # report the error, then wait 10 min before moving on to the next
            # match; the skipped id stays in complete_match_list
            except Exception as err:
                print(f'match details request failed for {match}: {err!r}')
                time.sleep(ERROR_PAUSE_SEC)
    finally:
        # Runs even on an interrupt so rows that were already fetched are kept.
        if new_rows:
            df_match_and_details = pd.concat(
                [df_match_and_details, pd.DataFrame(new_rows)],
                ignore_index=True,
            )

    time.sleep(LOOP_PAUSE_SEC)

    # NOTE(review): start_num is the last id gathered from match history,
    # not necessarily the last match whose details were fetched.
    print(f'{loop}-loop done. Game {start_num} was just detailed.')

    df_match_and_details.to_csv("../data/df_match_and_details.csv", index=False)


499
0-loop done. Game 5633709765 was just detailed.
499
1-loop done. Game 5633730658 was just detailed.
499
2-loop done. Game 5633750851 was just detailed.
499
3-loop done. Game 5633765316 was just detailed.
499
4-loop done. Game 5633786091 was just detailed.
499
5-loop done. Game 5633807799 was just detailed.
499
6-loop done. Game 5633830765 was just detailed.
499
7-loop done. Game 5633852393 was just detailed.
499
8-loop done. Game 5633876532 was just detailed.
499
9-loop done. Game 5633899787 was just detailed.
499
10-loop done. Game 5633923369 was just detailed.
499
11-loop done. Game 5633945671 was just detailed.
499
12-loop done. Game 5633971584 was just detailed.
499
13-loop done. Game 5633995542 was just detailed.
499
14-loop done. Game 5634022590 was just detailed.


In [18]:
# Write the complete match table back to disk (without the row index) so it
# can be used in modeling.
export_path = "../data/df_match_and_details.csv"
df_match_and_details.to_csv(export_path, index=False)