In [1]:
import re
from urllib.request import urlopen, urlretrieve, Request
from urllib.parse import urljoin
from urllib.error import HTTPError, URLError

from bs4 import BeautifulSoup
import pandas as pd


def url2soup(url):
    """Fetch a webpage and return a BeautifulSoup object built from its HTML.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    BeautifulSoup or None
        The parsed document, or ``None`` when the request fails with an
        ``HTTPError``, ``URLError`` or ``ConnectionResetError``. In that case
        the error is printed instead of being raised.
    """
    try:
        # A custom user agent is sent with the request
        # (presumably because the sites reject urllib's default one -- confirm).
        request = Request(url, headers={'User-agent': 'Magic Browser'})
        # The context manager closes the response even if read() fails.
        with urlopen(request) as response:
            html = response.read()
    except (HTTPError, URLError, ConnectionResetError) as e:
        # Report the URL exactly as requested: paths and queries are case-sensitive.
        print('Error fetching {} : {}'.format(url, e))
        return None
    return BeautifulSoup(html, 'html.parser')

In [3]:
def parse_hotslogs_table(soup):
    """Extract the header and rows of the single data table found in ``soup``.

    Parameters
    ----------
    soup : BeautifulSoup or Tag
        Parsed HTML that contains exactly one ``<table>`` with a ``<tbody>``.

    Returns
    -------
    header : list of str
        Text of every ``<th>`` cell of that table.
    body : list of list of str
        One list per ``<tr>`` of the table body, holding the text of its
        ``<td>`` cells.

    Raises
    ------
    AssertionError
        If the number of tables with a ``<tbody>`` is not exactly one.
    """
    # Only tables that carry a <tbody> are considered data tables
    data_tables = [table for table in soup.find_all('table') if table.find('tbody')]
    assert len(data_tables) == 1, (
        'expected exactly one table with a <tbody>, found {}'.format(len(data_tables))
    )
    table = data_tables[0]

    header = [cell.text for cell in table.find_all('th')]
    # Read the rows from the selected table itself, not from the whole document
    body = [
        [cell.text for cell in row.find_all('td')]
        for row in table.find('tbody').find_all('tr')
    ]
    return header, body

# Winrates

In [4]:
# Download the sitewide hero statistics page and parse its data table.
url = 'https://www.hotslogs.com/Sitewide/HeroAndMapStatistics'
# NOTE: url2soup returns None when the fetch fails, in which case the parsing
# step below raises instead of producing a table.
soup = url2soup(url)
header, body = parse_hotslogs_table(soup)
# One row per hero, indexed by the 'Hero' column. Every cell is the raw text
# scraped from the page (e.g. '55.2 %'), not a numeric value.
df_winrate = pd.DataFrame(data=body, columns=header).set_index('Hero')

In [9]:
# Show the scraped win-rate table as this cell's output.
df_winrate

Unnamed: 0_level_0,Unnamed: 1_level_0,Games Played,Games Banned,Popularity,Win Percent,% Δ,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0
Hero,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Malthael,,6994,12209,43.1 %,55.2 %,0.3 %,Assassin,Sustained Damage,"Maltael,Malthaël,Малтаэль,马萨伊尔,瑪瑟爾,말티엘"
Samuro,,3605,1295,11.0 %,54.9 %,0.8 %,Assassin,Ambusher,"萨穆罗,薩姆羅,Самуро,사무로,Samura"
The Lost Vikings,,834,1217,4.6 %,54.4 %,3.4 %,Specialist,Utility,"Os Vikings Perdidos,Потерявшиеся викинги,Los V..."
Rehgar,,13636,2565,36.3 %,54.4 %,1.9 %,Support,Healer,"РЕГАР,레가르,雷加尔,雷加"
Rexxar,,1523,229,3.9 %,54.2 %,1.4 %,Warrior,Tank,"Рексар,雷克萨,雷克薩,렉사르"
Gazlowe,,2250,173,5.4 %,53.8 %,2.2 %,Specialist,Siege,"Gazleu,Gazol,Sparachiodi,ГАЗЛОУ,Gasganete,가즈로,..."
Azmodan,,6818,1589,18.9 %,53.7 %,0.7 %,Specialist,Siege,"Asmodan,Азмодан,Azmodán,Azmadan,아즈모단,阿兹莫丹,阿茲莫丹"
Sonya,,9791,1383,25.1 %,53.5 %,1.0 %,Warrior,Bruiser,"Sonia,Barbare,Barbarian,Barbarin,Bárbara,Соня,..."
Xul,,4131,2392,14.6 %,53.1 %,-0.7 %,Specialist,Siege,"蘇爾,祖尔,줄,Зул"
Zagara,,6716,1769,19.0 %,52.9 %,0.4 %,Specialist,Siege,"ЗАГАРА,자가라,扎加拉,札迦拉"


# Tier list

In [10]:
# Download the tentonhammer.com quick-match tier list article.
url = 'https://www.tentonhammer.com/articles/heroes-of-the-storm-tier-list-quickmatch'
# soup is None if the request failed (url2soup prints the error).
soup = url2soup(url)

In [11]:
# Every <table> of the article, in page order; the loop over `tables` further
# down uses the position of each table as its tier number.
tables = soup.find_all('table')

In [12]:
# Hero names as they come out of the tier list (after cleaning) mapped to the
# names used in the df_winrate index
tenton2hotslogs = {
    'T.L.V': 'The Lost Vikings',
    'E.T.C': 'E.T.C.',
    'Lucio': 'Lúcio',
    "Cho'gall": 'Cho',
}

# Create the tier-list columns, empty until the loop below fills them in
for new_column in ('Tier num', 'Tier', 'Role', 'Difficulty'):
    df_winrate[new_column] = None

# Tables are visited in page order; the 1-based position becomes the tier number
for tier_num, tier_table in enumerate(tables, start=1):
    tier_header = tier_table.find('thead')
    tier = tier_header.find_all('th')[0].text.replace('\n', '')
    for hero_row in tier_table.find('tbody').find_all('tr'):
        cells = hero_row.find_all('td')
        # Strip every character that is not an ASCII letter, hyphen, dot,
        # space or apostrophe before looking the name up
        cleaned_name = re.sub("[^a-zA-Z-. ']*", '', cells[0].text)
        name = tenton2hotslogs.get(cleaned_name, cleaned_name)
        tier_values = (
            ('Tier num', tier_num),
            ('Tier', tier),
            ('Role', cells[2].text),
            ('Difficulty', cells[3].text),
        )
        # NOTE(review): if `name` is not already in the index, `.loc` assignment
        # presumably appends a new row instead of failing -- confirm that
        # unmatched names are handled as intended.
        for column, value in tier_values:
            df_winrate.loc[name, column] = value

In [13]:
# 'Win Percent' holds scraped strings such as '55.2 %'. Sorting those directly
# is lexicographic ('9.9 %' would rank above '55.2 %'), so order the rows by a
# temporary numeric copy of the column and drop it afterwards.
df_winrate = (
    df_winrate
    .assign(_win_pct=lambda frame: pd.to_numeric(
        frame['Win Percent'].str.rstrip(' %'), errors='coerce'
    ))
    .sort_values(by=['Tier num', '_win_pct'], ascending=[True, False])
    .drop('_win_pct', axis=1)
)
# Display the heroes of the first four tiers. The commented-out conditions are
# optional extra filters on the 'Difficulty' and 'Role' columns.
df_winrate[
    (df_winrate['Tier num'] <= 4)
    #& ((df_winrate['Difficulty'] == 'Easy') | (df_winrate['Difficulty'] == 'Medium'))
    #& ((df_winrate['Role'] == 'Specialist'))
]

Unnamed: 0_level_0,Unnamed: 1_level_0,Games Played,Games Banned,Popularity,Win Percent,% Δ,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Tier num,Tier,Role,Difficulty
Hero,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Sonya,,9791,1383,25.1 %,53.5 %,1.0 %,Warrior,Bruiser,"Sonia,Barbare,Barbarian,Barbarin,Bárbara,Соня,...",1,S+ Tier,Warrior,Medium
Varian,,13861,6455,45.6 %,50.6 %,1.8 %,Assassin,Bruiser,"瓦里安,바리안,Вариан,바리인",1,S+ Tier,Tank/Bruiser/Assassin,Medium
Valeera,,2536,1675,9.4 %,47.3 %,0.4 %,Assassin,Ambusher,"Valira,Валира,瓦莉拉,瓦麗拉,발리라",1,S+ Tier,Assassin,Hard
Samuro,,3605,1295,11.0 %,54.9 %,0.8 %,Assassin,Ambusher,"萨穆罗,薩姆羅,Самуро,사무로,Samura",2,S Tier,Assassin,Medium
Rehgar,,13636,2565,36.3 %,54.4 %,1.9 %,Support,Healer,"РЕГАР,레가르,雷加尔,雷加",2,S Tier,Support,Medium
Azmodan,,6818,1589,18.9 %,53.7 %,0.7 %,Specialist,Siege,"Asmodan,Азмодан,Azmodán,Azmadan,아즈모단,阿兹莫丹,阿茲莫丹",2,S Tier,Specialist,Easy
Anub'arak,,10315,6630,38.0 %,51.4 %,0.7 %,Warrior,Tank,"Anub’arak,Ануб'арак,아눕아락,阿努巴拉克",2,S Tier,Warrior,Medium
Lúcio,,5579,791,14.3 %,51.3 %,0.6 %,Support,Healer,"Lucio,Лусио,루시우,卢西奥,路西歐",2,S Tier,Support,Medium
Sgt. Hammer,,1829,347,4.9 %,50.8 %,-0.9 %,Specialist,Siege,"Sgt Marteau,Sergeant Hammer,Sgto. Hammer,Sierż...",2,S Tier,Specialist,Medium
Ragnaros,,4085,1206,11.9 %,50.7 %,-0.1 %,Assassin,Sustained Damage,"拉格納羅斯,拉格纳罗斯,라그나로스,Рагнарос",2,S Tier,Assassin,Medium


# Duos

In [14]:
import time

from selenium import webdriver

In [15]:
def get_duo_table_soup(driver, url, timeout=30, poll_interval=0.1):
    """Open a hero details page, switch it to Quick Match duos and return the duo table.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session used to load and click through the page.
    url : str
        Address of the hero details page.
    timeout : float, optional
        Maximum number of seconds to wait for the game-mode dropdown to show
        'Quick Match'.
    poll_interval : float, optional
        Seconds to sleep between two checks of the dropdown.

    Returns
    -------
    Tag or None
        The ``<div id="winRateWithOtherHeroes">`` element of the page, or
        ``None`` if the page has no such element.

    Raises
    ------
    TimeoutError
        If the dropdown does not show 'Quick Match' within ``timeout`` seconds.
    """
    # Imported locally so the function is self-contained. The generic
    # find_element(By, value) API exists in old and new Selenium releases,
    # whereas the find_element_by_* helpers were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    driver.get(url)

    # Open the game-mode dropdown and pick its fourth entry (Quick Match)
    driver.find_element(
        By.XPATH, '//*[@id="ctl00_MainContent_DropDownGameMode"]/span/span[2]'
    ).click()
    driver.find_element(
        By.XPATH, '//*[@id="ctl00_MainContent_DropDownGameMode_DropDown"]/div/ul/li[4]'
    ).click()
    driver.find_element(By.LINK_TEXT, 'Duos').click()

    # Poll until the dropdown reports the new game mode. The wait is bounded
    # and sleeps between checks instead of spinning forever at full speed.
    deadline = time.monotonic() + timeout
    while True:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        dropdown = soup.find('div', {'id': 'ctl00_MainContent_DropDownGameMode'})
        # A missing dropdown is treated as "not loaded yet" rather than an error
        if dropdown is not None and dropdown.text.strip() == 'Quick Match':
            break
        if time.monotonic() >= deadline:
            raise TimeoutError(
                'Game mode did not switch to Quick Match within {} s for {}'.format(timeout, url)
            )
        time.sleep(poll_interval)

    # The dropdown text changes before the table data is refreshed, so give
    # the page an extra second and re-read its source.
    time.sleep(1)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    return soup.find('div', {'id': 'winRateWithOtherHeroes'})

In [16]:
firefox_profile = webdriver.FirefoxProfile()
# Presumably disables image loading to speed up page loads -- confirm the
# meaning of value 2 against the Firefox preference documentation.
firefox_profile.set_preference('permissions.default.image', 2)
# Boolean preference: pass a real bool, not the string 'false'
firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)

driver = webdriver.Firefox(firefox_profile=firefox_profile)

# Scrape the duo table of every hero. Frames are collected in a list and
# concatenated once, instead of re-copying the growing result at each step.
duo_frames = []
try:
    for hero in list(df_winrate.index):
        print(hero)
        url = 'https://www.hotslogs.com/Sitewide/HeroDetails?Hero=' + hero
        table_soup = get_duo_table_soup(driver, url)
        header, body = parse_hotslogs_table(table_soup)
        # Prefix every row with the hero whose page it was scraped from
        header = ['Hero'] + header
        body = [[hero] + row for row in body]
        duo_frames.append(pd.DataFrame(data=body, columns=header))
finally:
    # Always release the browser, even when a page fails to load or parse
    driver.quit()

# None when there was no hero to scrape, as before
df_duos = pd.concat(duo_frames) if duo_frames else None

WebDriverException: Message: 'geckodriver' executable needs to be in PATH. 


In [31]:
# 'Win Percent With' holds strings such as '68.2 %'. Sorting them directly is
# lexicographic ('9.0 %' would rank above '68.2 %' and '100.0 %' below it), so
# order the rows by a temporary numeric copy of the column instead.
df_duos = (
    df_duos
    .assign(_win_pct=lambda frame: pd.to_numeric(
        frame['Win Percent With'].str.rstrip(' %'), errors='coerce'
    ))
    .sort_values(by='_win_pct', ascending=False)
    .drop('_win_pct', axis=1)
)
# Keep only duos with more than 100 games played together (the counts are
# strings that may contain thousands separators) and show the 30 best.
df_duos[
    df_duos['Games Played With'].str.replace(',', '').astype(int) > 100
].head(30)

Unnamed: 0,Hero,Unnamed: 2,Team Hero,Games Played With,Win Percent With,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,Rexxar,,Arthas,110,68.2 %,Warrior,Bruiser,"АРТАС,아서스,阿尔萨斯,阿薩斯,Артас"
0,Gall,,Xul,315,67.6 %,Specialist,Siege,"蘇爾,祖尔,줄,Зул"
1,Cho,,Xul,357,66.7 %,Specialist,Siege,"蘇爾,祖尔,줄,Зул"
0,The Butcher,,Xul,1430,66.6 %,Specialist,Siege,"蘇爾,祖尔,줄,Зул"
4,The Lost Vikings,,Rehgar,122,66.4 %,Support,Healer,"РЕГАР,레가르,雷加尔,雷加"
1,The Butcher,,Ragnaros,1084,66.3 %,Assassin,Sustained Damage,"拉格納羅斯,拉格纳罗斯,라그나로스,Рагнарос"
2,The Butcher,,Murky,1666,65.7 %,Specialist,Utility,"Bourbie,Męcik,Fosky,МУРЧАЛЬ,Murquinho,머키,鱼人,奔波..."
0,Xul,,The Butcher,1243,65.6 %,Assassin,Ambusher,"El Carnicero,O Açougueiro,屠夫,도살자,Der Schlächte..."
3,The Butcher,,Azmodan,2626,65.4 %,Specialist,Siege,"Asmodan,Азмодан,Azmodán,Azmadan,아즈모단,阿兹莫丹,阿茲莫丹"
1,Xul,,Azmodan,1054,65.3 %,Specialist,Siege,"Asmodan,Азмодан,Azmodán,Azmadan,아즈모단,阿兹莫丹,阿茲莫丹"
