In [1]:
from bs4 import BeautifulSoup
from IPython.display import HTML

import requests
import asyncio
import aiohttp

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
plt.rcParams.update(plt.rcParamsDefault)
plt.rcParams.update({
  'font.size': 16,
  'grid.alpha': 0.25})

In [3]:
url = "https://mobile-legends.fandom.com/wiki/List_of_heroes"    # Edit this with your selected URL
text = requests.get(url).text                                    # This will get the HTML source

soup = BeautifulSoup(text, 'html5lib')                           # Parse the HTML source into soup object

In [4]:
print(soup.prettify()[:])

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of heroes | Mobile Legends: Bang Bang Wiki | Fandom
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"249d0611101edcb8","wgCSPNonce":false,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_heroes","wgTitle":"List of heroes","wgCurRevisionId":144911,"wgRevisionId":144911,"wgArticleId":14076,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Pages with broken file links","Heroes","Shop"],"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Li

## Get the column names 

In [5]:
table = soup.find("table")

column_name = []
for th in table.find_all("th"):
  column_name.append(th.text.strip())

column_name

['Hero',
 'Name',
 'Hero Code',
 'Role(s)',
 'Specialties',
 'Laning',
 'Release Year',
 'Price']

## Get all the rows

In [6]:
rows = []

for idx_row, tr in enumerate(table.find_all("tr")[1:]):    # Skip the first row because it is the header
  row_data = {}
  all_td = tr.find_all("td")
  row_data["name"]          = all_td[1].text.strip()
  
  ## Get the url page for hero
  row_data["hero_url_page"] = "/".join(url.split("/")[:-1] + [row_data["name"]])

  row_data["hero_code"]     = all_td[2].text.strip()
  row_data["role"]          = all_td[3].text.strip()
  row_data["specialties"]   = all_td[4].text.strip()
  row_data["laning"]        = all_td[5].text.strip()
  row_data["release_year"]  = all_td[6].text.strip()


  ## Special handling for price column
  price = all_td[7]
  try:
    row_data["price"] = " | ".join([a_tag["title"] 
                                      for a_tag in price.find_all("a")])
  except (KeyError):
    print(f"specific parsing; idx_row: {idx_row}; hero_name: {row_data['name']}") 
    price_unit = price.find_all("img")[0]["alt"]
    row_data["price"] = price.text.strip() + " " + price_unit

  rows.append(row_data)

rows

specific parsing; idx_row: 45; hero_name: Odette


[{'name': 'Miya',
  'hero_url_page': 'https://mobile-legends.fandom.com/wiki/Miya',
  'hero_code': '1',
  'role': 'Marksman',
  'specialties': 'Reap | Damage',
  'laning': 'Gold Laner',
  'release_year': '2016',
  'price': '10800 Battle Points | 399 ticket'},
 {'name': 'Balmond',
  'hero_url_page': 'https://mobile-legends.fandom.com/wiki/Balmond',
  'hero_code': '2',
  'role': 'Fighter',
  'specialties': 'Damage | Regen',
  'laning': 'EXP Laner',
  'release_year': '2016',
  'price': '6500 Battle Points | 299 diamonds'},
 {'name': 'Saber',
  'hero_url_page': 'https://mobile-legends.fandom.com/wiki/Saber',
  'hero_code': '3',
  'role': 'Assassin',
  'specialties': 'Charge | Reap',
  'laning': 'Jungler',
  'release_year': '2016',
  'price': '6500 Battle Points | 299 diamonds'},
 {'name': 'Alice',
  'hero_url_page': 'https://mobile-legends.fandom.com/wiki/Alice',
  'hero_code': '4',
  'role': 'Mage |  Tank',
  'specialties': 'Charge | Regen',
  'laning': 'Mid Laner',
  'release_year': '201

Exploration detail to get url for hero

In [7]:
tr2 = table.find_all("tr")[2]
tr2_colURL = "/".join(url.split("/")[:-1] 
                        +  [tr2.find_all("td")[1].text.strip()])
tr2_colURL

'https://mobile-legends.fandom.com/wiki/Balmond'

Exploration detail to get the code for extracting price column 

In [8]:
tr2 = table.find_all("tr")[2]    # Skip the first row because it is the header
tr2_colPrice = tr2.find_all("td")[7]
tr2_colPrice = " | ".join([a_tag["title"] for a_tag in tr2_colPrice.find_all("a")])
tr2_colPrice

'6500 Battle Points | 299 diamonds'

Exploration detail to handle specific format in row 45+1 (plus header), and price column

In [9]:
tr45 = table.find_all("tr")[46]
tr45_colPrice = tr45.find_all("td")[7]
tr45_colPrice_unit = tr45_colPrice.find_all("img")[0]["alt"]
tr45_colPrice = tr45_colPrice.text.strip() + " " + tr45_colPrice_unit
tr45_colPrice 

'20 Lucky Gem'

## Turn into DataFrame

In [10]:
df_heroes = pd.DataFrame(rows)
df_heroes

Unnamed: 0,name,hero_url_page,hero_code,role,specialties,laning,release_year,price
0,Miya,https://mobile-legends.fandom.com/wiki/Miya,1,Marksman,Reap | Damage,Gold Laner,2016,10800 Battle Points | 399 ticket
1,Balmond,https://mobile-legends.fandom.com/wiki/Balmond,2,Fighter,Damage | Regen,EXP Laner,2016,6500 Battle Points | 299 diamonds
2,Saber,https://mobile-legends.fandom.com/wiki/Saber,3,Assassin,Charge | Reap,Jungler,2016,6500 Battle Points | 299 diamonds
3,Alice,https://mobile-legends.fandom.com/wiki/Alice,4,Mage | Tank,Charge | Regen,Mid Laner,2016,15000 Battle Points | 399 diamonds
4,Nana,https://mobile-legends.fandom.com/wiki/Nana,5,Mage,Poke | Burst,Mid Laner,2016,6500 Battle Points | 299 diamonds
...,...,...,...,...,...,...,...,...
116,Fredrinn,https://mobile-legends.fandom.com/wiki/Fredrinn,117,Tank | Fighter,Damage | Chase,EXP Laner,2022,32000 Battle Points | 599 diamonds
117,Joy,https://mobile-legends.fandom.com/wiki/Joy,118,Assassin,Chase | Damage,EXP Laner,2022,32000 Battle Points | 599 diamonds
118,Novaria,https://mobile-legends.fandom.com/wiki/Novaria,119,Mage,Burst | Poke,Mid Laner,,32000 Battle Points | 599 diamonds
119,Arlott,https://mobile-legends.fandom.com/wiki/Arlott,120,Fighter,Charge | Burst,EXP Laner,,32000 Battle Points | 599 diamonds


## Get statistics of specific Herro

In [11]:
def get_hero_by_name(df, name):
  row = df.loc[df["name"] == name]

  url = row["hero_url_page"].values[0]
  text = requests.get(url).text
  soup = BeautifulSoup(text, 'html5lib')
  return soup 


def get_attributes(hero_soup):
  try:
    attributes = hero_soup.find("span", {"id": "Attributes"})\
                    .find_parent("h2").find_next("table")
  except (AttributeError):
    return [False, 'HTML format is different from the Fandom standard table']

  # display(attributes)
  rows = []
  for tr in attributes.find_all('tr')[2:]:      # skip the first two rows
    row_data = {}
    all_td = tr.find_all("td")
    row_data["attribute"] = all_td[0].text.strip()
    
    # If there is no change during the level progression, set it to the 
    # same as base_lvl_01.
    row_data["base_lvl_01"] = all_td[1].text.strip()
    row_data["base_lvl_15"] = all_td[2].text.strip() \
      if len(all_td) == 4 else all_td[1].text.strip()


    rows.append(row_data)
  
  return rows

In [12]:
pick_hero = "Kadita"    # This page does not meet Fandom standard
# pick_hero = "Natalia"
hero_soup = get_hero_by_name(df_heroes, pick_hero)

### Some rules related to attributes

- All units have no base and extra magic power, but it can be increased     
  through talents, equipment, and hero skills
- Physical penetration, Magic penetration start at 0 but can be obtained     
  through emblems, items or even her abilities

In [13]:
get_attributes(hero_soup)
  

[False, 'HTML format is different from the Fandom standard table']

## Get attributes for all possible scraped-data of heroes

In [14]:
async def worker_list():
  print("W")

async def do_all_tasks(urls):
  print("DO_ALL_TASKS: Wait for worker")
  results = await asyncio.gather(*worker_list())
  print("DO_ALL_TASKS: results from calls")
  display(results)

def fetch_all(hero_urls):
  print("FETCH_ALL: Starting fetch all pages")
  asyncio.run(do_all_tasks)
  print("FETCH_ALL: Done")

In [15]:
hero_urls = df_heroes["hero_url_page"].to_list()
hero_urls_selected = hero_urls[:5]
hero_urls_selected

['https://mobile-legends.fandom.com/wiki/Miya',
 'https://mobile-legends.fandom.com/wiki/Balmond',
 'https://mobile-legends.fandom.com/wiki/Saber',
 'https://mobile-legends.fandom.com/wiki/Alice',
 'https://mobile-legends.fandom.com/wiki/Nana']

In [18]:
async def worker_list(urls):
  reponses = [None for _ in range(len(urls))]
  for idx, url in enumerate(urls):
    responses[idx] = await aiohttp.request('GET', url)
  return responses


RuntimeError: asyncio.run() cannot be called from a running event loop