In [10]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime

In [11]:
def get_cell_text(cell):
    """Return the whitespace-stripped text of ``cell``, or None if it is blank.

    ``cell`` is any object exposing a ``.text`` string attribute.
    """
    stripped = cell.text.strip()
    return stripped if stripped != "" else None
    
def get_tournament_data(tournament_data_elem):
    """Pull start date, end date and patch code out of an infobox element.

    Every ``div`` under ``tournament_data_elem`` is reduced to its stripped
    text.  Each text is treated as a possible label and the text of the *next*
    div as its value:

    * label containing both "start" and "date" -> ``start_date``
    * label containing both "end" and "date"   -> ``end_date``
    * label containing "patch"                 -> ``patch_code``

    Matching is case-insensitive.  When several labels match the same field,
    the last one wins.  Fields with no matching label stay ``None``.

    Returns:
        dict with keys ``start_date``, ``end_date`` and ``patch_code``.
    """
    texts = [div.get_text().strip() for div in tournament_data_elem.find_all('div')]

    info = {
        'start_date': None,
        'end_date': None,
        'patch_code': None
    }

    # Walk (label, following-value) pairs; the final div has no follower and
    # therefore is never used as a label.
    for label, following in zip(texts, texts[1:]):
        label = label.lower()
        mentions_date = "date" in label
        if mentions_date and "start" in label:
            info['start_date'] = following
        if mentions_date and "end" in label:
            info['end_date'] = following
        if "patch" in label:
            info['patch_code'] = following

    return info

def get_points_list(result_list):
    """Return the running score a team held *before* each game.

    Args:
        result_list: per-game results in play order; each entry is a number
            (added to the running score) or ``None`` (ignored).

    Returns:
        A list the same length as ``result_list``.  Entry ``i`` is the sum of
        all non-None results before game ``i``, so the first entry is always 0
        and the last game's result is never included.  An empty
        ``result_list`` yields an empty list, keeping this column the same
        length as the other per-game columns.
    """
    points_list = []
    running_total = 0

    for value in result_list:
        # Record the score going into this game, then account for its result.
        points_list.append(running_total)
        if value is not None:
            running_total += value

    return points_list

def get_team_names(match_elem):
    """Return ``(team1_name, team2_name)`` for a match popup element.

    Names are read from the ``title`` attribute of the first ``<a>`` inside
    the first two ``span.name`` elements of the popup header
    (``div.brkts-popup-header-dev``).
    """
    header = match_elem.find('div', class_="brkts-popup-header-dev")
    name_spans = header.find_all('span', class_="name")

    first_span, second_span = name_spans[0], name_spans[1]

    return (
        first_span.find('a').get('title'),
        second_span.find('a').get('title'),
    )

def parse_game_elems(game_elems):
    """Build per-game columns (sides, picks, results, duration, score) for one match.

    For every element in ``game_elems`` this reads:

    * the first two ``div`` elements matched by ``attrs={'class': False}``
      (presumably the class-less divs) as team 1's and team 2's picks, parsed
      with ``parse_pick_elem``;
    * the text of ``div.brkts-popup-body-element-vertical-centered`` as the
      game time string;
    * the first two ``div.brkts-popup-spaced`` elements as team 1's and
      team 2's result markers, decoded with ``check_green_tick``.

    Returns:
        dict of equal-order lists keyed ``t1_side``, ``t1_picks``,
        ``t1_result``, ``t2_side``, ``t2_picks``, ``t2_result``,
        ``game_time_str``, plus ``t1_points`` / ``t2_points`` computed from
        the result lists by ``get_points_list``.
    """
    t1_sides, t1_picks, t1_results = [], [], []
    t2_sides, t2_picks, t2_results = [], [], []
    durations = []

    for game in game_elems:
        # Draft: team 1's pick container comes first, team 2's second.
        pick_divs = game.find_all('div', attrs={'class': False})
        first_pick_div, second_pick_div = pick_divs[0], pick_divs[1]

        first_side, first_heroes = parse_pick_elem(first_pick_div)
        second_side, second_heroes = parse_pick_elem(second_pick_div)

        t1_sides.append(first_side)
        t1_picks.append(first_heroes)
        t2_sides.append(second_side)
        t2_picks.append(second_heroes)

        # Game length, kept as the raw display string.
        duration_div = game.find('div', class_="brkts-popup-body-element-vertical-centered")
        durations.append(duration_div.text.strip())

        # Result markers: one per team, in team order.
        marker_divs = game.find_all('div', class_="brkts-popup-spaced")
        first_marker, second_marker = marker_divs[0], marker_divs[1]
        first_outcome = check_green_tick(first_marker)
        second_outcome = check_green_tick(second_marker)

        t1_results.append(first_outcome)
        t2_results.append(second_outcome)

    return {
        't1_side': t1_sides,
        't1_picks': t1_picks,
        't1_result': t1_results,

        't2_side': t2_sides,
        't2_picks': t2_picks,
        't2_result': t2_results,

        'game_time_str': durations,
        't1_points': get_points_list(t1_results),
        't2_points': get_points_list(t2_results),
    }

def parse_pick_elem(team_pick_elem):
    """Return ``(team_side, hero_names)`` for one team's pick container.

    Each ``div`` under ``team_pick_elem`` is treated as one hero pick.  The
    hero's colour is derived from that div's *first* CSS class ('blue' or
    'red' as a substring, otherwise 'unknown'; a div with no class counts as
    'unknown').  The hero's name is the ``title`` of the first ``<a>`` inside
    the div.

    Only the first class is inspected on purpose: other class names on the
    page can contain "red" as a substring (e.g. "...-centered").

    Returns:
        team_side: the shared colour when every hero has the same colour,
            otherwise ``None`` (also ``None`` when there are no hero divs).
        hero_names: list of hero titles in document order.
    """
    hero_elems = team_pick_elem.find_all('div')
    hero_colors = []
    hero_names = []

    for hero_elem in hero_elems:

        # Get this hero's colour from its own first class.  Reading
        # hero_elems[0] here would make every colour identical and the
        # consistency check below meaningless.
        hero_classes = hero_elem.get('class')
        curr_hero_color = hero_classes[0] if hero_classes else ''
        if 'blue' in curr_hero_color:
            hero_colors.append('blue')
        elif 'red' in curr_hero_color:
            hero_colors.append('red')
        else:
            hero_colors.append('unknown')

        # Get hero name
        hero_name = hero_elem.find('a').get('title')
        hero_names.append(hero_name)

    # No picks found: there is no side to report.
    if not hero_colors:
        return None, hero_names

    # The team side is only trusted when all heroes agree on it.
    first_color = hero_colors[0]
    if all(element == first_color for element in hero_colors):
        team_side = first_color
    else:
        team_side = None

    return team_side, hero_names

def check_green_tick(result_elem):
    """Decode a result marker from the ``src`` of its first ``<img>``.

    Returns 1 when the lower-cased ``src`` contains "green", 0 when it
    contains "no" (and not "green"), and None otherwise.
    """
    icon_src = result_elem.find('img').get('src')
    if 'green' in icon_src.lower():
        return 1
    return 0 if 'no' in icon_src.lower() else None
    
def parse_ban_elem(ban_elem):
    """Extract both teams' bans, one list per table row, from a ban element.

    The first ``<table>`` under ``ban_elem`` is read row by row, skipping the
    first ``<tr>``.  In each remaining row the first ``<td>`` supplies team 1's
    bans and the third ``<td>`` supplies team 2's; a ban is the ``title`` of
    each ``<a>`` in the cell.

    Returns:
        dict with ``t1_bans`` and ``t2_bans``, each a list of per-row lists.
    """
    table = ban_elem.find('table')
    bans = {
        't1_bans': [],
        't2_bans': []
    }

    for table_row in table.find_all('tr')[1:]:
        row_cells = table_row.find_all('td')
        # Index 1 (the middle cell) is not read.
        left_cell, right_cell = row_cells[0], row_cells[2]

        left_links = left_cell.find_all('a')
        right_links = right_cell.find_all('a')

        bans['t1_bans'].append([link.get('title') for link in left_links])
        bans['t2_bans'].append([link.get('title') for link in right_links])

    return bans

def get_game_data(soup, max_matches=1):
    """Scrape per-game data for the matches found on a parsed bracket page.

    Args:
        soup: parsed page; must support ``find_all`` the way a BeautifulSoup
            object does.
        max_matches: how many match popups to process, counted from the top
            of the page.  Defaults to 1 (only the first match).  Pass ``None``
            to process every match.

    Returns:
        A DataFrame with one row per game.  Columns are the keys produced by
        ``parse_game_elems`` and ``parse_ban_elem`` plus ``date``, ``t1_name``
        and ``t2_name`` (repeated for every game of the match).  An empty
        DataFrame is returned when no match popup is found.
    """
    # Get list of elements containing match info
    match_elems = soup.find_all('div', class_='brkts-popup brkts-match-info-popup')
    if max_matches is not None:
        match_elems = match_elems[:max_matches]

    # Collect one frame per match and concatenate once at the end.
    match_frames = []

    for match_elem in match_elems:

        t1_name, t2_name = get_team_names(match_elem)

        # Zoom into match details element
        match_details_elem = match_elem.find('div', class_="brkts-popup-body")

        date_elem = match_details_elem.find('span', class_="timer-object")
        match_date = date_elem.text.strip()

        game_elems = match_details_elem.find_all(
            'div', class_="brkts-popup-body-element brkts-popup-body-game"
        )
        game_data_dict = parse_game_elems(game_elems)

        ban_elem = match_details_elem.find('div', class_="brkts-popup-mapveto")
        ban_data_dict = parse_ban_elem(ban_elem)

        match_data_dict = {**game_data_dict, **ban_data_dict}

        # Create match_data_df
        match_data_df = pd.DataFrame(match_data_dict)

        # Match-level values are broadcast onto every game row.
        match_data_df['date'] = match_date
        match_data_df['t1_name'] = t1_name
        match_data_df['t2_name'] = t2_name

        match_frames.append(match_data_df)

    # pd.concat rejects an empty list, so handle "no matches" explicitly.
    if not match_frames:
        return pd.DataFrame()

    return pd.concat(match_frames, ignore_index=True)

In [3]:
tournament_main_url = "https://liquipedia.net/mobilelegends/M4_World_Championship"

# Bound the request so a stalled connection cannot hang the kernel, and fail
# loudly on an HTTP error instead of parsing an error page as if it were data.
response = requests.get(tournament_main_url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')

In [15]:
def check_qualifying_stage_url(a_elem):
    """Return True when the element's text contains both "click" and "result".

    The comparison is case-insensitive.
    """
    link_text = a_elem.get_text().lower()
    return "click" in link_text and "result" in link_text

# Scan every anchor on the page and keep the first one whose text looks like
# a "click ... result" link; its href is the stage page path.
a_tags_elems = soup.find_all('a')
qualifying_stage_elem = [
    a_elem for a_elem in a_tags_elems if check_qualifying_stage_url(a_elem)
][0]
qualifying_stage_url = qualifying_stage_elem.get('href')
print(qualifying_stage_url)


/mobilelegends/M4_World_Championship/Group_Stage


In [6]:
# The tournament name is the text of the page's <h1 class="firstHeading">.
tournament_name_elem = soup.find('h1', class_='firstHeading')
tournament_name = tournament_name_elem.get_text().strip()
print(tournament_name)

M4 World Championship


In [9]:
tournament_data_elem = soup.find('div', class_='fo-nttax-infobox-wrapper infobox-mobilelegends')
# Get tournament data
tournament_date_dict = get_tournament_data(tournament_data_elem)

# Get the current date.  This notebook imports the `datetime` module (not the
# class), so the class is reached as datetime.datetime; a bare datetime.now()
# raises AttributeError.
current_date = datetime.datetime.now()

# Format the date as 'yyyymmdd'
formatted_date = current_date.strftime('%Y%m%d')



2023-01-01
2023-01-15
1.7.44C


In [88]:


# Show the scraped per-game table.
# NOTE(review): no visible cell binds game_data_df from a call such as
# get_game_data(soup) — confirm it is built before this cell runs on a fresh
# kernel; the output below may come from an earlier kernel session.
print(game_data_df)

  t1_side                                    t1_picks t1_result t2_side  \
0     red  [Lapu-Lapu, Fredrinn, Yve, Karrie, Khufra]         L    blue   
1    blue      [Uranus, Fanny, Kagura, Harith, Atlas]         L     red   
2     red    [Gloo, Martis, Pharsa, Irithel, Natalia]         L    blue   

                                     t2_picks t2_result game_time_str  \
0  [Kaja, Beatrix, Lylia, Hayabusa, Yu Zhong]         W         15:25   
1        [Kaja, Karrie, Kadita, Martis, Gloo]         W         18:06   
2  [Mathilda, Wanwan, Lylia, Hayabusa, Grock]         W         13:09   

                                 t1_ban  \
0  [Gloo, Fanny, Wanwan, Harith, Lunox]   
1    [Joy, Yve, Wanwan, Hayabusa, Ling]   
2   [Kaja, Fanny, Yve, Kadita, Rafaela]   

                                            t2_ban  \
0        [Natalia, Atlas, Joy, Valentina, Faramis]   
1  [Grock, Lapu-Lapu, Valentina, Natalia, Faramis]   
2           [Lunox, Ling, Joy, Valentina, Faramis]   

                

In [None]:
# Output to csv
csv_name = "game_info.csv"
# No visible cell defines script_dir, and a notebook has no __file__ to derive
# it from, so write into the kernel's current working directory.
script_dir = os.getcwd()
csv_path = os.path.join(script_dir, csv_name)

# hero_df is not defined in any visible cell; game_data_df is the frame this
# notebook prints, and it matches the "game_info" file name.
game_data_df.to_csv(csv_path, index=False)

print(f"DataFrame saved to {csv_path}")