In [1]:
from bs4 import BeautifulSoup, Comment
from urllib.request import urlopen
import csv
import re
import time
import random
import pandas as pd
from itertools import repeat
import os

In [2]:
#For saving 
def get_folder_path(folder_name,normalized_tournament,file_sufix):
    """Build the CSV path for one tournament and file kind, creating its folder.

    Args:
        folder_name (str): Root output folder.
        normalized_tournament (str): Tournament name; used both as the
            sub-folder name and as the trailing part of the file name.
        file_sufix (str): Leading part of the file name (e.g. "draft").

    Returns:
        str: ``<folder_name>/<normalized_tournament>/<file_sufix>_<normalized_tournament>.csv``.

    Raises:
        ValueError: If ``file_sufix`` is None.
    """
    if file_sufix is None:
        # Fail before touching the disk; otherwise a "None_<tournament>.csv"
        # path would be produced silently.
        raise ValueError("Add file sufix")

    folder_path = os.path.join(folder_name, normalized_tournament)
    os.makedirs(folder_path, exist_ok=True)

    return os.path.join(folder_path, f'{file_sufix}_{normalized_tournament}.csv')

def normalize_filename(name):
    """Turn a display name into a lowercase, underscore-separated file-name stem.

    Characters other than word characters, whitespace and hyphens are dropped,
    every whitespace run becomes a single underscore, and leading/trailing
    underscores are removed.
    """
    without_punctuation = re.sub(r'[^\w\s-]', '', name.lower())
    underscored = re.sub(r'\s+', '_', without_punctuation)
    return underscored.strip('_')

def save_draft_to_csv(draft, url,folder="csv",encoding = 'utf-8'):
    """Append the two draft rows of one match to the tournament's draft CSV.

    Args:
        draft (dict): Must hold "header", "team_A" and "team_B" lists.
        url (str): Match URL, stored in an extra "source_url" column.
        folder (str, optional): Root output folder. Defaults to "csv".
        encoding (str, optional): File encoding. Defaults to 'utf-8'.
    """
    # The tournament name is read from the last value of the team_A row
    # (a bit odd, but that is where the event ends up).
    event_slug = normalize_filename(draft['team_A'][-1])
    csv_path = get_folder_path(folder_name=folder,normalized_tournament=event_slug,file_sufix="draft")

    columns = draft["header"] + ["source_url"]
    # Checked before opening: append mode would create the file.
    write_header = not os.path.isfile(csv_path)

    with open(csv_path, mode='a', newline='', encoding=encoding) as csv_file:
        out = csv.writer(csv_file)
        if write_header:
            out.writerow(columns)
        for side in ("team_A", "team_B"):
            out.writerow(draft[side] + [url])

def save_round_detail_to_csv(detail_round_dict, folder="csv", encoding='utf-8'):  # stats from the teams
    """Append column-oriented round details to the tournament's round-detail CSV.

    Args:
        detail_round_dict (dict): Maps column name to a list of values. Keys
            become the header; the lists are zipped into rows. The first
            "event" value selects the tournament folder and file.
        folder (str, optional): Root output folder. Defaults to "csv".
        encoding (str, optional): File encoding. Defaults to 'utf-8'.

    Returns:
        None. Nothing is written when the "event" column is empty.
    """
    events = detail_round_dict["event"]
    if not events:
        # No rows were collected, so there is no event name to derive a path from.
        return

    normalized_tournament = normalize_filename(events[0])

    file_path = get_folder_path(folder_name=folder,normalized_tournament=normalized_tournament,file_sufix="round_detail")

    header = list(detail_round_dict)
    file_exists = os.path.isfile(file_path)

    # The with-block closes the file; no explicit close() is needed.
    with open(file_path, "a", newline="",encoding=encoding) as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(header)

        writer.writerows(zip(*detail_round_dict.values()))

def save_player_performance_to_csv(player_performance_dict, folder="csv", encoding='utf-8'):
    """Append column-oriented player performance stats to the tournament CSV.

    Args:
        player_performance_dict (dict): Maps column name to a list of values.
            Keys become the header; the lists are zipped into rows. The first
            "event" value selects the tournament folder and file.
        folder (str, optional): Root output folder. Defaults to "csv".
        encoding (str, optional): File encoding. Defaults to 'utf-8'.

    Returns:
        None. Nothing is written when the "event" column is empty.
    """
    events = player_performance_dict["event"]
    if not events:
        # An empty dict carries no rows and no event name to build the path from.
        return

    normalized_tournament = normalize_filename(events[0])

    file_path = get_folder_path(folder_name=folder,normalized_tournament=normalized_tournament,file_sufix="player_performance")

    header = list(player_performance_dict)
    file_exists = os.path.isfile(file_path)

    # The with-block closes the file; no explicit close() is needed.
    with open(file_path, "a", newline="",encoding=encoding) as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(header)

        writer.writerows(zip(*player_performance_dict.values()))

def save_team_economy(economy_dict, folder="csv",encoding="utf-8"):
    """Append column-oriented team economy data to the tournament CSV.

    Args:
        economy_dict (dict): Maps column name to a list of values. Keys become
            the header; the lists are zipped into rows, so rows stop at the
            shortest column. The first "event" value selects the tournament
            folder and file.
        folder (str, optional): Root output folder. Defaults to "csv".
        encoding (str, optional): File encoding. Defaults to "utf-8".

    Returns:
        None. Nothing is written when the "event" column is empty.
    """
    events = economy_dict["event"]
    if not events:
        # No rows were collected, so there is no event name to derive a path from.
        return

    normalized_tournament = normalize_filename(events[0])

    file_path = get_folder_path(folder_name=folder,normalized_tournament=normalized_tournament,file_sufix="team_economy")

    header = list(economy_dict)
    file_exists = os.path.isfile(file_path)

    # The with-block closes the file; no explicit close() is needed.
    with open(file_path, "a", newline="",encoding=encoding) as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(header)

        writer.writerows(zip(*economy_dict.values()))

In [9]:
#soup opener
def soup_open(url=None, decode="iso-8859-1"):
    """Download a page and parse it with BeautifulSoup.

    Args:
        url (str): Address of the page to download. Required, despite the
            None default.
        decode (str, optional): Codec used to decode the response bytes.
            Defaults to "iso-8859-1".

    Returns:
        bs4.BeautifulSoup: The page parsed with the "html.parser" backend.

    Raises:
        ValueError: If ``url`` is None.
    """
    if url is None:
        raise ValueError("Add a url")

    # The context manager closes the HTTP response even if read/decode fails.
    with urlopen(url) as page:
        html = page.read().decode(decode)

    return BeautifulSoup(html, "html.parser")

#Basic match info: 
def get_basic_match_info(soup):
    """Collect the basic metadata of a match page into a flat dict.

    The page <title> is expected to hold five "|"-separated fields; the first
    four are read as teams, event, tournament instance and type. The teams
    field is expected to look like "<team a> vs. <team b>".

    Args:
        soup (bs4.BeautifulSoup): Parsed match page.

    Returns:
        dict: Keys "team_a", "team_b", "team_a_tricode", "team_b_tricode",
        "event", "status", "bo", "date", "patch", "tournament_instance" and
        "type". "patch" is "No patch" when it cannot be read.

    Raises:
        ValueError: If the title or the teams field has an unexpected format.
    """
    title_text = soup.find("title").get_text(strip=True)
    title_match = re.search(
        r"^([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)$", title_text
    )
    if title_match is None:
        raise ValueError(f"Unexpected page title format: {title_text!r}")

    # The fifth title field is matched but not used.
    teams_string, event, tournament_instance, match_type = (
        title_match.group(index).strip() for index in range(1, 5)
    )

    teams_match = re.search(r"^(.+?)\s+vs\.\s+(.+)$", teams_string)
    if teams_match is None:
        raise ValueError(f"Cannot split team names from: {teams_string!r}")
    team_a, team_b = teams_match.groups()

    # The third and fourth "team" divs of the page carry the two tricodes.
    team_tricodes = soup.find_all("div", {"class": "team"})
    team_a_tricode = team_tricodes[2].get_text(strip=True).strip()
    team_b_tricode = team_tricodes[3].get_text(strip=True).strip()

    match_notes = soup.find_all("div", {"class": "match-header-vs-note"})
    status = match_notes[0].get_text().strip()
    # Only the last character of the second note is kept (e.g. "3" from "Bo3").
    bo = match_notes[1].get_text().strip()[-1]

    # Header info
    header = soup.find("div", {"class": "match-header-super"})
    date = soup.find_all("div", class_="moment-tz-convert")[0].get("data-utc-ts")
    try:
        patch = header.find("div", style="font-style: italic;").get_text(strip=True)
    except AttributeError as e:
        # Either the header or the italic patch div is missing from the page.
        print(f"Error in patch{e}")
        patch = "No patch"

    return {
        "team_a": team_a,
        "team_b": team_b,
        "team_a_tricode": team_a_tricode,
        "team_b_tricode": team_b_tricode,
        "event": event,
        "status": status,
        "bo": bo,
        "date": date,
        "patch": patch,
        "tournament_instance": tournament_instance,
        "type": match_type,
    }


# Map draft: 
def get_map_draft(soup):
    """Return the map veto entries found in the match header note.

    The text of the "match-header-note" div is split on ";" and each piece is
    stripped of surrounding whitespace.

    Args:
        soup (bs4.BeautifulSoup): Parsed match page.

    Returns:
        list[str]: One string per veto entry, or an empty list when the note
        div is missing from the page.
    """
    try:
        note_text = soup.find(
            "div", {"class": "match-header-note"}
        ).get_text(strip=True)
    except AttributeError as e:
        # find() returned None: report it and hand back an empty draft instead
        # of failing later on an unbound local.
        print(f"Error en get_map_draft: {e}")
        return []

    return [x.strip() for x in note_text.split(sep=";")]

def pickBansExtractor(soup, basic_match_info=None):
    """Build the map draft of a match as one row per team.

    Args:
        soup (bs4.BeautifulSoup): Parsed match page.
        basic_match_info (dict): Must provide "team_a_tricode",
            "team_b_tricode", "bo" and "event". Required, despite the None
            default.

    Returns:
        dict: "header" (column names) plus "team_A" and "team_B" rows. Each
        row holds team, rival, seven map names, the order marker ("A"/"B"),
        bo, date and event.

    Raises:
        ValueError: If ``basic_match_info`` is None.
        IndexError: If fewer than seven map entries were parsed from the draft.
    """
    if basic_match_info is None:
        raise ValueError("Add basic_match_info dict")

    picks_bans = get_map_draft(soup)

    header = [
        "team",
        "rival",
        "team_1_select_1",
        "team_2_select_1",
        "team_1_select_2",
        "team_2_select_2",
        "team_1_select_3",
        "team_2_select_3",
        "decider",
        "order",
        "bo",
        "date",
        "event",
    ]

    team_a = basic_match_info["team_a_tricode"]
    team_b = basic_match_info["team_b_tricode"]

    maps_team_a = []
    for element in picks_bans:
        words = element.split()
        if len(words) == 3:
            # Three-word entries keep the last word as the map name.
            maps_team_a.append(words[-1])
        elif len(words) == 2:
            # Two-word entries keep the first word as the map name.
            maps_team_a.append(words[0])

    # Swap each pair so team B's own selections land in the "team_1" columns;
    # the seventh entry (decider) stays in place.
    order_team_B = [1, 0, 3, 2, 5, 4, 6]  # for order swaping
    maps_team_b = [maps_team_a[i] for i in order_team_B]

    bo = basic_match_info["bo"]
    date = soup.find_all("div", class_="moment-tz-convert")[0].get("data-utc-ts")
    event = basic_match_info["event"]

    return {
        "header": header,
        "team_A": [team_a, team_b, *maps_team_a, "A", bo, date, event],
        "team_B": [team_b, team_a, *maps_team_b, "B", bo, date, event],
    }

#Round detail

def round_detail_to_dict(round_detail):
    """Flatten one map's round info into per-round columns and save it twice.

    The columns are first saved from team A's perspective, then mirrored
    (teams, scores and side swapped) and saved from team B's perspective,
    both through save_round_detail_to_csv.

    Args:
        round_detail (dict): Per-map round info with the scalar keys
            "team_a", "team_b", "map", "date", "map_order", "event" and the
            list keys "teamATT", "teamBCT", "ratk", "winConAtk", "teamACT",
            "teamBTT", "rdef", "winConDef".
    """
    column_names = (
        "teamA", "map", "side", "teamB", "rndA", "rndB",
        "round", "winCon", "date", "map_order", "event",
    )
    round_detail_for_csv = {column: [] for column in column_names}

    def add_side(side, results_a_key, results_b_key, round_key, win_con_key):
        # One output row per entry of team A's result list for this side.
        for position, result_a in enumerate(round_detail[results_a_key]):
            row = {
                "teamA": round_detail["team_a"],
                "teamB": round_detail["team_b"],
                "side": side,
                "rndA": result_a,
                "rndB": round_detail[results_b_key][position],
                "map": round_detail["map"],
                "round": round_detail[round_key][position],
                "winCon": round_detail[win_con_key][position],
                "date": round_detail["date"],
                "map_order": round_detail["map_order"],
                "event": round_detail["event"],
            }
            for column, cell in row.items():
                round_detail_for_csv[column].append(cell)

    add_side("atk", "teamATT", "teamBCT", "ratk", "winConAtk")
    add_side("def", "teamACT", "teamBTT", "rdef", "winConDef")

    save_round_detail_to_csv(round_detail_for_csv)

    # Same rows seen from team B: swap the team/score columns and flip the side.
    team_b_prespective = dict(round_detail_for_csv)
    team_b_prespective["teamA"] = round_detail_for_csv["teamB"]
    team_b_prespective["teamB"] = round_detail_for_csv["teamA"]
    team_b_prespective["rndA"] = round_detail_for_csv["rndB"]
    team_b_prespective["rndB"] = round_detail_for_csv["rndA"]
    team_b_prespective["side"] = [
        "def" if "atk" in side else "atk" for side in round_detail_for_csv["side"]
    ]

    save_round_detail_to_csv(team_b_prespective)



def get_round_detail(soup, basic_match_info=None):
    """Extract round-by-round results per map and save each map to CSV.

    Walks every "vlr-rounds-row-col" div. A new map is assumed to start when
    the round number drops below the previous one; at that point the rounds
    collected so far are saved through round_detail_to_dict. The last map is
    saved after the loop.

    Args:
        soup (bs4.BeautifulSoup): Parsed match page.
        basic_match_info (dict): Must provide "team_a_tricode",
            "team_b_tricode", "date" and "event". Required, despite the None
            default.

    Returns:
        dict: The round info of the last map processed.

    Raises:
        ValueError: If ``basic_match_info`` is None.
    """
    if basic_match_info is None:
        raise ValueError("basic_match_info required")

    def new_round_info():
        # Fresh accumulator for one map.
        return {
            "team_a": None,
            "team_b": None,
            "map": None,
            "teamACT": [],
            "teamATT": [],
            "teamBCT": [],
            "teamBTT": [],
            "ratk": [],
            "rdef": [],
            "winConAtk": [],
            "winConDef": [],
            "date": None,
            "map_order": None,
            "event": None,
        }

    def victory_condition_of(round_col):
        # Win condition is the file name of the column's first <img>, minus ".webp".
        img_markup = str(round_col.find_all("img")[0])
        file_name = img_markup[0:-3].split("/")[-1]
        # Remove the suffix only: rstrip(".webp") would also eat trailing
        # letters of the name itself ("defuse" -> "defus", "time" -> "tim").
        if file_name.endswith(".webp"):
            file_name = file_name[: -len(".webp")]
        return file_name

    def record_round(info, squares, round_number, win_condition):
        # squares[0] is the first "rnd-sq" div (recorded for team A),
        # squares[1] the second one (recorded for team B).
        if squares[0] == ' mod-win mod-ct">':
            info["teamACT"].append(1)
            info["teamBTT"].append(0)
            info["rdef"].append(round_number)
            info["winConDef"].append(win_condition)
        elif squares[0] == ' mod-win mod-t">':
            info["teamATT"].append(1)
            info["teamBCT"].append(0)
            info["ratk"].append(round_number)
            info["winConAtk"].append(win_condition)
        if squares[1] == ' mod-win mod-ct">':
            info["teamBCT"].append(1)
            info["teamATT"].append(0)
            info["ratk"].append(round_number)
            info["winConAtk"].append(win_condition)
        elif squares[1] == ' mod-win mod-t">':
            info["teamBTT"].append(1)
            info["teamACT"].append(0)
            info["rdef"].append(round_number)
            info["winConDef"].append(win_condition)

    maps = []
    for map_block in soup.find_all("div", class_="map"):
        map_name_span = map_block.find("span", attrs={"style": "position: relative;"})
        maps.append(map_name_span.find(string=True, recursive=False).strip())

    bloques = soup.find_all("div", class_="vlr-rounds-row-col")
    control_value = 0
    mapNumber = 0

    round_info = new_round_info()
    round_info["date"] = basic_match_info["date"]
    round_info["map"] = maps[mapNumber]
    round_info["map_order"] = mapNumber
    round_info["event"] = basic_match_info["event"]

    for ronda in bloques:
        try:
            round_info["team_a"] = basic_match_info["team_a_tricode"]
            round_info["team_b"] = basic_match_info["team_b_tricode"]
            value = int(ronda.find_all("div", class_="rnd-num")[0].text.strip())
            victory_condition = victory_condition_of(ronda)

            if value < control_value:
                # Round counter went backwards: flush the finished map and
                # start collecting the next one.
                mapNumber += 1
                control_value = value
                round_detail_to_dict(round_info)
                round_info = new_round_info()
                round_info["team_a"] = basic_match_info["team_a_tricode"]
                round_info["team_b"] = basic_match_info["team_b_tricode"]
                round_info["map_order"] = mapNumber
                round_info["map"] = maps[mapNumber]
                round_info["date"] = basic_match_info["date"]
                round_info["event"] = basic_match_info["event"]
            else:
                control_value = value

            round_for_eval = re.findall(r"rnd-sq(.*)", str(ronda))
            record_round(round_info, round_for_eval, value, victory_condition)
        except Exception:
            # Best effort: columns that cannot be parsed (e.g. no round number
            # or no image) are skipped. Exception, not a bare except, so that
            # KeyboardInterrupt/SystemExit still propagate.
            continue

    round_detail_to_dict(round_info)
    return round_info

def get_player_performance(url, basic_match_info):
    """Scrape the per-player performance table of every game of a match.

    Args:
        url (str): Match URL; the performance tab query string is appended.
        basic_match_info (dict): Must provide "bo", "status", "date" and
            "event".

    Returns:
        dict: Column name -> list of values, one entry per player row. Every
        list is empty when the match status is not "final" or it is not a
        Bo3/Bo5; in that case the page is not downloaded.
    """
    stat_columns = [
        "2K", "3K", "4K", "5K",
        "1v1", "1v2", "1v3", "1v4", "1v5",
        "ECON", "PL", "DE",
    ]
    performance_dict = {
        column: []
        for column in ["player", "team", *stat_columns, "map", "date", "event"]
    }

    bo = int(basic_match_info["bo"])  # Could be not necesary to do this check
    status = basic_match_info["status"]

    # Decide before downloading: the page is only parsed for finished Bo3/Bo5.
    if status != "final" or bo not in (3, 5):
        return performance_dict

    def leading_int(text):
        # Cells start with the count; anything after the leading digits is ignored.
        match = re.match(r"^\d+", text)
        return int(match.group()) if match else 0

    soup_performance = soup_open(url + '/?game=all&tab=performance')

    game_divs = soup_performance.find_all("div", {"class": "vm-stats-game"})
    game_ids = [
        div.get("data-game-id") for div in game_divs if div.has_attr("data-game-id")
    ]

    # "all" comes first so that map_list lines up with game_ids by position.
    map_list = ["all"]
    for nav_item in soup_performance.find_all(
        "div", {"class": "vm-stats-gamesnav-item js-map-switch"}
    ):
        map_list.append(nav_item.get_text(strip=True)[1:])

    for index, game_id in enumerate(game_ids):
        game_div = soup_performance.find(
            "div", {"class": "vm-stats-game", "data-game-id": game_id}
        )
        # Keep only rows with more than 13 child nodes, then drop the first of those.
        wide_rows = [row for row in game_div.find_all("tr")[1:] if len(row) > 13]

        for fila in wide_rows[1:]:
            celdas = fila.find_all("td")
            if len(celdas) == 0:  # Check if info is valid (map is played)
                continue

            # First cell text splits into player name and team.
            nombre_jugador = celdas[0].find("div").find_all("div")[0].get_text().split()

            performance_dict["player"].append(nombre_jugador[0])
            performance_dict["team"].append(nombre_jugador[1])
            # Stat columns sit in cells 2..13, in the same order as stat_columns.
            for offset, column in enumerate(stat_columns, start=2):
                performance_dict[column].append(
                    leading_int(celdas[offset].get_text(strip=True))
                )
            performance_dict["date"].append(basic_match_info["date"])
            performance_dict["event"].append(basic_match_info["event"])
            performance_dict["map"].append(map_list[index])

    return performance_dict

def get_team_economy(url,basic_match_info):
    """Scrape the economy tab of a match from both teams' perspectives.

    Args:
        url (str): Match URL; the economy tab query string is appended.
        basic_match_info (dict): Must provide "event" and "date".

    Returns:
        list[dict]: ``[economy_dict, team_b_economy_dict]``. Both map column
        name -> list of values; the second one has the team_a/team_b columns
        swapped and shares its list objects with the first.
    """
    column_names = [
        "team_a", "team_b", "team_a_economy", "team_b_economy", "round",
        "team_a_bank", "team_b_bank", "map", "date", 'event',
    ]
    economy_dict = {column: [] for column in column_names}

    soup_economy = soup_open(url + "/?game=all&tab=economy")

    game_divs = soup_economy.find_all("div", {"class": "vm-stats-game"})
    game_ids = [
        div.get("data-game-id") for div in game_divs if div.has_attr("data-game-id")
    ]

    map_list = [
        nav_item.get_text(strip=True)[1:]
        for nav_item in soup_economy.find_all(
            "div", {"class": "vm-stats-gamesnav-item js-map-switch"}
        )
    ]

    event = basic_match_info["event"]
    date = basic_match_info["date"]

    # The last game id is deliberately left out of the iteration.
    for map_index, game_id in enumerate(game_ids[:-1]):
        game_div = soup_economy.find(
            "div", {"class": "vm-stats-game", "data-game-id": game_id}
        )
        rows = game_div.find_all("tr")[1:]

        teams = []
        round_number = 0

        # Economy texts are read from HTML comments inside the game div: each
        # comment is parsed on its own and its first non-empty <div> text kept.
        comments = game_div.find_all(string=lambda text: isinstance(text, Comment))
        both_team_economy = []
        for comment in comments:
            commented_div = BeautifulSoup(comment, 'html.parser').find('div')
            if commented_div and commented_div.text.strip():
                both_team_economy.append(commented_div.text.strip())

        # Entries alternate between the teams: odd positions go to team A.
        for index, element in enumerate(both_team_economy):
            if index % 2 != 0:
                economy_dict["team_a_economy"].append(element)
            else:
                economy_dict["team_b_economy"].append(element)

        for fila in rows[1:]:
            celdas = fila.find_all("td")
            # Team names come from the first cell of the first two rows.
            if len(teams) < 2:
                teams.append(
                    celdas[0]
                    .find_all("div", {"class": "team"})[0]
                    .get_text(strip=True)
                )
            for celda in celdas:
                team_bank = celda.find_all("div", {"class": "bank"})
                if len(team_bank) > 0:
                    # Each cell holding bank divs counts as one round.
                    round_number += 1
                    economy_dict["team_a"].append(teams[0])
                    economy_dict["team_b"].append(teams[1])
                    economy_dict["team_a_bank"].append(
                        team_bank[0].get_text(strip=True)
                    )
                    economy_dict["team_b_bank"].append(
                        team_bank[1].get_text(strip=True)
                    )
                    economy_dict["round"].append(round_number)
                    economy_dict["map"].append(map_list[map_index])
                    economy_dict["date"].append(date)
                    economy_dict['event'].append(event)

    # Built after the loop so it also exists when there was nothing to iterate.
    team_b_economy_dict = {
        "team_a": economy_dict["team_b"],
        "team_b": economy_dict["team_a"],
        "team_a_economy": economy_dict["team_b_economy"],
        "team_b_economy": economy_dict["team_a_economy"],
        "round": economy_dict["round"],
        "team_a_bank": economy_dict["team_b_bank"],
        "team_b_bank": economy_dict["team_a_bank"],
        "map": economy_dict["map"],
        "date": economy_dict["date"],
        'event': economy_dict["event"],
    }

    return [economy_dict, team_b_economy_dict]

In [4]:
def check_valid_match(soup):
    """Tell whether a match page should be processed.

    A match is valid when the third "|"-separated field of the page title is
    not "Showmatch" and the first "match-header-vs-note" div reads "final".

    Args:
        soup (bs4.BeautifulSoup): Parsed match page.

    Returns:
        bool: True for a valid match. False otherwise, including when the
        title does not have five "|"-separated fields or the page has no
        "match-header-vs-note" div.
    """
    event_text = soup.find("title").get_text(strip=True)
    result = re.search(r"^([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)$", event_text)

    match_notes = soup.find_all("div", {"class": "match-header-vs-note"})

    if result is None or not match_notes:
        # Page does not look like a regular match page: treat it as invalid
        # instead of crashing on a missing regex match or note.
        return False

    status = match_notes[0].get_text().strip()
    return result.group(3).strip() != "Showmatch" and status == "final"


def linkExtractor(url):
    """List the absolute URLs of all matches linked from a vlr.gg page.

    Only hrefs that start with "/" followed by a digit are kept; each one is
    prefixed with "https://www.vlr.gg".

    Args:
        url (str): Page to scan (e.g. the "all matches" page of an event).

    Returns:
        list[str]: Match URLs in page order; empty when none are found.
    """
    soup = soup_open(url)

    hrefs = [a["href"] for a in soup.find_all("a", href=True)]

    # Filter once, after collecting: this also works when the page has no anchors.
    return [
        "https://www.vlr.gg" + href
        for href in hrefs
        if re.match(r"^/\d+", href)
    ]


def get_draft_file_path(basic_match_info, folder="csv"):
    """Return the draft CSV path for the match's event, creating its folder.

    Args:
        basic_match_info (dict): Must provide "event".
        folder (str, optional): Root output folder. Defaults to "csv".

    Returns:
        str: ``<folder>/<normalized event>/draft_<normalized event>.csv``.
    """
    event_slug = normalize_filename(basic_match_info["event"])
    event_dir = os.path.join(folder, event_slug)
    os.makedirs(event_dir, exist_ok=True)
    file_name = "draft_" + event_slug + ".csv"
    return os.path.join(event_dir, file_name)


def was_url_already_processed(file_path, url):
    """Check whether ``url`` already appears in the CSV's "source_url" column.

    Args:
        file_path (str): CSV file to inspect.
        url (str): Match URL to look for.

    Returns:
        bool: False when the file does not exist; otherwise whether ``url``
        is one of the values of the "source_url" column.
    """
    if os.path.exists(file_path):
        processed_urls = set(pd.read_csv(file_path).source_url)
        return url in processed_urls
    return False

def process_match(url):
    """Scrape one match page and append its data to the tournament CSVs.

    Waits 1-2 seconds, downloads the page, and skips it when it is not a
    valid match or when its URL is already present in the draft CSV.
    Otherwise saves the draft, the round detail, the player performance and
    the team economy (both perspectives).

    Args:
        url (str): Match URL.
    """
    # Short random pause before each request.
    time.sleep(random.randint(1, 2))
    soup = soup_open(url)

    if not check_valid_match(soup):
        print(f"Not valid match: {url}")
        return

    print(f"processing: {url}")
    basic_match_info = get_basic_match_info(soup)
    path = get_draft_file_path(basic_match_info=basic_match_info)

    # The draft CSV doubles as the record of already-processed matches.
    if was_url_already_processed(file_path=path,url=url):
        print(f"already processed: {url}")
        return

    # Draft
    draft = pickBansExtractor(soup=soup, basic_match_info=basic_match_info)
    save_draft_to_csv(draft,url)

    # Round detail (get_round_detail saves to CSV itself)
    get_round_detail(soup=soup,basic_match_info=basic_match_info)

    # Player performance
    performance_dict = get_player_performance(url=url,basic_match_info=basic_match_info)
    save_player_performance_to_csv(player_performance_dict=performance_dict)

    # Team economy: one save per team perspective
    team_economy_dict = get_team_economy(url,basic_match_info=basic_match_info)
    for perspective in (0, 1):
        save_team_economy(team_economy_dict[perspective])

In [5]:
# Collect every match link from the event's "all matches" page and keep the last four.
url = 'https://www.vlr.gg/event/matches/2282/valorant-masters-toronto-2025/?series_id=all'
lista = linkExtractor(url)
lista_recortada = lista[-4:]


In [178]:
# Manual override: replaces the list taken from linkExtractor with two specific matches.
lista_recortada = ["https://www.vlr.gg/490314/paper-rex-vs-team-liquid-valorant-masters-toronto-2025-r3-1-1",
                   "https://www.vlr.gg/498628/paper-rex-vs-fnatic-valorant-masters-toronto-2025-gf"]

In [10]:
# Scrape and save every match in the trimmed list.
# Note: the loop variable rebinds the global `url` used by other cells.
for url in lista_recortada:
    process_match(url)

processing: https://www.vlr.gg/498633/g2-esports-vs-fnatic-valorant-masters-toronto-2025-lr3
processing: https://www.vlr.gg/498634/wolves-esports-vs-fnatic-valorant-masters-toronto-2025-lbf
Not valid match: https://www.vlr.gg/507067/team-tarik-vs-team-toast-valorant-masters-toronto-2025-showmatch
processing: https://www.vlr.gg/498628/paper-rex-vs-fnatic-valorant-masters-toronto-2025-gf


In [275]:
def get_team_economy(url,basic_match_info):
    """Scrape the economy tab of a match into a single column-oriented dict.

    NOTE(review): this redefinition shadows the earlier get_team_economy of
    this notebook once the cell runs. Unlike that one it returns one dict
    (not a two-element list, which is what process_match indexes), starts
    map_list with "all" and assigns even-positioned economy entries to team A.

    Args:
        url (str): Match URL; the economy tab query string is appended.
        basic_match_info (dict): Must provide "event" and "date".

    Returns:
        dict: Column name -> list of values.
    """
    economy_dict = {
        column: []
        for column in (
            "team_a", "team_b", "team_a_economy", "team_b_economy", "round",
            "team_a_bank", "team_b_bank", "map", "date", 'event',
        )
    }

    soup_economy = soup_open(url + "/?game=all&tab=economy")

    game_ids = []
    for game_block in soup_economy.find_all("div", {"class": "vm-stats-game"}):
        if game_block.has_attr("data-game-id"):
            game_ids.append(game_block.get("data-game-id"))

    nav_items = soup_economy.find_all(
        "div", {"class": "vm-stats-gamesnav-item js-map-switch"}
    )
    map_list = ["all"] + [item.get_text(strip=True)[1:] for item in nav_items]

    event = basic_match_info["event"]
    date = basic_match_info["date"]

    # The last game id is left out of the iteration.
    for position, game_id in enumerate(game_ids[:-1]):
        game_block = soup_economy.find(
            "div", {"class": "vm-stats-game", "data-game-id": game_id}
        )
        table_rows = game_block.find_all("tr")[1:]

        team_names = []
        rounds_seen = 0

        # Economy texts live in HTML comments inside the game block.
        both_team_economy = []
        for comment in game_block.find_all(string=lambda text: isinstance(text, Comment)):
            inner_div = BeautifulSoup(comment, 'html.parser').find('div')
            if inner_div and inner_div.text.strip():
                both_team_economy.append(inner_div.text.strip())

        # Even positions belong to team A, odd positions to team B.
        economy_dict["team_a_economy"].extend(both_team_economy[0::2])
        economy_dict["team_b_economy"].extend(both_team_economy[1::2])

        for table_row in table_rows[1:]:
            cells = table_row.find_all("td")
            if len(team_names) < 2:
                first_team_div = cells[0].find_all("div", {"class": "team"})[0]
                team_names.append(first_team_div.get_text(strip=True))
            for cell in cells:
                banks = cell.find_all("div", {"class": "bank"})
                if len(banks) == 0:
                    continue
                rounds_seen += 1
                economy_dict["team_a"].append(team_names[0])
                economy_dict["team_b"].append(team_names[1])
                economy_dict["team_a_bank"].append(banks[0].get_text(strip=True))
                economy_dict["team_b_bank"].append(banks[1].get_text(strip=True))
                economy_dict["round"].append(rounds_seen)
                economy_dict["map"].append(map_list[position])
                economy_dict["date"].append(date)
                economy_dict['event'].append(event)

    return economy_dict

In [171]:
# Display the current value of the global `url` (it depends on which cells ran before).
url

'https://www.vlr.gg/490314/paper-rex-vs-team-liquid-valorant-masters-toronto-2025-r3-1-1'

In [172]:
# NOTE(review): `basic_match_info` is not assigned at top level anywhere in the visible
# cells; this relies on leftover kernel state and will fail after Restart & Run All.
performance_dict = get_player_performance(url, basic_match_info)

In [276]:
# Passes the result straight to save_team_economy, so this expects the get_team_economy
# variant that returns a single dict.
# NOTE(review): `basic_match_info` is not assigned at top level in the visible cells.
team_economy_dict = get_team_economy(url,basic_match_info=basic_match_info)
save_team_economy(team_economy_dict)

In [253]:
# TODO: settle on one naming convention for parsed pages ("sopa" vs. "soup").
# Scratch accumulator for the economy scrape: one list per CSV column, filled
# by the cells that follow.
dict_economia = {
    "team_a": [],
    "team_b": [],
    "team_a_economy": [],
    "team_b_economy": [],
    "round": [],
    "team_a_bank":[],
    "team_b_bank":[],
    "map": [],
    "date": [],
    'event': [],
}

# Same match URL plus the query string that selects game "all" and the economy tab.
economy_page = url + "/?game=all&tab=economy"

# soup_open is defined elsewhere in the notebook; presumably it fetches the
# page and returns a parsed BeautifulSoup document (TODO confirm).
soup_economy = soup_open(economy_page)

In [None]:
# Scratch version of the economy scrape: fills dict_economia from soup_economy
# and dumps it to economy_test.csv.

# Every per-game stats container on the page, and the game id each one carries.
get_games_id = soup_economy.find_all("div", {"class": "vm-stats-game"})
game_ids = [
        div.get("data-game-id") for div in get_games_id if div.has_attr("data-game-id")
    ]

# Map names indexed by position; the leading "all" entry presumably lines up
# with an "all" game id at the front of game_ids (TODO confirm).
map_list = ["all"]

maps = soup_economy.find_all(
    "div", {"class": "vm-stats-gamesnav-item js-map-switch"}
)
# NOTE(review): `map` shadows the Python builtin of the same name.
for map in maps:
    # Drop the first character of the nav label (presumably the map number — TODO confirm).
    map_list.append(map.get_text(strip=True)[1:])

event = basic_match_info["event"]
date = basic_match_info["date"]



# NOTE(review): game_ids[:-1] leaves out the last game id — confirm this is intended.
# NOTE(review): `id` and `round` below shadow Python builtins.
for value, id in enumerate(game_ids[:-1]):

    div = soup_economy.find("div", {"class": "vm-stats-game", "data-game-id": id})
    # All table rows of this game except the first one.
    test_div = div.find_all("tr")[1:]

    teams = []
    round = 0
    # HTML comment nodes inside the game container; the page keeps extra
    # per-round values in commented-out <div> elements.
    comments = div.find_all(string=lambda text: isinstance(text, Comment))

    both_team_economy = []
    for comment in comments:
        # Parse the comment body as HTML and keep the text of its first <div>.
        comment_soup = BeautifulSoup(comment, 'html.parser')
        # NOTE(review): rebinds `div`; the game container is no longer reachable
        # under that name after this loop.
        div = comment_soup.find('div')
        if div and div.text.strip():
            both_team_economy.append(div.text.strip())
    
    # Values alternate between the two teams: even positions go to team A,
    # odd positions to team B.
    for index, element in enumerate(both_team_economy):
        if index % 2 == 0:
            dict_economia["team_a_economy"].append(element)

        else:
            dict_economia["team_b_economy"].append(element)


    # test_div already dropped the first row, so this skips a second one.
    for fila in test_div[1:]:
        celdas = fila.find_all("td")
        # Take the first "team" label of the row's first cell until two names are collected.
        if len(teams) < 2:
            teams.append(
                celdas[0]
                .find_all("div", {"class": "team"})[0]
                .get_text(strip=True)
            )
        for bank in celdas:
            team_bank = bank.find_all("div", {"class": "bank"})
            # A cell holding "bank" divs is counted as one round: the first
            # div is stored for team A, the second for team B.
            if len(team_bank) > 0:
                round += 1
                dict_economia["team_a"].append(teams[0])
                dict_economia["team_b"].append(teams[1])
                dict_economia["team_a_bank"].append(
                    team_bank[0].get_text(strip=True)
                )
                dict_economia["team_b_bank"].append(
                    team_bank[1].get_text(strip=True)
                )
                dict_economia["round"].append(round)
                dict_economia["map"].append(map_list[value])
                dict_economia["date"].append(date)
                dict_economia['event'].append(event)
        

csv_file = "economy_test.csv"

# Mode "w" overwrites any previous file. zip() stops at the shortest column,
# so columns of unequal length silently lose rows.
# NOTE(review): iso-8859-1 cannot encode characters outside Latin-1 and raises
# UnicodeEncodeError for them.
with open(csv_file, mode="w", newline="",encoding="iso-8859-1") as file:
    writer = csv.writer(file)
    headers = dict_economia.keys()
    writer.writerow(headers)
    rows = zip(*dict_economia.values())
    for row in rows:
        writer.writerow(row)

In [258]:
values = ['4.0k', '3.6k', '2.7k', '14.4k', '21.8k', '19.5k', '10.7k', '23.0k', '22.8k', '24.3k', '22.7k', '17.5k', '23.6k', '17.0k', '23.9k', '13.9k', '24.5k', '21.1k', '24.5k', '13.4k', '24.2k', '20.9k', '26.6k', '17.0k', '3.8k', '3.8k', '2.3k', '16.0k', '21.6k', '21.4k', '22.4k', '20.2k', '11.0k', '18.2k', '21.5k', '22.8k', '10.4k', '22.8k', '21.3k', '23.8k', '23.1k', '23.2k', '23.4k', '22.3k', '24.3k', '23.5k', '23.6k', '22.9k']

In [262]:
# De-interleave the readings: even positions belong to team A, odd ones to team B.
team_a_economy, team_b_economy = [], []
for index, element in enumerate(values):
    (team_b_economy if index % 2 else team_a_economy).append(element)

In [264]:
team_b_economy

['3.6k',
 '14.4k',
 '19.5k',
 '23.0k',
 '24.3k',
 '17.5k',
 '17.0k',
 '13.9k',
 '21.1k',
 '13.4k',
 '20.9k',
 '17.0k',
 '3.8k',
 '16.0k',
 '21.4k',
 '20.2k',
 '18.2k',
 '22.8k',
 '22.8k',
 '23.8k',
 '23.2k',
 '22.3k',
 '23.5k',
 '22.9k']

In [235]:
# Inspect a single game: take the second entry of game_ids.
# NOTE(review): `id` shadows the Python builtin of the same name.
id = game_ids[1]

# Stats container of that game and all of its table rows except the first.
div = soup_economy.find("div", {"class": "vm-stats-game", "data-game-id": id})
test_div = div.find_all("tr")[1:]

In [246]:
test_div[2]

<tr>
<td>
<div class="ge-text-light label" style="padding-bottom: 10px;">	
									(BANK)
								</div>
<div class="team" style="height: 28px;">
<img src="//owcdn.net/img/62bbeba74d5cb.png"/>
										
																		PRX								</div>
<div class="team" style="height: 28px; margin-top: 3px;">
<img src="//owcdn.net/img/62a40cc2b5e29.png"/>
										
																		FNC								</div>
<div class="ge-text-light label" style="padding-top: 10px;">	
									(BANK)
								</div>
</td>
<td>
<div class="ge-text-light round-num">
											1										</div>
<div class="bank">0.2k</div>
<!--
										<div>
											3.8k										</div>
										-->
<div class="rnd-sq mod-win mod-ct">
</div>
<div class="rnd-sq">
</div>
<!--
										<div>
											3.9k										</div>
										-->
<div class="bank">
											0.1k										</div>
</td>
<td>
<div class="ge-text-light round-num">
											2										</div>
<div class="bank">4.3k</div>
<!--
										<div>
											15.0k							

In [247]:
# HTML comment nodes found inside the third of the remaining rows.
comments = test_div[2].find_all(string=lambda text: isinstance(text, Comment))

# Parse each comment as HTML and keep the stripped text of its first <div>
# whenever that text is non-empty.
values = []
for comment in comments:
    comment_soup = BeautifulSoup(comment, 'html.parser')
    # NOTE(review): rebinds the notebook-level name `div`.
    div = comment_soup.find('div')
    if div and div.text.strip():
        values.append(div.text.strip())


In [257]:
values

[]

In [239]:
comments = soup_economy.find_all(string=lambda text: isinstance(text, Comment))


comments

['\n\t\t<a href="/search" class="header-nav-item mod-search mod-solo">\n\t\t\t<i class="fa fa-search"></i>\n\t\t</a>\n\t\t<div class="header-div mod-search">\n\t\t</div>\n\t\t',
 '\n\t\t<div style="display: flex; padding: 0 20px; padding-bottom: 12px; ">\n\n\t\t\t<div class="wf-label" style="padding: 0; padding-bottom: 5px;  border-bottom: 1px dotted #aaa;">\n\t\t\t\tOfficial Streams\n\t\t\t</div>\n\t\t\t<div>\n\t\t\t\t/\n\t\t\t</div>\n\t\t\t<div class="wf-label" style="padding-bottom: 2px;">\n\t\t\t\tCo-streams\n\t\t\t</div>\n\t\t</div>\n\t\t',
 '\n\t\t\t\t\t\t<div>\n\t\t\t\t\t\t\t<i class="fa fa-compress"></i> Collapse\n\t\t\t\t\t\t</div>\n\t\t\t\t\t\t',
 '\n\t\t\t\t\t\t\t\t\t\t<div class="pick ge-text-light">\n\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t</div>\n\t\t\t\t\t\t\t\t\t\t',
 '\n\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t<div class="team">\n\t\t\t\t\t\t\t\t\t\t\t-\n\t\t\t\t\t\t\t\t\t\t</div>\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t'

In [234]:
div_tr[0].find_all("td")

[<td>
 <div class="team">
 <img src="//owcdn.net/img/62a40cc2b5e29.png"/>
 										
 									
 																		FNC								</div>
 </td>,
 <td>
 <div class="stats-sq">
 																					1																			</div>
 </td>,
 <td>
 <div class="stats-sq">
 																					3											(1)
 																			</div>
 </td>,
 <td>
 <div class="stats-sq">
 																					1											(0)
 																			</div>
 </td>,
 <td>
 <div class="stats-sq">
 																					3											(1)
 																			</div>
 </td>,
 <td>
 <div class="stats-sq">
 																					25											(15)
 																			</div>
 </td>]

In [None]:
# Earlier version of the economy scrape, writing to economy.csv.
bo = int(basic_match_info["bo"])  # This check might not be necessary.

status = basic_match_info["status"]

# Only finished best-of-3 / best-of-5 matches are processed.
if status == "final" and (bo == 3 or bo == 5):

    # Every per-game stats container on the page, and the game id each one carries.
    get_games_id = soup_economy.find_all("div", {"class": "vm-stats-game"})
    game_ids = [
        div.get("data-game-id") for div in get_games_id if div.has_attr("data-game-id")
    ]
    
    # Map names indexed by position, with "all" at index 0.
    map_list = ["all"]

    maps = soup_economy.find_all(
        "div", {"class": "vm-stats-gamesnav-item js-map-switch"}
    )
    for i in maps:
        # Drop the first character of the nav label (presumably the map number — TODO confirm).
        map_list.append(i.get_text(strip=True)[1:])

    # NOTE(review): game_ids[:-1] leaves out the last game id — confirm this is intended.
    # NOTE(review): `id` and `round` below shadow Python builtins.
    for value, id in enumerate(game_ids[:-1]):

        div = soup_economy.find("div", {"class": "vm-stats-game", "data-game-id": id})
        # All table rows of this game except the first one.
        test_div = div.find_all("tr")[1:]

        teams = []
        round = 0
        # UTC timestamp attribute of the first "moment-tz-convert" element on the page.
        fecha = soup_economy.find_all("div", class_="moment-tz-convert")[0].get(
            "data-utc-ts"
        )
        # event_name is expected to be a function defined elsewhere in the notebook.
        event = event_name(soup_economy)

        for fila in test_div:
            celdas = fila.find_all("td")
            # Take the first "team" label of the row's first cell until two names are collected.
            if len(teams) < 2:
                teams.append(
                    celdas[0]
                    .find_all("div", {"class": "team"})[0]
                    .get_text(strip=True)
                )
            for bank in celdas:
                team_bank = bank.find_all("div", {"class": "bank"})
                # A cell holding "bank" divs is counted as one round.
                if len(team_bank) > 0:
                    round += 1
                    dict_economia["team_a"].append(teams[0])
                    dict_economia["team_b"].append(teams[1])
                    # NOTE(review): the "bank" text is stored under the *_economy
                    # keys here, and team_a_bank / team_b_bank are never filled.
                    dict_economia["team_a_economy"].append(
                        team_bank[0].get_text(strip=True)
                    )
                    dict_economia["team_b_economy"].append(
                        team_bank[1].get_text(strip=True)
                    )
                    dict_economia["round"].append(round)
                    dict_economia["map"].append(map_list[value])
                    dict_economia["date"].append(fecha)
                    dict_economia['event'].append(event)

csv_file = "economy.csv"

# Mode "w" overwrites any previous file. zip() stops at the shortest column,
# so any column left empty in dict_economia results in zero data rows.
# NOTE(review): iso-8859-1 cannot encode characters outside Latin-1 and raises
# UnicodeEncodeError for them.
with open(csv_file, mode="w", newline="",encoding="iso-8859-1") as file:
    writer = csv.writer(file)
    headers = dict_economia.keys()
    writer.writerow(headers)
    rows = zip(*dict_economia.values())
    for row in rows:
        writer.writerow(row)

In [159]:
maps = soup_performance.find_all(
        "div", {"class": "vm-stats-gamesnav-item js-map-switch"}
    )

In [160]:
# Map names indexed by position, with "all" at index 0.
map_list = ["all"]

for i in maps:
    # Drop the first character of the nav label (presumably the map number — TODO confirm).
    map_list.append(i.get_text(strip=True)[1:])


In [162]:
game_ids

['all', '221168', '221169', '221170', '221171', '221172']

In [168]:
# Sanity check: print every column name next to its length, so columns of
# unequal length are easy to spot before they are zipped into CSV rows.
for i in performance_dict.keys():
    print(([i]) , len(performance_dict[i]))

['player'] 41
['team'] 41
['2K'] 41
['3K'] 41
['4K'] 41
['5K'] 41
['1v1'] 41
['1v2'] 41
['1v3'] 41
['1v4'] 41
['1v5'] 41
['ECON'] 41
['PL'] 41
['DE'] 41
['map'] 41
['date'] 41
['event'] 41


In [169]:
# Dump performance_dict to CSV: the keys form the header row and the column
# lists are transposed into data rows.
csv_file = 'performance_refactor.csv'

# Mode 'w' overwrites any previous file.
# NOTE(review): iso-8859-1 cannot encode characters outside Latin-1 and raises
# UnicodeEncodeError for them.
with open(csv_file, mode='w', newline='',encoding="iso-8859-1") as file:
    writer = csv.writer(file)

    headers = performance_dict.keys()
    writer.writerow(headers)
    
    # zip() stops at the shortest column, so unequal columns silently lose rows.
    rows = zip(*performance_dict.values())
    for row in rows:
        writer.writerow(row)

In [None]:
# TODO: tidy this code up a bit.
# Legacy batch scrape of the "performance" tab for every URL in toExtract,
# written to performance.csv. toExtract, urlStats, verificador and
# add_map_performance_page are defined elsewhere in the notebook.
performance_dict = {
    "player": [],
    'team': [],
    "2K": [],
    "3K": [],
    "4K": [],
    "5K": [],
    "1v1": [],
    "1v2": [],
    "1v3": [],
    "1v4": [],
    "1v5": [],
    "ECON": [],
    "PL": [],
    "DE": [],
    "map": [],
    "date": [],
    'event':[]
}

for dire in toExtract:
    # Same match URL plus the query string selecting game "all" and the performance tab.
    dire_perfo = dire+'/?game=all&tab=performance'
    sopa = urlStats(dire_perfo)
    soup_pickedMap = urlStats(dire)
    # verificador presumably returns (best-of count, match status) — TODO confirm.
    mejorDe, estado = verificador(dire)
    # Only finished best-of-3 / best-of-5 matches are processed.
    if estado == "final" and (mejorDe == 3 or mejorDe == 5):
        # pickedMap = add_map_performance_page(soup_pickedMap)
        
        # Every per-game stats container on the page, and the game id each one carries.
        get_games_id = sopa.find_all("div", {"class": "vm-stats-game"})
        game_ids = [
            div.get("data-game-id") for div in get_games_id if div.has_attr("data-game-id")
        ]


        # NOTE(review): `id` shadows the Python builtin of the same name.
        for id in game_ids:
            div = sopa.find("div", {"class": "vm-stats-game", "data-game-id": id})
            # All table rows of this game except the first one.
            test_div = div.find_all("tr")[1:]
            pre_process = []
            for element in test_div:
                # Keep only rows whose len() exceeds 13 (for a BeautifulSoup tag
                # this counts its direct child nodes).
                if len(element) > 13:
                    pre_process.append(element)

            # Skip the first of the retained rows.
            filas = pre_process[1:]
            # List meant to store the data (currently unused).
            jugadores = []

            for fila in filas:
                celdas = fila.find_all("td")

                if len(celdas) > 0:  # Make sure the row actually holds data cells
                    # Extract the player name
                    jugador_div = celdas[0].find("div").find_all("div")[0]
                    
                    # First token is stored as the player, second as the team.
                    nombre_jugador = jugador_div.get_text().split()
                    
                    # NOTE(review): this helper is redefined on every row; it
                    # could live at module level.
                    def extraer_numero(texto):
                        """Return the leading integer of ``texto``, or 0 when it does not start with digits."""
                        match = re.match(r"^\d+", texto)
                        return int(match.group()) if match else 0

                    performance_dict["player"].append(nombre_jugador[0])
                    performance_dict["team"].append(nombre_jugador[1])
                    # Cells 2..13 map, in order, onto the columns 2K .. DE.
                    # The trailing commas after some calls below only wrap the
                    # call result in a throwaway one-element tuple.
                    performance_dict["2K"].append(
                        extraer_numero(celdas[2].get_text(strip=True))
                    )
                    performance_dict["3K"].append(
                        extraer_numero(celdas[3].get_text(strip=True))
                    ),
                    performance_dict["4K"].append(
                        extraer_numero(celdas[4].get_text(strip=True))
                    ),
                    performance_dict["5K"].append(
                        extraer_numero(celdas[5].get_text(strip=True))
                    ),
                    performance_dict["1v1"].append(
                        extraer_numero(celdas[6].get_text(strip=True))
                    ),
                    performance_dict["1v2"].append(
                        extraer_numero(celdas[7].get_text(strip=True))
                    ),
                    performance_dict["1v3"].append(
                        extraer_numero(celdas[8].get_text(strip=True))
                    ),
                    performance_dict["1v4"].append(
                        extraer_numero(celdas[9].get_text(strip=True))
                    ),
                    performance_dict["1v5"].append(
                        extraer_numero(celdas[10].get_text(strip=True))
                    ),
                    performance_dict["ECON"].append(
                        extraer_numero(celdas[11].get_text(strip=True))
                    ),
                    performance_dict["PL"].append(
                        extraer_numero(celdas[12].get_text(strip=True))
                    ),
                    performance_dict["DE"].append(
                        extraer_numero(celdas[13].get_text(strip=True))
                    )
        # map / date / event columns are added once per match, after all of its games.
        # NOTE(review): their lengths must match the number of player rows added
        # above, otherwise zip() below silently drops rows — verify.
        final_map, final_date,final_event = add_map_performance_page(soup_pickedMap)
        performance_dict["map"].extend(final_map)
        performance_dict["date"].extend(final_date)
        performance_dict["event"].extend(final_event) 

    elif estado != "final":
        print("Match no finalizado: " + dire)

csv_file = 'performance.csv'

# Mode 'w' overwrites any previous file.
# NOTE(review): iso-8859-1 cannot encode characters outside Latin-1 and raises
# UnicodeEncodeError for them.
with open(csv_file, mode='w', newline='',encoding="iso-8859-1") as file:
    writer = csv.writer(file)

    headers = performance_dict.keys()
    writer.writerow(headers)
    
    # zip() stops at the shortest column.
    rows = zip(*performance_dict.values())
    for row in rows:
        writer.writerow(row)


In [13]:
# Manual test of the scraping functions against a single match page.

# Alternative matches are kept commented out so they can be swapped in by hand.
#url = "https://www.vlr.gg/498633/g2-esports-vs-fnatic-valorant-masters-toronto-2025-lr3"
url ="https://www.vlr.gg/498628/paper-rex-vs-fnatic-valorant-masters-toronto-2025-gf"
#url = "https://www.vlr.gg/507067/team-tarik-vs-team-toast-valorant-masters-toronto-2025-showmatch"
soup = soup_open(url)

# Match-level metadata first; the draft extractor takes it as an argument.
basic_match_info = get_basic_match_info(soup)
draft = pickBansExtractor(soup=soup, basic_match_info=basic_match_info)

In [27]:
bloques = soup.find_all("div", class_="vlr-rounds-row-col")

In [None]:
# Names of the maps on the match page, in page order.
maps_played = []

map_div =soup.find_all("div", class_="map") 

# NOTE(review): `map` shadows the Python builtin of the same name.
for map in map_div:
    map_name_span = map.find("span", attrs={"style": "position: relative;"})
    # Only the span's own first text node is used (recursive=False), so text
    # inside nested tags is ignored.
    map_name = map_name_span.find(string=True, recursive=False).strip()
    maps_played.append(map_name)

Sunset
Icebox
Pearl
Lotus


In [None]:
def round_detail_to_dict(round_detail):
    """Flatten one map's round results into CSV columns and save both team views.

    Args:
        round_detail: per-map dictionary holding the scalar fields ``team_a``,
            ``team_b``, ``map``, ``date``, ``map_order`` and ``event`` plus the
            parallel lists ``teamATT``/``teamBCT``/``ratk``/``winConAtk`` and
            ``teamACT``/``teamBTT``/``rdef``/``winConDef``.

    One row is produced per entry of ``teamATT`` (side ``"atk"``), followed by
    one row per entry of ``teamACT`` (side ``"def"``). The resulting columns
    are passed to ``save_round_detail_to_csv`` twice: first as built, then
    mirrored so that team B appears in the ``teamA``/``rndA`` columns with the
    side flipped.

    Returns:
        None.
    """
    column_names = (
        "teamA", "map", "side", "teamB", "rndA", "rndB",
        "round", "winCon", "date", "map_order", "event",
    )
    team_a_view = {name: [] for name in column_names}

    # (side label, team A results, team B results, round numbers, win conditions)
    halves = (
        ("atk", "teamATT", "teamBCT", "ratk", "winConAtk"),
        ("def", "teamACT", "teamBTT", "rdef", "winConDef"),
    )
    for side, own_key, rival_key, number_key, win_con_key in halves:
        for position, own_result in enumerate(round_detail[own_key]):
            record = {
                "teamA": round_detail["team_a"],
                "teamB": round_detail["team_b"],
                "side": side,
                "rndA": own_result,
                "rndB": round_detail[rival_key][position],
                "map": round_detail["map"],
                "round": round_detail[number_key][position],
                "winCon": round_detail[win_con_key][position],
                "date": round_detail["date"],
                "map_order": round_detail["map_order"],
                "event": round_detail["event"],
            }
            for name, cell in record.items():
                team_a_view[name].append(cell)

    save_round_detail_to_csv(team_a_view)

    # Team B's perspective reuses the same column lists with the team and
    # result columns swapped; only the side column is rebuilt.
    mirrored = {"teamA": "teamB", "teamB": "teamA", "rndA": "rndB", "rndB": "rndA"}
    flipped_sides = []
    for side in team_a_view["side"]:
        flipped_sides.append("def" if "atk" in side else "atk")

    team_b_view = {}
    for name in column_names:
        if name == "side":
            team_b_view[name] = flipped_sides
        else:
            team_b_view[name] = team_a_view[mirrored.get(name, name)]

    save_round_detail_to_csv(team_b_view)


In [None]:

# Text that follows "rnd-sq" in a round square's opening tag when that team
# won the round on the given side.
_ROUND_WON_AS_CT = ' mod-win mod-ct">'
_ROUND_WON_AS_T = ' mod-win mod-t">'


def _new_round_info(basic_match_info, map_name, map_order):
    """Build an empty per-map accumulator pre-filled with the match-level fields."""
    return {
        "team_a": basic_match_info["team_a_tricode"],
        "team_b": basic_match_info["team_b_tricode"],
        "map": map_name,
        "teamACT": [],
        "teamATT": [],
        "teamBCT": [],
        "teamBTT": [],
        "ratk": [],
        "rdef": [],
        "winConAtk": [],
        "winConDef": [],
        "date": basic_match_info["date"],
        "map_order": map_order,
        "event": basic_match_info["event"],
    }


def _victory_condition(img_tag):
    """Return the outcome icon's file name without its ``.webp`` suffix."""
    src = img_tag.get("src") or ""
    icon_name = src.rsplit("/", 1)[-1]
    suffix = ".webp"
    # str.rstrip(".webp") would strip any trailing '.', 'w', 'e', 'b' or 'p'
    # characters ("defuse" -> "defus", "time" -> "tim"); remove the literal
    # suffix instead.
    if icon_name.endswith(suffix):
        icon_name = icon_name[: -len(suffix)]
    return icon_name


def _record_round(round_info, markers, round_number, victory_condition):
    """Append one round's outcome to ``round_info`` from its two square markers."""
    if len(markers) < 2:
        # Not a regular round column (one square per team expected): skip it.
        return
    team_a_marker, team_b_marker = markers[0], markers[1]

    if team_a_marker == _ROUND_WON_AS_CT:
        round_info["teamACT"].append(1)
        round_info["teamBTT"].append(0)
        round_info["rdef"].append(round_number)
        round_info["winConDef"].append(victory_condition)
    elif team_a_marker == _ROUND_WON_AS_T:
        round_info["teamATT"].append(1)
        round_info["teamBCT"].append(0)
        round_info["ratk"].append(round_number)
        round_info["winConAtk"].append(victory_condition)

    if team_b_marker == _ROUND_WON_AS_CT:
        round_info["teamBCT"].append(1)
        round_info["teamATT"].append(0)
        round_info["ratk"].append(round_number)
        round_info["winConAtk"].append(victory_condition)
    elif team_b_marker == _ROUND_WON_AS_T:
        round_info["teamBTT"].append(1)
        round_info["teamACT"].append(0)
        round_info["rdef"].append(round_number)
        round_info["winConDef"].append(victory_condition)


def get_round_detail(soup, basic_match_info=None):
    """Extract round-by-round results for every map of a match page.

    Round columns (``div.vlr-rounds-row-col``) are walked in page order. A
    round number lower than the previous one marks the start of the next map:
    at that point the finished map is handed to ``round_detail_to_dict`` and a
    fresh accumulator is started. The accumulator of the LAST map is returned
    without being saved, so the caller is expected to pass it to
    ``round_detail_to_dict`` itself.

    Args:
        soup: parsed match page (BeautifulSoup document).
        basic_match_info: dict providing ``team_a_tricode``,
            ``team_b_tricode``, ``date`` and ``event``.

    Returns:
        dict for the last map, with the scalar fields ``team_a``, ``team_b``,
        ``map``, ``date``, ``map_order`` and ``event`` and the per-round lists
        ``teamACT``, ``teamATT``, ``teamBCT``, ``teamBTT``, ``ratk``, ``rdef``,
        ``winConAtk`` and ``winConDef``. From team A's point of view, ``ratk``
        / ``winConAtk`` describe rounds where team A was on the T side and
        ``rdef`` / ``winConDef`` rounds where it was on the CT side.

    Raises:
        ValueError: if ``basic_match_info`` is not provided.
        IndexError: if the page contains no ``div.map`` element.
    """
    if basic_match_info is None:
        raise ValueError("basic_match_info required")

    maps = []
    for map_div in soup.find_all("div", class_="map"):
        map_name_span = map_div.find("span", attrs={"style": "position: relative;"})
        maps.append(map_name_span.find(string=True, recursive=False).strip())

    bloques = soup.find_all("div", class_="vlr-rounds-row-col")
    control_value = 0
    map_number = 0
    round_info = _new_round_info(basic_match_info, maps[map_number], map_number)

    for ronda in bloques:
        round_numbers = ronda.find_all("div", class_="rnd-num")
        images = ronda.find_all("img")
        # Columns without a round number or without an outcome icon are not
        # recorded rounds; skip them explicitly instead of relying on a
        # catch-all except.
        if not round_numbers or not images:
            continue
        try:
            value = int(round_numbers[0].text.strip())
        except ValueError:
            continue
        victory_condition = _victory_condition(images[0])

        if value < control_value:
            # Round numbering restarted: the previous map is complete.
            round_detail_to_dict(round_info)
            map_number += 1
            map_name = maps[map_number] if map_number < len(maps) else None
            round_info = _new_round_info(basic_match_info, map_name, map_number)
        control_value = value

        # One marker per "rnd-sq" square: team A's first, then team B's.
        markers = re.findall(r"rnd-sq(.*)", str(ronda))
        _record_round(round_info, markers, value, victory_condition)

    return round_info

In [None]:
round_check = get_round_detail(soup,basic_match_info)

In [19]:
round_detail_to_dict(round_check)

In [75]:
for key in ronund.keys():
    print(key,  len(ronund[key]))

team_a 3
team_b 3
map 1
teamACT 13
teamATT 13
teamBCT 13
teamBTT 13
ratk 13
rdef 13
winConAtk 13
winConDef 13
date 1
map_order 1
event 1


In [32]:
basic_match_info = get_basic_match_info(soup)
#picks_bans = pickBansExtractor(soup, basic_match_info)

In [33]:
basic_match_info

{'team_a': 'Paper Rex',
 'team_b': 'FNATIC',
 'team_a_tricode': 'PRX',
 'team_b_tricode': 'FNC',
 'event': 'Valorant Masters Toronto 2025',
 'status': 'final',
 'bo': '5',
 'date': '2025-06-22 13:15:00',
 'patch': 'Patch 10.10',
 'tournament_instance': 'Playoffs',
 'type': 'Valorant match'}

In [None]:
# Words of the event-series label shown in the match header.
test = soup.find("div",{"class":"match-header-event-series"}).get_text(strip=True).split()

# Print True when the label contains the word "Showmatch".
if "Showmatch" in test:
    print(True)

{'header': ['team',
  'rival',
  'team_1_select_1',
  'team_2_select_1',
  'team_1_select_2',
  'team_2_select_2',
  'team_1_select_3',
  'team_2_select_3',
  'decider',
  'order',
  'bo',
  'date',
  'event'],
 'team_A': ['G2',
  'FNC',
  'Ascent',
  'Pearl',
  'Lotus',
  'Split',
  'Icebox',
  'Sunset',
  'Haven',
  'A',
  '3',
  '2025-06-20 14:40:00',
  'Valorant Masters Toronto 2025'],
 'team_B': ['FNC',
  'G2',
  'Pearl',
  'Ascent',
  'Split',
  'Lotus',
  'Sunset',
  'Icebox',
  'Haven',
  'B',
  '3',
  '2025-06-20 14:40:00',
  'Valorant Masters Toronto 2025']}

In [5]:
dict_info = get_basic_match_info(soup)

In [6]:
dict_info

{'teams': 'G2 Esports vs. FNATIC ',
 'event': ' Valorant Masters Toronto 2025 ',
 'tournament_instance': ' Playoffs ',
 'type': ' Valorant match '}

<div class="match-header-super">
<div>
<a class="match-header-event" href="/event/2282/valorant-masters-toronto-2025/playoffs">
<img src="//owcdn.net/img/603bfd7bf3f54.png" style="height: 32px; width: 32px; margin-right: 6px;"/>
<div>
<div style="font-weight: 700;">
						Valorant Masters Toronto 2025					</div>
<div class="match-header-event-series">
						Playoffs: 
						Lower Round 3					</div>
</div>
</a>
</div>
<div style="text-align: right;">
<div class="match-header-date">
<div class="moment-tz-convert" data-moment-format="dddd, MMMM Do" data-utc-ts="2025-06-20 14:40:00">
					Friday, June 20th				</div>
<div class="moment-tz-convert" data-moment-format="h:mm A z" data-utc-ts="2025-06-20 14:40:00" style="font-size: 12px;">

							
						3:40 PM -03					</div>
<div style="margin-top: 4px;">
<div style="font-style: italic;">
								Patch 10.10							</div>
</div>
</div>
</div>
</div>

In [42]:
# Top strip of the match page: event, stage, date, time and patch.
header = soup.find("div", {"class": "match-header-super"})

# Extract the clean text of the desired elements.
# NOTE(review): `event_name` is called as a function in another cell
# (event_name(soup_economy)); binding a string to that name here breaks the
# call if this cell runs first.
event_name = header.find("div", style="font-weight: 700;").get_text(strip=True)
event_stage = header.find("div", class_="match-header-event-series").get_text(strip=True)

# These read the rendered text of the two date elements, not their
# data-utc-ts attribute.
date = header.find("div", {"data-moment-format": "dddd, MMMM Do"}).get_text(strip=True)
# NOTE(review): `time` shadows the `time` module imported at the top of the
# notebook; any later time.sleep(...) style call fails after this cell runs.
time = header.find("div", {"data-moment-format": "h:mm A z"}).get_text(strip=True)

patch = header.find("div", style="font-style: italic;").get_text(strip=True)

In [None]:
date+" "+time

'3:40 PM -03'