In [295]:
import pandas as pd
import requests
import sqlite3
import re
from bs4 import BeautifulSoup
from pathlib import Path

In [281]:
# Round-1 pairings page; passed to get_player_list to discover decklist ids.
pt_london_2025_invited = "https://fabtcg.com/en/coverage/pro-tour-london/pairings/1/"
# Decklist page template; "{}" is filled with a decklist id via str.format.
pt_london_2025_decklists = "https://fabtcg.com/en/coverage/pro-tour-london/decklist/{}/"
# Round results page template; "{}" is filled with a round number via str.format.
# NOTE(review): this name says 2024 while the two constants above say 2025 and
# all three share the same "pro-tour-london" coverage path — confirm the year.
pt_london_2024_results = "https://fabtcg.com/en/coverage/pro-tour-london/results/{}/"

In [291]:
def get_player_list(url, timeout=30):
    """Return the decklist ids linked from a tournament pairings page.

    Parameters
    ----------
    url : str
        Address of the pairings page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    list of str
        The digits following ``/decklist/`` in the first link of each
        ``tournament-coverage__player-hero-and-deck`` block, in the order
        the blocks are found. Blocks without such a link are skipped.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns an HTTP error status.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping an error page into an empty list.
    page.raise_for_status()
    # Name the parser explicitly: the bare "html" feature lets BeautifulSoup pick
    # whichever parser happens to be installed, so results can vary per machine.
    soup = BeautifulSoup(page.text, "html.parser")

    player_blocks = soup.find_all("div", {"class": "tournament-coverage__player-hero-and-deck"})

    player_ids = []
    for block in player_blocks:
        # A block without a link yields str(None) == "None", which does not match.
        match = re.search(r"\/decklist\/(\d+)", str(block.find("a")))
        if match is not None:
            player_ids.append(match.group(1))

    return player_ids


def get_decklist(url, timeout=30):
    """Return the stripped text of every ``<td>`` cell on a decklist page.

    Parameters
    ----------
    url : str
        Address of the decklist page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    list of str
        One whitespace-stripped string per ``<td>`` element, in the order
        the elements are found.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns an HTTP error status.
    """
    page = requests.get(url, timeout=timeout)
    # Surface HTTP errors here rather than returning an empty decklist.
    page.raise_for_status()
    # Explicit parser so the result does not depend on which optional
    # parser libraries are installed alongside BeautifulSoup.
    soup = BeautifulSoup(page.text, "html.parser")

    return [cell.text.strip() for cell in soup.find_all("td")]

def decklist_to_df(decklists):
    """Split scraped decklist rows into a participants table and a card table.

    Parameters
    ----------
    decklists : iterable of sequence of str
        One record per deck. Positions 0-4 are read as name, event date,
        event name, format and hero; every later position is read as a card
        entry of the form ``"<copies> x <card name>"``. Records may have
        different lengths.

    Returns
    -------
    participants : pandas.DataFrame
        Columns ``Name``, ``Event Date``, ``Event Name``, ``Format``, ``Hero``,
        indexed by the digits that follow ``(`` in the name cell.
    cards : pandas.DataFrame
        One row per card entry with string columns ``Copies`` and ``Card``,
        carrying the same index values as ``participants``.

    Raises
    ------
    ValueError
        If a name cell does not contain ``(`` followed by digits.
    """
    records = list(decklists)
    if not records:
        # Nothing scraped: return correctly-shaped empty frames instead of
        # failing on the missing column 0.
        empty_participants = pd.DataFrame(
            columns=["Name", "Event Date", "Event Name", "Format", "Hero"]
        )
        return empty_participants, pd.DataFrame(columns=["Copies", "Card"])

    table = pd.DataFrame.from_records(records)

    player_ids = []
    for name in table[0]:
        match = re.search(r"\((\d+)", name) if isinstance(name, str) else None
        if match is None:
            raise ValueError(f"no '(<digits>' id found in name cell: {name!r}")
        player_ids.append(match.group(1))
    table.index = player_ids

    participants = table.iloc[:, 0:5].copy()
    participants = participants.rename(
        columns={0: "Name", 1: "Event Date", 2: "Event Name", 3: "Format", 4: "Hero"}
    )

    cards = table.drop([0, 1, 2, 3, 4], axis=1)
    # Wide -> long; shorter records are padded with missing values, dropped here.
    cards = pd.melt(cards, ignore_index=False, value_name="import name")["import name"].dropna().to_frame()

    # Split on the FIRST " x " only, so a card name that itself contains " x "
    # stays intact; reindex guarantees both columns exist even if no entry
    # contains the separator.
    parts = cards["import name"].str.split(" x ", n=1, expand=True).reindex(columns=[0, 1])
    cards["Copies"] = parts[0]
    cards["Card"] = parts[1]
    cards = cards.drop("import name", axis=1)

    return participants, cards

def get_wins(url, total_rounds=18, skipped_rounds=range(6, 13), timeout=30):
    """Count wins per player id across a tournament's round results pages.

    Parameters
    ----------
    url : str
        Results page template; ``url.format(round_number)`` is fetched for
        each counted round.
    total_rounds : int, optional
        Highest round number to consider (default 18); rounds are 1-based.
    skipped_rounds : container of int, optional
        Round numbers that are not fetched (default rounds 6 through 12).
        NOTE(review): presumably rounds played in a different format from
        the scraped decklists — confirm.
    timeout : float, optional
        Seconds to wait for each request before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        A single ``wins`` column indexed by player id string. Only players
        with at least one recorded win appear.

    Raises
    ------
    requests.RequestException
        If a request fails, times out, or returns an HTTP error status.
    """

    def get_round_results(round_url):
        """Return the id of the winner of every decided match on one page."""
        page = requests.get(round_url, timeout=timeout)
        # A missing round page should stop the run, not count as "no wins".
        page.raise_for_status()
        # Explicit parser keeps results independent of installed parser libs.
        soup = BeautifulSoup(page.text, "html.parser")

        winners = []
        for side in range(2):
            # Rows are tagged winner-1 / winner-2; the winner's block is the
            # one at the matching position inside the row.
            rows = soup.find_all("div", {"class":f"tournament-coverage__row tournament-coverage__row--results tournament-coverage__row--winner-{side+1}"})
            for row in rows:
                player_blocks = row.find_all("div",{"class":"tournament-coverage__player-hero-and-deck"})
                # NOTE(review): raises if the winner block is missing or its
                # link contains no digits — confirm such rows cannot occur.
                anchor = player_blocks[side].find("a")
                winners.append(re.search(r"(\d+)", str(anchor)).group(1))

        return winners

    win_tracker = {}

    for round_number in range(1, total_rounds + 1):
        if round_number in skipped_rounds:
            continue
        for player in get_round_results(url.format(round_number)):
            win_tracker[player] = win_tracker.get(player, 0) + 1

    return pd.DataFrame.from_dict(win_tracker,orient="index", columns=["wins"])

In [None]:
# Discover the decklist ids from the pairings page, then download each decklist.
player_list = get_player_list(pt_london_2025_invited)
decklist_urls = [pt_london_2025_decklists.format(player_id) for player_id in player_list]
tournament_lists = [get_decklist(decklist_url) for decklist_url in decklist_urls]

In [299]:
# decklist_to_df returns a (participants, decklists) pair of DataFrames.
tournament_df = decklist_to_df(tournament_lists)
participants, decklists = tournament_df

# Win counts per player id, scraped from the round results pages.
wins = get_wins(pt_london_2024_results)

In [None]:
# Reference (pandas docs): several DataFrames can be written to one workbook by
# opening a single pd.ExcelWriter as a context manager and passing it to each
# DataFrame.to_excel call with a distinct sheet_name. The export cell that
# follows applies this pattern to the scraped tables.

In [301]:
excel_path = Path("PT London.xlsx")

# One worksheet per table, all written into the same workbook.
sheets = (
    ("participants", participants),
    ("decklists", decklists),
    ("wins", wins),
)

with pd.ExcelWriter(excel_path) as writer:
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name)