In [None]:
import csv
import re
import time
from threading import Thread

import pyperclip
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

In [None]:
# Base URL of the site to scrape; page paths and query strings are appended to it
URL = 'https://limitlesstcg.com'


# Start a Chrome session with Selenium's default settings (no Options object is passed).
# This session is the one used to collect the tournament links.
driver = webdriver.Chrome()

In [None]:
# (Re)create the output CSV so it contains only the header row.
# The header is split by column group: card, deck archetype, player, tournament, date/rotation.
with open('data/tournaments.csv', 'w') as header_file:
    header_file.write(
        'id_card,name_card,amount_card,price_card,energy_type_card,type_card,'
        'combo_type_id,combo_type_name,'
        'id_player,name_player,country_player,all_time_score,ranking_player_tournament,'
        'id_tournament,category_tournament,name_tournament,country_tournament,'
        'year_tournament,month_tournament,day_tournament,valid_rotation_at_tournament\n'
    )

In [None]:
def get_all_tournaments(driver):
    """Return the tournament page URLs found on the page currently loaded in ``driver``.

    Args:
        driver: A Selenium WebDriver that has already navigated to a
            tournament listing page.

    Returns:
        A list with the ``href`` attribute of every anchor in the third column
        of the main table, in document order. Empty when no anchor matches.
    """
    # Anchors sit in the third cell of each row of the listing table
    anchors = driver.find_elements(By.XPATH, '/html/body/main/div/table/tbody/tr/td[3]/a')

    return [anchor.get_attribute('href') for anchor in anchors]

In [None]:
# Values sent as the "type" query parameter of the tournament listing page
filters = ['regional', 'national', 'international', 'worlds', 'special', 'cl', 'rl', 'online', 'players_cup', 'invitational']
# The first four types keep their own bucket; links of every other type are pooled under 'others'
tournament_links_by_type = {'regional': [], 'national': [], 'international': [], 'worlds': [], 'others': []}

try:
    # The loop variable is not called "filter" so the built-in filter() stays usable in later cells
    for tournament_type in filters:
        # Load the listing for this type; time, format and region are all set to "all"
        driver.get(URL+f'/tournaments?time=all&type={tournament_type}&format=all&region=all')

        # Get all the links to the tournaments
        tls = get_all_tournaments(driver)
        if tournament_type in ['regional', 'national', 'international', 'worlds']:
            tournament_links_by_type[tournament_type] = tls
        else:
            tournament_links_by_type['others'] += tls
finally:
    # End the session even if a page load fails. quit() shuts down the whole
    # WebDriver session, whereas close() only closes the current window.
    driver.quit()

print(f"Regionals: {len(tournament_links_by_type['regional'])}")
print(f"Nationals: {len(tournament_links_by_type['national'])}")
print(f"Internationals: {len(tournament_links_by_type['international'])}")
print(f"Worlds: {len(tournament_links_by_type['worlds'])}")
print(f"Others: {len(tournament_links_by_type['others'])}")

In [None]:
# Truncate the output CSV back to just its header row before the scraping cells run.
# Rows are appended to this file later, so the column order here is the row layout.
with open('data/tournaments.csv', 'w') as header_file:
    header_file.write(','.join((
        'id_card',
        'name_card',
        'amount_card',
        'price_card',
        'energy_type_card',
        'type_card',
        'combo_type_id',
        'combo_type_name',
        'id_player',
        'name_player',
        'country_player',
        'all_time_score',
        'ranking_player_tournament',
        'id_tournament',
        'category_tournament',
        'name_tournament',
        'country_tournament',
        'year_tournament',
        'month_tournament',
        'day_tournament',
        'valid_rotation_at_tournament',
    )) + '\n')

In [None]:
def get_tournament_data(t_category, t_link):
    """Scrape one tournament and append one CSV row per decklist card.

    Opens its own Chrome session, reads the tournament infobox and the
    standings table at ``t_link``, then visits every player's page, every
    decklist page and every card page in turn. Each card of each decklist
    becomes one row appended to ``data/tournaments.csv``, in the column order
    of that file's header.

    Args:
        t_category: Category label written to the ``category_tournament`` column.
        t_link: URL of the tournament page. Its last path segment is parsed as
            the integer tournament id.

    Returns:
        None. Rows are written straight to the CSV file.

    Raises:
        ValueError: If the last segment of ``t_link`` is not an integer, or the
            date in the infobox does not split into exactly day, month, year.
        Exception: Selenium lookup/navigation errors for required elements
            propagate to the caller; only the optional lookups below fall back
            to ``'None'``.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(t_link)

        # ---- Tournament-level fields (repeated on every row) ----
        tournament = {}
        tournament['category_tournament'] = t_category
        tournament['id_tournament'] = int(t_link.split('/')[-1])

        e_heading = driver.find_element(By.CLASS_NAME, 'infobox-heading')
        # Commas are removed from the name before it is stored
        tournament['name_tournament'] = e_heading.text.strip().replace(',', '')
        try:
            tournament['country_tournament'] = e_heading.find_element(By.XPATH, './img').get_attribute('data-tooltip')
        except Exception:
            # Best effort: the heading has no image to read a country from
            tournament['country_tournament'] = 'None'

        e_info = driver.find_element(By.CLASS_NAME, 'infobox-line')
        # Rotation/format id is whatever follows the last "=" in the first link of the info line
        tournament['valid_rotation_at_tournament'] = e_info.find_element(By.XPATH, './a[1]').get_attribute('href').split('=')[-1]

        # The text before the first "•" must be three space-separated tokens: day, month, year
        t_date = e_info.text.split('•')[0].strip()
        t_day, t_month, t_year = t_date.split(' ')
        # Keep only the digits of the day token and zero-pad to two characters
        t_day = re.findall(r'\d+', t_day)[0].zfill(2)
        # Month name -> month number via its first three letters (%b), zero-padded
        t_month = str(time.strptime(t_month[:3], '%b').tm_mon).zfill(2)
        tournament['year_tournament'] = t_year
        tournament['month_tournament'] = t_month
        tournament['day_tournament'] = t_day

        # ---- Standings table: one dict per player ----
        # Everything needed from the table is copied into plain values now,
        # because the element handles are not reused after navigating away.
        players = []
        # Skip first row (header)
        for e in driver.find_elements(By.XPATH, '/html/body/main/div/table/tbody/tr')[1:]:
            player = {}
            player['ranking_player_tournament'] = e.find_element(By.XPATH, './td[1]').text
            player['country_player'] = e.find_element(By.XPATH, './td[3]/img').get_attribute('data-tooltip')
            try:
                e_player = e.find_element(By.XPATH, './td[2]/a[1]')
            except Exception:
                # Alternative row layout: the player link is wrapped in a <div>
                e_player = e.find_element(By.XPATH, './td[2]/div/a[1]')
            player['link_player_page'] = e_player.get_attribute('href')
            player['id_player'] = player['link_player_page'].split('/')[-1]
            player['name_player'] = e_player.text

            player['combo_type_id'] = e.find_element(By.XPATH, './td[4]/a').get_attribute('href').split('/')[-1].split('?')[0]
            player['combo_type_name'] = e.find_element(By.XPATH, './td[4]/a/span').get_attribute('data-tooltip')

            player['link_decklist'] = e.find_element(By.XPATH, './td[5]/a').get_attribute('href')

            players.append(player)

        # ---- Player pages: all-time score ----
        for player in players:
            driver.get(player['link_player_page'])
            player['all_time_score'] = driver.find_element(By.XPATH, '/html/body/main/div/section[2]/div/table[2]/tbody/tr[2]/td[2]').text

        # ---- Decklist pages: (url, name, amount) for every listed card ----
        decklists = []
        for player in players:
            driver.get(player['link_decklist'])

            # First and second card columns of the decklist, in that order
            e_cards = driver.find_elements(By.XPATH, '/html/body/main/div/div[1]/div[2]/div[1]/div[1]/div/a[1]')
            e_cards += driver.find_elements(By.XPATH, '/html/body/main/div/div[1]/div[2]/div[1]/div[2]/div/a[1]')

            # Read the values eagerly, before the next navigation
            cards = [
                (
                    e_link.get_attribute('href'),
                    e_link.find_element(By.XPATH, './span[2]').text,
                    e_link.find_element(By.XPATH, './span[1]').text,
                )
                for e_link in e_cards
            ]
            decklists.append({'player': player, 'cards': cards})

        # ---- Card pages: one CSV row per card of each decklist ----
        with open('data/tournaments.csv', 'a') as f:
            # csv.writer quotes fields that contain commas or quotes, so such
            # names no longer shift the columns of a row. lineterminator='\n'
            # matches the line ending used for the header row.
            writer = csv.writer(f, lineterminator='\n')

            for decklist in decklists:
                player = decklist['player']

                for card_url, card_name, card_amount in decklist['cards']:
                    card = {}
                    driver.get(card_url)

                    card['name_card'] = card_name
                    card['amount_card'] = card_amount

                    e_card = driver.find_element(By.XPATH, '/html/body/main/div/section[1]/div[2]/table/tbody/tr[2]')

                    try:
                        # id = <set code> + <card number left-padded with zeros to 3 characters>
                        print_url = e_card.find_element(By.XPATH, './td[1]/a').get_attribute('href')
                        set_card = print_url.split('/')[-2]
                        number_card = print_url.split('/')[-1]
                        number_card = (3 - len(number_card)) * '0' + number_card
                        card['id_card'] = set_card + number_card
                    except Exception:
                        card['id_card'] = 'None'

                    # Price: dollar column first, then the euro column, else 'None'
                    try:
                        card['price_card'] = float(e_card.find_element(By.XPATH, './td[2]/a').text.replace('$', ''))
                    except Exception:
                        try:
                            # 1.05 is presumably a fixed EUR->USD rate -- TODO confirm
                            card['price_card'] = float(e_card.find_element(By.XPATH, './td[3]/a').text.replace('€', '')) * 1.05
                        except Exception:
                            card['price_card'] = 'None'

                    card['type_card'] = driver.find_element(By.XPATH, '/html/body/main/div/section[1]/div[1]/div[2]/div[1]/div[1]/div[1]/p[2]').text.split('-')[0].strip()
                    if card['type_card'] == 'Pokémon':
                        # Energy type is the text after the first "-" of the first line
                        card['energy_type_card'] = driver.find_element(By.XPATH, '/html/body/main/div/section[1]/div[1]/div[2]/div[1]/div[1]/div[1]/p[1]').text.split('-')[1].strip()
                    else:
                        card['energy_type_card'] = 'None'

                    # Same column order as the header row of data/tournaments.csv
                    writer.writerow([
                        card['id_card'],
                        card['name_card'],
                        card['amount_card'],
                        card['price_card'],
                        card['energy_type_card'],
                        card['type_card'],
                        player['combo_type_id'],
                        player['combo_type_name'],
                        player['id_player'],
                        player['name_player'],
                        player['country_player'],
                        player['all_time_score'],
                        player['ranking_player_tournament'],
                        tournament['id_tournament'],
                        tournament['category_tournament'],
                        tournament['name_tournament'],
                        tournament['country_tournament'],
                        tournament['year_tournament'],
                        tournament['month_tournament'],
                        tournament['day_tournament'],
                        tournament['valid_rotation_at_tournament'],
                    ])
                    # Push each row to disk right away so progress survives an interrupted run
                    f.flush()
    finally:
        # Always end the browser session, including when a lookup above raises
        driver.quit()

In [None]:
# Flatten {category: [links]} into (category, link) pairs, lazily and in dict order
scrape_jobs = (
    (category, tl)
    for category, tournament_links in tournament_links_by_type.items()
    for tl in tournament_links
)

# Scrape the tournaments one at a time. get_tournament_data appends its rows to
# the CSV itself and has no return statement, so tournament_data is always None.
for category, tl in scrape_jobs:
    tournament_data = get_tournament_data(category, tl)