# Pokémon TCG webscraper

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re
from threading import Thread

## Getting all cards and their data

In [None]:
def get_card_code_from_link(link: str) -> str:
    """Build a card code from the last two '/'-separated segments of *link*.

    The second-to-last segment is used as the set code. The last segment is
    used as the card number, left-padded with '0' to at least three
    characters (longer numbers are kept as they are).

    Raises:
        IndexError: if *link* contains no '/' separator.
    """
    segments = link.split('/')
    set_code = segments[-2]
    # Pad on the left so '7' becomes '007' and '42' becomes '042'.
    padded_number = segments[-1].rjust(3, '0')
    return set_code + padded_number

In [None]:
# Scrape every set page and card page reachable from URL and group the
# printings of each card under the first printing that is encountered.
# The result is written to 'card_printings.txt'.

# Set the URL that we will start scraping from
URL = 'https://limitlesstcg.com/cards'

# Codes of every printing recorded so far. A set keeps the membership tests
# inside the nested loops below O(1).
scraped_cards = set()
output = {}  # key is code for the first print for that card, value is a list of other printings

# Open Chrome
driver = webdriver.Chrome()

try:
    # Open the URL
    driver.get(URL)

    # Let's scrape all the links to the sets (first column of each table row)
    e_sets = driver.find_elements(By.XPATH, '/html/body/main/div/table/tbody/tr/td[1]/a')
    set_links = [element.get_attribute('href') for element in e_sets]

    # Let's enter the page for each set and scrape the links to each card
    for set_link in set_links:

        # Go to set link
        driver.get(set_link)

        # Get all the links to the cards. The hrefs are collected up front
        # because the elements are no longer usable after navigating away.
        e_all_cards = driver.find_element(By.CLASS_NAME, 'card-search-grid').find_elements(By.XPATH, './a')
        cards_links = [e.get_attribute('href') for e in e_all_cards]

        # Then enter the page for each card and scrape the data
        for card_link in cards_links:

            # Get the card code
            card_code = get_card_code_from_link(card_link)

            # If this card was already recorded as a printing of an earlier
            # card, skip it so that it does not start a group of its own
            if card_code in scraped_cards:
                continue

            # Go to card link
            driver.get(card_link)

            # Get the rows listing all the printings of this card
            e_other_printings = driver.find_element(By.CLASS_NAME, 'card-prints-versions').find_elements(By.XPATH, './tbody/tr/td[1]/a')

            # The first not-yet-recorded printing becomes the key of the group
            first_print = None

            for e in e_other_printings:

                # An entry without an href is taken to be the card currently
                # being viewed, so its code is the one derived from card_link
                href = e.get_attribute('href')
                code = get_card_code_from_link(href) if href else card_code

                if code in scraped_cards:
                    continue

                # Record the printing itself (not the page's card), so that
                # it is skipped when its own link is reached later
                scraped_cards.add(code)

                if first_print is None:
                    first_print = code
                    output[first_print] = []
                else:
                    output[first_print].append(code)
finally:
    # End the session even if scraping failed; quit() also shuts down the
    # browser and driver process, whereas close() only closes the window
    driver.quit()

# Write the output to a file
with open('card_printings.txt', 'w', encoding='utf-8') as f:
    f.write('<first_print>:<print1>,<print2>,...,<printn>\n')
    for key, value in output.items():
        f.write(key + ':' + ','.join(value) + '\n')