# Beautiful Soup

In [None]:
from bs4 import BeautifulSoup
import requests

We want to grab the `href` link from the anchor tag of each "deck_article-card-container" element.

In [None]:
# Fetch the deck-search page. The timeout keeps the cell from hanging forever
# if the server never answers.
page_to_scrape = requests.get(
    'https://ygoprodeck.com/deck-search/?&_sft_category=tournament%20meta%20decks&offset=0',
    timeout=10,
)
print(page_to_scrape)  # shows the HTTP status of the response

# Parse only the static HTML that came back in the response body.
soup = BeautifulSoup(page_to_scrape.text, 'html.parser')

# find_all is the current spelling of the deprecated findAll alias.
links = soup.find_all('a', class_='deck_article-card-container')

# Report how many anchors matched instead of dumping the entire document.
print(len(links), 'matching anchor tags found in the static HTML')
for link in links:
    href = link.get('href')
    if href:
        print(href)

Unfortunately, the elements with that class are loaded dynamically inside the div with classes "deck-layout-flex grid-of-decks justify-content-center".  
BeautifulSoup can only handle static HTML, so we have to use Selenium to pull this information.

# Selenium

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import random
import time

In [None]:
# Scrape the first page of deck results with a real browser so that the
# dynamically loaded deck cards are present in the DOM.
driver = webdriver.Chrome()

try:
    # Navigation happens inside the try so the browser is still closed by the
    # finally clause if loading the page fails.
    driver.get('https://ygoprodeck.com/deck-search/?&_sft_category=tournament%20meta%20decks&offset=0')

    print("deck link,\t\t\tdate published")

    TARGET_CLASS = 'deck_article-card-container'
    # Info class; not referenced below, kept for reference only.
    RELATIVE_DATE_CLASS = "deck_article-card-stats text-left d-block"

    # Wait (up to 10 seconds) until the page loads the divs containing the links.
    elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, TARGET_CLASS))
    )

    # Once the content is loaded, extract the link and date from each card.
    for element in elements:
        anchor = element.find_element(By.TAG_NAME, 'a')
        href = anchor.get_attribute('href')
        info = element.find_elements(By.TAG_NAME, 'span')
        # A card should give us three span tags; the text of the second one is
        # the relative date. Fall back to 'NA' instead of raising IndexError
        # when a card does not have the expected layout.
        if len(info) == 3:
            relative_date = info[1].text
        else:
            relative_date = 'NA'
        print(href, '\t\t\t', relative_date)
finally:
    # Close the WebDriver instance whether or not scraping succeeded.
    driver.quit()


# Grabbing the Next Page

In [None]:
# Walk through up to MAX_PAGES result pages, printing each deck link and its
# relative publication date, and stop early when the next-page button is
# disabled.
driver = webdriver.Chrome()

try:
    # Navigation happens inside the try so the browser is still closed by the
    # finally clause if loading the page fails.
    driver.get('https://ygoprodeck.com/deck-search/?&_sft_category=tournament%20meta%20decks&tournament=European%20Championships&offset=0')

    DECK_LINK_TARGET_CLASS = 'deck_article-card-container'
    # Info class; not referenced below, kept for reference only.
    RELATIVE_DATE_CLASS = "deck_article-card-stats text-left d-block"
    # Button used here to advance to the next page.
    # NOTE(review): the class is named 'prevDeck' -- confirm against the site
    # that this really is the next-page control.
    NEXT_PAGE_TARGET_CLASS = 'prevDeck'
    DISABLED_CLASS = 'disabled'
    MAX_PAGES = 5  # upper bound on the number of pages visited

    for i in range(MAX_PAGES):
        print('PAGE ', i)

        # Wait (up to 10 seconds) until the page loads the divs containing the links.
        # NOTE(review): after a click this may return immediately with the
        # previous page's cards if they are still in the DOM -- verify that the
        # printed links actually change from page to page.
        elements = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, DECK_LINK_TARGET_CLASS))
        )

        # Once the content is loaded, extract the link and date from each card.
        for element in elements:
            anchor = element.find_element(By.TAG_NAME, 'a')
            href = anchor.get_attribute('href')
            info = element.find_elements(By.TAG_NAME, 'span')
            # Only trust the second span as the date when the card has the
            # expected three spans.
            if len(info) != 3:
                relative_date = 'NA'
            else:
                relative_date = info[1].text
            print(href, '\t\t\t', relative_date)

        next_button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, NEXT_PAGE_TARGET_CLASS))
        )

        next_button_classes = next_button.get_attribute('class').split()
        if DISABLED_CLASS in next_button_classes:
            print('REACHED LAST PAGE, EXITING LOOP')
            break

        # Random 5-10 second pause before requesting the next page.
        time.sleep(random.randint(5, 10))
        next_button.click()
except Exception as exc:
    # Report what went wrong instead of silently swallowing it. Catching
    # Exception (not a bare except) lets KeyboardInterrupt propagate.
    print('exception:', repr(exc))
finally:
    # Close the WebDriver instance on success and on failure alike.
    driver.quit()
