In [1]:
import requests, time
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

In [2]:
""" Functions for retrieving complete html content from a given URL.
    getSoup uses requests lib for quick retrieval of static websites.
    getSoup_dynamic uses Selenium Chrome driver to load dynamic sites.
"""

def getSoup(URL):
    r = requests.get(URL)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'html.parser')
    return soup

def getSoup_dynamic(URL, headless=True, sleep_len=2):
    opts = Options()
    opts.headless = headless
    browser = Chrome(options=opts)
    browser.get(URL)
    time.sleep(sleep_len)
    soup = BeautifulSoup(browser.page_source, 'html.parser')
    browser.close()
    return soup

In [3]:
""" Retrieve the list of urls for different decklists from cEDH decklist database.
    Supports searching for any combination of deck types: competitive, brew, outdated, and/or meme.
    Separates urls by site. Moxfield is by far the most common. Currently there is only 1 competitive
    list that isn't on either Moxfield or Tappedout. 
"""

URL = 'https://cedh-decklist-database.com/'
soup = getSoup(URL)
containers = soup.findAll('ul', class_='ddb-decklists')

def _section(c):
    return c.parent.parent.find('div', class_='ddb-section').get_text().strip()

types = ['COMPETITIVE', 'BREW'] #COMPETITIVE, BREW, OUTDATED, MEME
urls = []
for c in containers:
    if _section(c) not in types:
        continue
    decks = c.findAll('li')
    for d in decks:
        urls.append(d.a['href'])

print(len(urls))

moxfield = [h for h in urls if 'moxfield.com' in h]
tappedout = [h for h in urls if 'tappedout.net' in h]

#THESE SITES NOT SUPPORTED (yet?)
#scryfall = [h for h in urls if 'scryfall.com' in h]
#archidekt = [h for h in urls if 'archidekt.com' in h]
#deckstats = [h for h in urls if 'deckstats.net' in h]
#deckbox = [h for h in urls if 'deckbox.org' in h]

found = moxfield + tappedout #+ scryfall + archidekt + deckstats + deckbox
print(len(found))
print([h for h in urls if h not in found])

235
234
['https://archidekt.com/decks/74512#A_Song_of_Turns_and_Dongers']


In [8]:
""" Define the Deck class to store info.
"""

class Deck:
    def __init__(self, commanders, decklist, URL, name):
        self.commanders = commanders
        self.decklist = decklist
        self.URL = URL
        self.name = name
        
    def info(self):
        print(self.URL)
        print(self.name)
        print(len(self.decklist))
        print(self.commanders)
        print(self.decklist)


In [5]:
""" Functions for scraping decklists from each website. Returns a populated Deck object.
"""

def getMoxfield(URL):
    cards = []
    commanders = []
    soup = getSoup_dynamic(URL)
    name = soup.find('span', class_='deckheader-name').get_text().strip()
    containers = soup.find('div', class_='deckview').findAll('table', class_='table-deck')
    for c in containers:
        table_name = c.find('thead').get_text()
        if 'Sideboard' in table_name:
            continue
        elif 'Commander' in table_name:
            subcontainers = c.findAll('tr', class_='table-deck-row')
            for sc in subcontainers:
                commanders.append(sc.a.get_text())
        else:
            subcontainers = c.findAll('tr', class_='table-deck-row')
            for sc in subcontainers:
                cards.append(sc.a.get_text())
    
    return Deck(commanders, cards, URL, name)

def getTappedout(URL):
    soup = getSoup(URL)
    name = soup.find('div', class_='well-jumbotron').find('h2').get_text().strip()
    containers = soup.findAll('li', class_='member')
    imgs = soup.findAll('img', class_='commander-img')
    cards = []
    commanders = []
    for c in imgs:
        commanders.append(c.parent['data-name'])
    for c in containers:
        cards.append(c.a['data-name'])
    return Deck(commanders, cards, URL, name)

In [9]:
decks = []
for url in moxfield:
    d = getMoxfield(url)
    d.info()
    decks.append(d)
for url in tappedout:
    d = getTappedout(url)
    d.info()
    decks.append(d)

https://www.moxfield.com/decks/C5xyxwytW0eaKZkCPtkbrg
Turbo Seeker Inalla
99
['Inalla, Archmage Ritualist']
['Dockside Extortionist', 'Gilded Drake', 'Hullbreacher', 'Imperial Recruiter', 'Notion Thief', 'Opposition Agent', 'Phantasmal Image', 'Scholar of the Ages', 'Simian Spirit Guide', 'Spellseeker', "Thassa's Oracle", 'Vedalken Aethermage', 'Demonic Tutor', 'Exhume', 'Finale of Promise', 'Gamble', 'Imperial Seal', "Praetor's Grasp", 'Reanimate', 'Unearth', 'Wheel of Fortune', 'Wheel of Misfortune', 'Windfall', 'Ad Nauseam', 'Brainstorm', 'Burnt Offering', 'Chain of Vapor', 'Culling the Weak', 'Cyclonic Rift', 'Dark Ritual', 'Demonic Consultation', 'Dispel', 'Entomb', 'Essence Flux', 'Flusterstorm', 'Force of Will', 'Intuition', "Lim-Dûl's Vault", 'Mana Drain', 'Mental Misstep', 'Miscast', 'Misdirection', 'Mystical Tutor', 'Pact of Negation', 'Pyroblast', 'Red Elemental Blast', 'Shallow Grave', 'Snap', 'Swan Song', 'Tainted Pact', 'Vampiric Tutor', 'Arcane Signet', 'Chrome Mox', 'Di

AttributeError: 'NoneType' object has no attribute 'findAll'