In [193]:
from bs4 import BeautifulSoup
from bs4 import NavigableString
import requests
import pandas as pd
from re import match, search

## Auxiliary Functions

In [194]:
# name, type_, cost, color, rarity, quality, price, set_, description
def get_card_info(card_list, index):
    """Extract the fields of one card from a list of parsed product wrappers.

    Args:
        card_list: Sequence of BeautifulSoup tags, one per product wrapper
            found on a search-results page.
        index: Position in ``card_list`` of the card to read.

    Returns:
        list: ``[name, type_, cost, color, rarity, quality, price, set_,
        description]``.

        * ``cost`` is an int: numeric mana symbols add their value, every
          other symbol adds 1 and is appended to ``color``.
        * ``quality`` is ``None`` and ``price`` is ``-1.0`` when no active
          listing with a parsable price is found.
        * ``rarity`` / ``set_`` are ``None`` when the set line does not
          contain them.

    Raises:
        AttributeError, IndexError: If the title, type, description or set
            element is missing from the card markup.
        KeyError: If a mana-symbol image has no ``alt`` attribute.
        ValueError: If a mana symbol starts with a digit but is not an integer.
    """
    card = card_list[index]

    # Getting Name
    # str() detaches the text from the parse tree; a raw NavigableString keeps
    # the whole parsed page alive for as long as the value is stored.
    name = str(card.find('span', class_="productDetailTitle").contents[0].contents[0])

    # Getting type
    type_ = str(card.find('div', class_="productDetailType").contents[0]).replace('\n', '')

    # Getting converted mana cost and color
    cost = 0
    color = ''

    # The cast-cost container may be absent; treat that as "no mana symbols"
    cast_cost = card.find('div', class_="productDetailCastCost")
    mana_symbols = cast_cost.find_all('img') if cast_cost is not None else []

    for mana in mana_symbols:
        symbol = mana.attrs['alt']

        if match(r'\d', symbol):
            cost += int(symbol)
        else:
            cost += 1
            color += symbol

    # Getting description
    description = ''

    for cont in card.find('td', colspan="2").contents:
        if type(cont) is NavigableString:
            description += cont.replace('\n', '')
        # Other string nodes (e.g. HTML comments) expose no ``attrs``; skip
        # them instead of failing on the attribute lookup.
        elif 'alt' in (getattr(cont, 'attrs', None) or {}):
            description += cont.attrs['alt']

    try:
        # The first two "active" items hold the highest available quality
        # label and its price, in that order.
        active_items = card.find_all('li', class_='active', limit=2)

        # Finding highest available quality
        quality = str(active_items[0].contents[0])

        # Getting cost of highest available quality
        price_text = active_items[1].find('span', class_="stylePrice").contents[0]
        price = float(price_text.replace('$', '').replace(',', '').strip())

    except (AttributeError, LookupError, TypeError, ValueError):

        # Handling for missing values
        quality = None

        price = -1.00

    # Getting rarity and set
    card_detail = card.find('div', class_="productDetailSet").contents[1].contents[0].replace('\n', ' ').strip()

    # Raw strings keep the regex escapes out of Python's own escape handling
    rarity_match = search(r'\((\w)\)', card_detail)
    set_match = search(r'([^\(\)]+)', card_detail)

    rarity = rarity_match.group(1) if rarity_match else None
    set_ = set_match.group(1).strip() if set_match else None

    return [name, type_, cost, color, rarity, quality, price, set_, description]


## Body

In [195]:
# Request headers: a desktop-browser User-Agent sent with each page request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
}

# Crawl state before the first request: no cookies yet, start on page 1.
cookies = None
url = 'https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=1'

# Empty result table with one column per scraped card field.
count = 0
df_cards = pd.DataFrame(
    columns=[
        'Name',
        'Type',
        'Cost',
        'Color',
        'Rarity',
        'Quality',
        'Price',
        'Set',
        'Description',
    ]
)

In [196]:
# Crawl the search results page by page, following the "Next" link until it
# disappears, and collect one row per card into df_cards.

# Seconds to wait on the server before giving up; without a timeout a stalled
# connection would block the cell indefinitely.
REQUEST_TIMEOUT = 30

# Rows scraped during this run. They are merged into df_cards once, after the
# loop, because enlarging a DataFrame row by row copies it on every insert.
scraped_rows = []

try:
    while True:

        # For Debug
        print(url)

        # Loading page data

        # Requesting page
        response = requests.get(url, headers=HEADERS, cookies=cookies, timeout=REQUEST_TIMEOUT)

        # Raising exception for bad response (progress is saved in `finally`)
        if not response.ok:
            raise Exception(f'Bad Response: {response.status_code}\nUrl: {url}')

        # Parsing html
        page = BeautifulSoup(response.content, 'html.parser')

        # Putting data into the row buffer

        # Finding cards in page
        cards = page.find_all('div', class_="productItemWrapper productCardWrapper")

        # Adding cards to the buffer; rows already extracted survive a failure
        # on a later card because extend() appends them one at a time.
        scraped_rows.extend(get_card_info(cards, index) for index in range(len(cards)))

        # Moving on to next page

        # Looking for next_page_element
        next_page_element = page.find('a', attrs={'aria-label': 'Next'})

        # Breaking out of loop if no next page exists (or the link has no target)
        next_url = next_page_element.get('href') if next_page_element else None
        if not next_url:
            break

        # Saving cookies to allow for next url search
        if cookies is None:
            cookies = response.cookies

        # Getting next page URL
        url = next_url

finally:
    # Runs on success and on any failure (bad response, network error, parse
    # error, manual interrupt) so rows scraped so far are kept and saved.
    if scraped_rows:
        df_cards = pd.DataFrame(df_cards.values.tolist() + scraped_rows, columns=df_cards.columns)
    df_cards.to_csv('MTG_Cards.csv',  encoding='utf-8')

https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=1
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=2
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=3
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=4
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=5
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=6
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=7
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=8
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=9
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=10
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=11
https://www.cardkingdom.com/catalog/search?search=header&filter%5Bname%5D=&page=12
https://www.c

In [197]:
# Persist the collected card table to disk as a UTF-8 encoded CSV file.
df_cards.to_csv(path_or_buf='MTG_Cards.csv', encoding='utf-8')