In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extract_data_from_column(rows, column_header):
    """Collect the text of one table column, located by its header label.

    Args:
        rows: Sequence of row objects exposing ``find_all(tag)``; it is
            traversed twice (once for the header, once for the data).
        column_header: Exact header text (after stripping surrounding
            whitespace) of the column to extract.

    Returns:
        A list with the stripped text of the matching ``td`` cell from every
        row that has enough ``td`` cells. Empty when no ``th`` cell matches
        ``column_header``.
    """
    # Position of the first 'th' cell, in the first row that has one, whose
    # text equals the requested header.
    position = next(
        (
            th_position
            for row in rows
            for th_position, th_cell in enumerate(row.find_all('th'))
            if th_cell.text.strip() == column_header
        ),
        None,
    )
    if position is None:
        return []

    # Rows with too few 'td' cells (header rows have none) are skipped.
    cells_per_row = (row.find_all('td') for row in rows)
    return [
        cells[position].text.strip()
        for cells in cells_per_row
        if len(cells) > position
    ]

# Landing page of the price guide; each set's page is requested at f"{url}/{set value}".
url = 'https://shop.tcgplayer.com/price-guide/flesh-and-blood-tcg'
# Seconds to wait for each HTTP response. requests.get() has no default
# timeout and would otherwise block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30
# Table headers to scrape, in output order; they are also the keys of every record.
COLUMN_HEADERS = ["PRODUCT", "Rarity", "Number", "Market Price", "Listed Median"]

response = requests.get(url, timeout=REQUEST_TIMEOUT)
soup = BeautifulSoup(response.content, 'html.parser')

# Find the dropdown menu using the provided 'id' attribute
dropdown_menu = soup.find('select', {'id': 'set'})

if dropdown_menu:
    # An <option> without a 'value' attribute cannot be turned into a set URL
    set_names = [
        option['value']
        for option in dropdown_menu.find_all('option')
        if option.has_attr('value')
    ]
    data_list = []
    current_set_name = ""

    for set_name in set_names:
        set_url = f"{url}/{set_name}"
        set_response = requests.get(set_url, timeout=REQUEST_TIMEOUT)
        set_soup = BeautifulSoup(set_response.content, 'html.parser')

        # Use the last table with class 'tablesorter'. The emptiness check must
        # come first: indexing [-1] on an empty result raises IndexError, which
        # previously made the "no table" message unreachable.
        tables = set_soup.find_all('table', class_='tablesorter')
        if not tables:
            print(f"No table with class 'tablesorter' found in {set_name}")
            continue
        table = tables[-1]

        # Get the set name from the 'h3' element preceding the table. The first
        # 'h3' of the whole page is only a fallback: the captured run printed
        # "SHOP" for every set when it was used directly.
        # NOTE(review): page layout is not visible here — confirm the heading found.
        heading = table.find_previous('h3') or set_soup.find('h3')
        # Recomputed on every iteration so a page without a heading does not
        # inherit the previous set's name.
        current_set_name = heading.text.strip() if heading else ""

        if current_set_name:
            print(f"Found data in {current_set_name}")
        else:
            print("Found data in unknown set")

        # Extract data from the specified columns based on column headers
        rows = table.find_all('tr')
        print(f"Number of rows in table: {len(rows)}")

        columns = {
            header: extract_data_from_column(rows, header)
            for header in COLUMN_HEADERS
        }

        # A missing header yields an empty column and ragged rows yield a short
        # one; combining by position would then raise IndexError or misalign
        # values, so such a table is skipped with a message instead.
        if len({len(values) for values in columns.values()}) != 1:
            print(f"Skipping {set_name}: scraped columns have different lengths")
            continue

        # Combine the data from all columns into a list of dictionaries
        data_list.extend(
            dict(zip(COLUMN_HEADERS, record))
            for record in zip(*columns.values())
        )

    if data_list:
        # Create the pandas dataframe
        df = pd.DataFrame(data_list)

        # Display the dataframe. A bare expression inside an `if` body is not
        # rendered by the notebook, so call IPython's built-in display().
        display(df.head(2))
    else:
        print("No valid data found in any table.")
else:
    print("Dropdown menu not found.")


Found data in SHOP
Number of rows in table: 253
Found data in SHOP
Number of rows in table: 30
Found data in SHOP
Number of rows in table: 31
Found data in SHOP
Number of rows in table: 27
Found data in SHOP
Number of rows in table: 27
Found data in SHOP
Number of rows in table: 31
Found data in SHOP
Number of rows in table: 32
Found data in SHOP
Number of rows in table: 256
Found data in SHOP
Number of rows in table: 265
Found data in SHOP
Number of rows in table: 28
Found data in SHOP
Number of rows in table: 27
Found data in SHOP
Number of rows in table: 245
Found data in SHOP
Number of rows in table: 56
Found data in SHOP
Number of rows in table: 428
Found data in SHOP
Number of rows in table: 222
Found data in SHOP
Number of rows in table: 31
Found data in SHOP
Number of rows in table: 31
Found data in SHOP
Number of rows in table: 32
Found data in SHOP
Number of rows in table: 240
Found data in SHOP
Number of rows in table: 31
Found data in SHOP
Number of rows in table: 31
Found 

In [2]:
# Lowercase every column name so the headers are formatted the same as the other data set
df.columns = [column_name.lower() for column_name in df.columns]
df

Unnamed: 0,product,rarity,number,market price,listed median
0,Alluring Inducement,Majestic,DTD215,$0.76,—
1,Angelic Descent (Blue),Common,DTD034,$0.13,—
2,Angelic Descent (Red),Common,DTD032,$0.10,—
3,Angelic Descent (Yellow),Common,DTD033,$0.16,—
4,Angelic Wrath (Blue),Common,DTD037,$0.12,—
...,...,...,...,...,...
4008,Salt the Wound,Promo,IRA004,$0.61,—
4009,Scar for a Scar,Promo,IRA009,$0.68,—
4010,Springboard Somersault,Promo,IRA012,$3.36,—
4011,Torrent of Tempo,Promo,IRA006,$0.42,—


In [3]:
# Save the scraped data to a local CSV file (without the index column) so the
# merge program runs faster
df.to_csv(path_or_buf='fab_scraped_data.csv', index=False)
