In [1]:
import pandas as pd
import matplotlib as plt
import seaborn as sns
import json
import requests
from bs4 import BeautifulSoup as BS
import re
import time
import cloudscraper
import random

In [2]:
# Page to scrape: the mtgdecks.net Standard metagame listing, "last-3-months" view.
url = "https://mtgdecks.net/Standard/metagame:last-3-months"

In [3]:
# Session object created by cloudscraper; the later cells use it for
# `scraper.headers.update(...)` and `scraper.get(url)`.
scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'platform': 'windows',
        # Must be a real boolean. The previous value was the string 'false',
        # which is non-empty and therefore truthy in Python, i.e. it did NOT
        # express "mobile disabled".
        'mobile': False,
    },
    # NOTE(review): cloudscraper documents `delay` as the wait (in seconds)
    # before the challenge answer is submitted — confirm 10 is still wanted.
    delay=10,
)

In [4]:
# Extra request headers, kept in the order they are listed here, then merged
# into the session's existing headers (keys not listed are left untouched).
headers = dict([
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"),
    ("Accept-Language", "en-US,en;q=0.5"),
    ("Accept-Encoding", "gzip, deflate, br"),
    ("Connection", "keep-alive"),
    ("Upgrade-Insecure-Requests", "1"),
    ("Sec-Fetch-Dest", "document"),
    ("Sec-Fetch-Mode", "navigate"),
    ("Sec-Fetch-Site", "none"),
    ("Sec-Fetch-User", "?1"),
])
scraper.headers.update(headers)

In [5]:
def extract_deck_name(cell):
    """Return the deck name held in one table cell, or ``None`` if it has none.

    Parameters
    ----------
    cell
        An object with a BeautifulSoup-tag-like ``find`` method (here: one
        ``<td class="sort">`` element).

    Returns
    -------
    str or None
        The stripped text of the ``<a class="text-uppercase">`` inside the
        cell's ``<strong name=...>`` tag when that link exists, otherwise the
        stripped ``name`` attribute of the ``<strong>`` tag. ``None`` when the
        cell contains no ``<strong>`` tag carrying a ``name`` attribute.

    Raises
    ------
    AttributeError
        If the looked-up text / attribute does not support ``.strip()``.
    """
    strong_tag = cell.find('strong', attrs={'name': True})
    if strong_tag is None:
        return None
    a_tag = strong_tag.find('a', class_='text-uppercase')
    if a_tag:
        return a_tag.text.strip()
    return strong_tag['name'].strip()


# Only the HTTP request sits inside the `try`. Parsing and CSV errors are
# therefore no longer reported as "Cloudscraper failed"; they surface normally.
try:
    response = scraper.get(url)
except Exception as e:
    # Broad catch kept from the original so any failure of the request is
    # reported instead of raised. The message no longer announces a Selenium
    # fallback, because this cell does not implement one.
    print(f'Cloudscraper failed: {e}. No fallback is implemented; consider Selenium.')
else:
    print(f'Status Code: {response.status_code}')
    if response.status_code == 403:
        print('Still blocked. Check if JS engine is needed or try Selenium.')
        print('Error Body:', response.text[:500])
    elif not response.ok:
        # Any other 4xx/5xx: do not parse an error page as if it were data.
        print(f'Unexpected status {response.status_code}; not parsing the response.')
        print('Error Body:', response.text[:500])
    else:
        soup = BS(response.text, 'html.parser')
        print('Cloudscraper successful!')

        deck_containers = soup.find_all('td', class_='sort')
        print(f'Found {len(deck_containers)} deck containers')

        # The page is already in memory, so no request is made inside this
        # loop. The former per-cell `time.sleep(2..5 s)` throttled nothing and
        # only made the cell take hours for ~1.7k cells; it has been removed.
        deck_data = []
        for i, deck in enumerate(deck_containers):
            try:
                deck_name = extract_deck_name(deck)
            except AttributeError as e:
                print(f'Error on deck {i+1}: {e}')
                continue
            if deck_name is not None:
                deck_data.append({'Deck_Name': deck_name})
                print(f'Found deck: {deck_name}')

        if deck_data:
            # Build the frame once from the collected records, then persist it.
            df = pd.DataFrame(deck_data)
            df.to_csv('mtg_deck_names.csv', index=False)
            print(df.head())
        else:
            print('No data found—check selectors.')

Status Code: 200
Cloudscraper successful!
Found 1674 deck containers
Found deck: Izzet Cauldron
Found deck: Dimir Midrange
Found deck: Azorius Control
Found deck: Rogue
Found deck: Mono-Red Aggro
Found deck: Esper Pixie
Found deck: Gruul Delirium
Found deck: Golgari Midrange
Found deck: Mono Red Dragons
Found deck: Golgari Roots
Found deck: Gruul Aggro
Found deck: Mono Black Demons
Found deck: Mono Green Landfall
Found deck: Mono White Tokens
Found deck: Naya Yuna
Found deck: Jeskai Control
Found deck: Orzhov Sacrifice
Found deck: Boros Burn
Found deck: Temur Battlecrier
Found deck: Simic Aggro
Found deck: Jeskai Oculus
Found deck: Orzhov Midrange
Found deck: Azorius Artifacts
Found deck: Boros Arabella
Found deck: 4 Color Control
Found deck: Boros Tokens
Found deck: Izzet Prowess
Found deck: Dimir Control
Found deck: Mono Black Aggro
Found deck: Orzhov Pixie
Found deck: Orzhov Ketramose
Found deck: Boros Mice
Found deck: Mono Green Stompy
Found deck: Jeskai Artifacts
Found deck: Abzan