In [1]:
import numpy

In [2]:
import pandas as pd
import matplotlib as plt
import seaborn as sns
import json
import requests
from bs4 import BeautifulSoup as BS
import re
import time
import cloudscraper
import random

In [None]:
# Page requested by the scraping cells below (the path names the Standard
# metagame, "last-3-months" view).
url = 'https://mtgdecks.net/Standard/metagame:last-3-months'

In [None]:
# Create the scraper session used for every request in this notebook.
# `mobile` must be the boolean False: the string 'false' is non-empty and
# therefore truthy, which would ask for a mobile profile rather than disable it.
scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'platform': 'windows',
        'mobile': False,
    },
    delay=10,
)

In [None]:
# Browser-like request headers, listed as (name, value) pairs and turned into
# a dict in the same order.
headers = dict([
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"),
    ("Accept-Language", "en-US,en;q=0.5"),
    ("Accept-Encoding", "gzip, deflate, br"),
    ("Connection", "keep-alive"),
    ("Upgrade-Insecure-Requests", "1"),
    ("Sec-Fetch-Dest", "document"),
    ("Sec-Fetch-Mode", "navigate"),
    ("Sec-Fetch-Site", "none"),
    ("Sec-Fetch-User", "?1"),
])

# Merge them into the headers stored on the scraper session.
scraper.headers.update(headers)

In [None]:
# Fetch the metagame page and collect the deck name shown in each table entry.
# `deck_data` is created before the request so it exists for later cells even
# when the request is blocked or raises.
deck_data = []

try:
    response = scraper.get(url)
    print(f'Status Code: {response.status_code}')
    if response.status_code == 403:
        print('Still blocked. Check if JS engine is needed or try Selenium.')
        print('Error Body:', response.text[:500])
    elif not response.ok:
        # Other error statuses do not carry the deck table; report instead of parsing.
        print(f'Unexpected status {response.status_code}; page not parsed.')
        print('Error Body:', response.text[:500])
    else:
        soup = BS(response.text, 'html.parser')
        print('Cloudscraper successful!')

        deck_containers = soup.find_all('td', class_='sort')
        print(f'Found {len(deck_containers)} deck containers')

        # No delay is needed in this loop: it only walks HTML that was already
        # downloaded and makes no further requests.
        for i, deck in enumerate(deck_containers, start=1):
            try:
                strong_tag = deck.find('strong', attrs={'name': True})
                if strong_tag is None:
                    continue
                # Prefer the visible link text; fall back to the `name` attribute.
                a_tag = strong_tag.find('a', class_='text-uppercase')
                if a_tag is not None:
                    deck_name = a_tag.text.strip()
                else:
                    deck_name = strong_tag['name'].strip()
                deck_data.append({'Deck_Name': deck_name})
                print(f'Found deck: {deck_name}')
            except AttributeError as e:
                # Best effort: report the malformed entry and keep going.
                print(f'Error on deck {i}: {e}')

        if deck_data:
            df = pd.DataFrame(deck_data)
            # NOTE(review): written to the working directory; a later cell reads
            # '../data/mtg_deck_names.csv' — confirm the file is moved there.
            df.to_csv('mtg_deck_names.csv', index=False)
            print(df.head())
        else:
            print('No data found—check selectors.')

except Exception as e:
    # No fallback is implemented in this cell, so only report the failure.
    print(f'Cloudscraper failed: {e}')

In [None]:
# Inspect the records collected by the scraping cell above
# (a list of {'Deck_Name': ...} dicts).
deck_data

In [None]:
# Fetch the metagame page again and pull, for each table row, the first word of
# the deck cell ("Archetype_Name") and the first percentage found ("Meta_Share").
# `meta_data` is created up front so it exists even when the request fails.
meta_data = []
share_re = re.compile(r'\d+\.?\d*%')

try:
    response = scraper.get(url)
    print(f'Status Code: {response.status_code}')
    if response.status_code == 200:
        soup = BS(response.text, 'html.parser')
        # Only the first 21 <tr> rows are inspected — presumably a header row
        # plus the top 20 decks; TODO confirm against the page layout.
        for row in soup.find_all('tr')[:21]:
            cells = row.find_all('td')
            if len(cells) < 2:
                continue

            # First cell that carries a deck name marker (<strong name=...> or
            # an <a class="text-uppercase">).
            deck_name_cell = next(
                (
                    cell for cell in cells
                    if cell.find('strong', attrs={'name': True}) is not None
                    or cell.find('a', class_='text-uppercase') is not None
                ),
                None,
            )
            # First percentage match across the row's cells; keeping the match
            # object avoids running the regex a second time.
            share_match = next(
                (m for m in (share_re.search(cell.text) for cell in cells) if m),
                None,
            )
            if deck_name_cell is None or share_match is None:
                continue

            name_words = deck_name_cell.text.split()
            if not name_words:
                # An empty name cell would otherwise raise IndexError and abort
                # the whole scrape through the outer handler.
                continue

            deck_name = name_words[0]
            meta_share = share_match.group()
            meta_data.append({'Archetype_Name': deck_name, 'Meta_Share': meta_share})
            print(f'Archetype: {deck_name}, Meta Share: {meta_share}')

        if meta_data:
            df = pd.DataFrame(meta_data)
            # NOTE(review): written to the working directory; a later cell reads
            # '../data/mtg_meta_share.csv' — confirm the file is moved there.
            df.to_csv('mtg_meta_share.csv', index=False)
            print(f'Saved {len(meta_data)} meta entries')
        else:
            print('No meta entries found; nothing saved.')
    else:
        print(f'Request did not succeed (status {response.status_code}); nothing parsed.')

except Exception as e:
    print(f'Error: {e}')

In [3]:
# Load the scraped deck names.
# NOTE(review): the scraping cell saves 'mtg_deck_names.csv' to the working
# directory, while this reads from '../data/' — confirm the file is moved there
# before this cell runs.
deck_name_df = pd.read_csv('../data/mtg_deck_names.csv')

In [4]:
# Load the scraped archetype / meta-share table.
# NOTE(review): the scraping cell saves 'mtg_meta_share.csv' to the working
# directory, while this reads from '../data/' — confirm the file is moved there
# before this cell runs.
meta_share_df = pd.read_csv('../data/mtg_meta_share.csv')

In [None]:
# Display the deck-name table loaded from mtg_deck_names.csv.
deck_name_df

In [None]:
# Display the meta-share table loaded from mtg_meta_share.csv.
meta_share_df

In [5]:
# Keep exactly as many deck names as there are meta-share rows. The next cell
# assigns the names by position, which requires both tables to have the same
# length; deriving the cut-off from meta_share_df avoids a hard-coded 20.
deck_name_df = deck_name_df.iloc[:len(meta_share_df)]

In [6]:
# Attach the full deck names as a new column. Passing a plain array (no index)
# pairs the rows purely by position, so both tables must have equal length.
meta_share_df['Deck_Name'] = deck_name_df['Deck_Name'].to_numpy()

In [None]:
# Display meta_share_df again, now including the Deck_Name column.
meta_share_df

In [7]:
# Column order for the combined table: Deck_Name first, then every other
# column in its existing order.
cols = ['Deck_Name', *(name for name in meta_share_df.columns if name != 'Deck_Name')]

In [8]:
# Reordered table with Deck_Name first. `.copy()` makes standard_df an
# independent frame, so the later assignments to standard_df['Meta_Share'] do
# not raise SettingWithCopyWarning or depend on meta_share_df.
standard_df = meta_share_df[cols].copy()

In [9]:
# Save the combined table as standard_meta.csv in the working directory,
# leaving out the index column.
standard_df.to_csv('standard_meta.csv', index=False)

In [10]:
# Load the decklist CSV from the data directory into a DataFrame.
decklist = pd.read_csv('../data/Vivi_Decklist.csv')

In [11]:
# Display the decklist loaded from Vivi_Decklist.csv.
decklist

Unnamed: 0,Card Name,Copies?,Card Type,Color,Cost
0,Marauding Mako,4,Creature,Red,1.0
1,Fear of Missing Out,4,Creature,Red,2.0
2,Steamcore Scholar,3,Creature,Blue,3.0
3,Vivi Ornitier,4,Creature,Izzet,3.0
4,Quantum Riddler,4,Creature,Blue,5.0
5,Tersa Lightshatter,1,Creature,Red,3.0
6,Into the Floodmaw,4,Instant,Blue,1.0
7,Torch the Tower,4,Instant,Red,1.0
8,Abrade,2,Instant,Red,2.0
9,Winternight Stories,4,Sorcery,Blue,3.0


In [12]:
# Display the combined table (Deck_Name first, then the meta-share columns).
standard_df

Unnamed: 0,Deck_Name,Archetype_Name,Meta_Share
0,Izzet Cauldron,Izzet,22.70%
1,Dimir Midrange,Dimir,15.53%
2,Mono-Red Aggro,Mono-Red,5.61%
3,Azorius Control,Azorius,5.46%
4,Rogue,Rogue,3.93%
5,Esper Pixie,Esper,2.48%
6,Gruul Delirium,Gruul,2.25%
7,Golgari Midrange,Golgari,2.12%
8,Mono Red Dragons,Mono,2.07%
9,Gruul Aggro,Gruul,1.79%


In [14]:
# Inspect column dtypes; Meta_Share is still stored as text (object) here.
standard_df.dtypes

Deck_Name         object
Archetype_Name    object
Meta_Share        object
dtype: object

In [16]:
# Strip the percent sign from Meta_Share so it can be parsed as a number.
# The dtype guard keeps this cell re-runnable: once the column has been
# converted to a numeric dtype, `.str` would raise AttributeError.
if not pd.api.types.is_numeric_dtype(standard_df['Meta_Share']):
    standard_df['Meta_Share'] = standard_df['Meta_Share'].str.replace('%', '', regex=False)

In [19]:
# Parse the cleaned Meta_Share strings into a numeric column.
standard_df['Meta_Share'] = standard_df['Meta_Share'].pipe(pd.to_numeric)

In [20]:
# Re-check dtypes to confirm Meta_Share is now numeric.
standard_df.dtypes

Deck_Name          object
Archetype_Name     object
Meta_Share        float64
dtype: object