In [1]:
# Archetype Win Rate Data Downloader
# Downloads the latest card_archetype_game_winrates.csv from the ManaCore GitHub repo.
# Run this whenever you want an up-to-date local copy of the archetype data.
#
# What it does:
#   1. Fetches the CSV from the ManaCore GitHub repo (raw)
#   2. Filters to Season-All only
#   3. Removes card-archetype rows with 15 or fewer games played (too noisy)
#   4. Keeps only scryfallId, archetype, games_played columns
#   5. Saves the cleaned data to data/archetype_decktype_data/archetype_data.csv

import pandas as pd
import requests
import os
from io import StringIO

# --- Configuration -----------------------------------------------------------
# PROJECT_ROOT is resolved as the parent of the *current working directory*,
# so this cell must be run with the working directory set to a folder that
# sits directly under the project root (e.g. scripts/).
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'data', 'archetype_decktype_data')

RAW_URL = "https://raw.githubusercontent.com/GuySchnidrig/ManaCore/main/data/processed/card_archetype_game_winrates.csv"
# Rows are kept only when games_played is strictly greater than this value.
MIN_GAMES = 15
# Seconds requests will wait for the server to connect / send data before
# giving up. Without a timeout a stalled connection blocks the cell forever.
REQUEST_TIMEOUT_SECONDS = 30
# Columns the filtering and projection steps below read from the CSV.
REQUIRED_COLUMNS = ['season_id', 'scryfallId', 'archetype', 'games_played']

# --- Download ----------------------------------------------------------------
print(f"Downloading from: {RAW_URL}")

response = requests.get(RAW_URL, timeout=REQUEST_TIMEOUT_SECONDS)
# Raises requests.HTTPError on a 4xx/5xx response instead of parsing an error page.
response.raise_for_status()

df = pd.read_csv(StringIO(response.text))
print(f"Downloaded {len(df):,} rows")

# Fail early with a readable message if the upstream schema changed, rather
# than with a bare KeyError part-way through the filtering below.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Downloaded CSV is missing expected column(s): {missing_columns}; "
        f"found columns: {list(df.columns)}"
    )

# --- Clean -------------------------------------------------------------------
# Filter to Season-All only
df = df[df['season_id'] == 'Season-All']
print(f"After Season-All filter: {len(df):,} rows")

# Remove low-sample rows (MIN_GAMES or fewer games played)
df = df[df['games_played'] > MIN_GAMES]
print(f"After removing games_played <= {MIN_GAMES}: {len(df):,} rows")

# Keep only relevant columns
df = df[['scryfallId', 'archetype', 'games_played']]

print(f"Unique cards: {df['scryfallId'].nunique():,} | Archetypes: {sorted(df['archetype'].unique())}")

# --- Save --------------------------------------------------------------------
# Create the output folder on first run; exist_ok makes re-runs safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)
output_file = os.path.join(OUTPUT_DIR, 'archetype_data.csv')
# index=False keeps the pandas row index out of the saved file.
df.to_csv(output_file, index=False)

print(f"Saved {len(df):,} rows to {output_file}")

Downloading from: https://raw.githubusercontent.com/GuySchnidrig/ManaCore/main/data/processed/card_archetype_game_winrates.csv
Downloaded 5,369 rows
After Season-All filter: 2,251 rows
After removing games_played <= 15: 1,334 rows
Unique cards: 580 | Archetypes: ['Aggro', 'Aggro-Combo', 'Aggro-Control (Tempo)', 'Combo', 'Combo-Control', 'Control', 'Control-Aggro (Midrange)']
Saved 1,334 rows to c:\Users\Dimlas\Desktop\Dimi\Github\CubeOCR\data\archetype_decktype_data\archetype_data.csv
