# Pokémon TCG Deck Synergy Calculator

In [1]:
# import libraries for data wrangling
import pandas as pd
import numpy as np
import ast
import re

In [2]:
# import cards that are standard format legal
data = pd.read_csv('cards.csv')

In [3]:
# How many cards are legal in the standard format?
data.shape

(6014, 26)

In [4]:
# Let's look at the first 5 rows
data.head().T

Unnamed: 0,0,1,2,3,4
abilities,,,,,
artist,Ryo Ueda,Ryo Ueda,Ryo Ueda,5ban Graphics,5ban Graphics
ancientTrait,,,,,
attacks,,,,,
cardmarket,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...
convertedRetreatCost,,,,,
evolvesFrom,,,,,
flavorText,,,,,
hp,,,,,
id,pop5-7,dv1-18,pop8-10,dv1-20,xy0-34


In [5]:
# Let's look at the last 5 rows
data.tail().T

Unnamed: 0,6009,6010,6011,6012,6013
abilities,"[{'name': 'Power Saver', 'text': ""This Pokémon...",,"[{'name': 'Biting Spree', 'text': ""When you pl...",,
artist,,,,,
ancientTrait,,,,,
attacks,"[{'name': 'Erasure Ball', 'cost': ['Psychic', ...","[{'name': 'Corkscrew Dive', 'cost': ['Fighting...","[{'name': ""Assassin's Return"", 'cost': ['Darkn...",,
cardmarket,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...
convertedRetreatCost,3.0,,1.0,,
evolvesFrom,,Cynthia's Gabite,Team Rocket's Golbat,,
flavorText,,,,,
hp,280.0,330.0,310.0,,
id,sv10-240,sv10-241,sv10-242,sv10-243,sv10-244


In [6]:
# Remove unused features
data = data.drop(columns=['artist', 'ancientTrait', 'cardmarket', 'flavorText',
                          'images', 'nationalPokedexNumbers', 'rarity',
                          'retreatCost', 'tcgplayer'])

In [7]:
# Extract the standard format legality flag
data['standard_legality'] = data['legalities']\
                            .apply(ast.literal_eval)\
                            .apply(lambda d: d.get('standard'))

In [8]:
# Rearrange columns
data = data[['id',
 'supertype',
 'subtypes',
 'types',
 'name',
 'evolvesFrom',
 'hp',
 'convertedRetreatCost',
 'abilities',
 'attacks',
 'resistances',
 'weaknesses',
 'rules',
 'set',
 'number',
 'regulationMark',
 'legalities',
 'standard_legality']].copy()

In [9]:
data.head().T

Unnamed: 0,0,1,2,3,4
id,pop5-7,dv1-18,pop8-10,dv1-20,xy0-34
supertype,Trainer,Trainer,Trainer,Trainer,Trainer
subtypes,['Item'],['Pokémon Tool'],['Item'],['Item'],['Item']
types,,,,,
name,Rare Candy,Exp. Share,Rare Candy,Super Rod,Crushing Hammer
evolvesFrom,,,,,
hp,,,,,
convertedRetreatCost,,,,,
abilities,,,,,
attacks,,,,,


# Pokémon: Data Wrangling

In [10]:
# Filter the dataset for only Pokémon cards
df_pokemon_cards = data[(data['standard_legality']=='Legal')&\
                        (data['regulationMark']>='G')&
                        (data['supertype']=='Pokémon')].reset_index(drop=True)

In [11]:
# Transform the subtypes column into a string
df_pokemon_cards['subtypes'] = df_pokemon_cards['subtypes'].apply(
    lambda x: str(sorted(ast.literal_eval(x))) if pd.notnull(x) else []
)

In [12]:
def extract_stage(subtypes):
    '''Extract the Stage of each Pokémon card'''
    if not isinstance(subtypes, str):
        return (None, None)
    if 'Basic' in subtypes:
        return ('Basic', 0)
    elif 'Stage 1' in subtypes:
        return ('Stage 1', 1)
    elif 'Stage 2' in subtypes:
        return ('Stage 2', 2)
    return (None, None)

# Apply the function to your DataFrame
df_pokemon_cards[['stage', 'setup_time']] = df_pokemon_cards['subtypes'].apply(extract_stage).apply(pd.Series)

In [13]:
df_pokemon_cards['is_ex'] = df_pokemon_cards['subtypes'].apply(lambda x: 1 if 'ex' in x else 0)
df_pokemon_cards['is_tera'] = df_pokemon_cards['subtypes'].apply(lambda x: 1 if 'Tera' in x else 0)

In [14]:
df_pokemon_cards['primary_type'] = df_pokemon_cards['types'].apply(
    lambda x: ast.literal_eval(x)[0] if pd.notnull(x) else x
)

In [15]:
df_pokemon_cards['abilities'] = df_pokemon_cards['abilities'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)
df_pokemon_cards['attacks'] = df_pokemon_cards['attacks'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)
df_pokemon_cards['set'] = df_pokemon_cards['set'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)

In [16]:
# Drop rows with missing abilities
df_abilities = df_pokemon_cards.dropna(subset=['abilities']).copy()

# Flatten the abilities list
df_abilities = df_abilities.explode('abilities')
df_abilities['ability_name'] = df_abilities['abilities'].apply(lambda x: x.get('name') if isinstance(x, dict) else None)
df_abilities['ability_text'] = df_abilities['abilities'].apply(lambda x: x.get('text') if isinstance(x, dict) else None)
df_abilities['ability_type'] = df_abilities['abilities'].apply(lambda x: x.get('type') if isinstance(x, dict) else None)

In [17]:
df_abilities = df_abilities[['id', 'ability_name', 'ability_text']].reset_index(drop=True)
df_abilities.to_csv('pokemon_abilities.csv', index=False)

In [18]:
# Drop rows with missing attacks
df_attacks = df_pokemon_cards.dropna(subset=['attacks']).copy()

# Flatten the attacks list
df_attacks = df_attacks.explode('attacks')
df_attacks['attack_name'] = df_attacks['attacks'].apply(lambda x: x.get('name') if isinstance(x, dict) else None)
df_attacks['attack_text'] = df_attacks['attacks'].apply(lambda x: x.get('text') if isinstance(x, dict) else None)
df_attacks['attack_damage'] = df_attacks['attacks'].apply(lambda x: x.get('damage') if isinstance(x, dict) else None)
df_attacks['attack_cost'] = df_attacks['attacks'].apply(lambda x: x.get('cost') if isinstance(x, dict) else None)
df_attacks['attack_energy_cost'] = df_attacks['attacks'].apply(lambda x: x.get('convertedEnergyCost') if isinstance(x, dict) else None)

In [19]:
df_attacks = df_attacks[['id', 'attack_name', 'attack_text', 'attack_damage', 'attack_cost', 'attack_energy_cost']].reset_index(drop=True)
df_attacks.to_csv('pokemon_attacks.csv', index=False)

Get set data

In [20]:
df_pokemon_cards['release_date'] = df_pokemon_cards['set'].apply(lambda x: x.get('releaseDate') if isinstance(x, dict) else None)
df_pokemon_cards['release_date'] = pd.to_datetime(df_pokemon_cards['release_date'], errors='coerce')

# Step 2: Extract the year
df_pokemon_cards['release_year'] = df_pokemon_cards['release_date'].dt.year

In [21]:
df_pokemon_cards['resistances'] = df_pokemon_cards['resistances'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else [])

In [22]:
df_pokemon_cards['resistance_type'] = df_pokemon_cards['resistances'].apply(lambda x: x[0]['type'] if len(x) > 0 else None)
df_pokemon_cards['resistance_value'] = df_pokemon_cards['resistances'].apply(lambda x: x[0]['value'] if len(x) > 0 else None)

In [23]:
df_pokemon_cards['weaknesses'] = df_pokemon_cards['weaknesses'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else [])
df_pokemon_cards['weakness_type'] = df_pokemon_cards['weaknesses'].apply(lambda x: x[0]['type'] if len(x) > 0 else None)
df_pokemon_cards['weakness_value'] = df_pokemon_cards['weaknesses'].apply(lambda x: x[0]['value'] if len(x) > 0 else None)

In [24]:
# Convert string to list if necessary
df_pokemon_cards['rules'] = df_pokemon_cards['rules'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) and isinstance(x, str) else x)

In [25]:
def extract_prize_value(rule):
    # Handle missing or empty rule values
    if rule is None or rule == '' or rule == []:
        return 1
    
    # If the rule is a stringified list, convert it to an actual list
    if isinstance(rule, str):
        try:
            rule = ast.literal_eval(rule)
        except Exception:
            return 1

    # At this point, rule should be a list
    if isinstance(rule, list):
        for r in rule:
            match = re.search(r'takes (\d+) Prize', r)
            if match:
                return int(match.group(1))

    return 1 

# Apply it to your dataframe
df_pokemon_cards['prize_card_value'] = df_pokemon_cards['rules'].apply(extract_prize_value)


In [26]:
df_pokemon_cards['prize_card_value'].value_counts()

prize_card_value
1    2184
2     569
Name: count, dtype: int64

In [27]:
# Create the feature flag for bench damage immunity
df_pokemon_cards['is_immune_to_bench_damage'] = df_pokemon_cards['rules'].apply(
    lambda x: int(any('As long as this Pokémon is on your Bench, prevent all damage done' in rule for rule in x)) if isinstance(x, list) else 0
)

In [28]:
df_pokemon_cards = df_pokemon_cards.merge(df_abilities, how='left', on='id')
df_pokemon_cards = df_pokemon_cards.merge(df_attacks, how='left', on='id')

In [29]:
df_pokemon_cards['attack_damage_amount'] = df_pokemon_cards['attack_damage'].str.extract('([0-9]*)')
df_pokemon_cards['attack_damage_modifier'] = df_pokemon_cards['attack_damage'].str.replace('([0-9])', '')

In [30]:
df_pokemon_cards['cards_needed_for_attack'] = df_pokemon_cards['setup_time'] + df_pokemon_cards['attack_energy_cost']

In [31]:
cols_to_keep = ['id',
 'supertype',
 'subtypes',
 'name',
 'stage',
 'is_ex',
 'is_tera',
 'primary_type',
 'evolvesFrom',
 'hp',
 'ability_name',
 'ability_text',
 'attack_name',
 'attack_text',
 'attack_damage_amount',
 'attack_damage_modifier',
 'attack_cost',
 'cards_needed_for_attack',
 'attack_energy_cost',
 'convertedRetreatCost',
 'regulationMark',
 'prize_card_value',
 'setup_time',
 'resistance_type',
 'resistance_value',
 'weakness_type',
 'weakness_value',
 'is_immune_to_bench_damage',
 'release_date',
 'release_year'
]

In [32]:
df_pokemon_cards = df_pokemon_cards[cols_to_keep]

In [33]:
df_pokemon_cards['attack_damage_amount'] = pd.to_numeric(df_pokemon_cards['attack_damage_amount'], errors='coerce')
df_pokemon_cards['is_coin_flip'] = df_pokemon_cards['attack_text'].str.contains('coin')

In [34]:
def smart_deduplicate_pokemon(df, log=True):
    """
    Drops duplicate Pokémon cards based on gameplay differences.
    Logs details about what was dropped if log=True.
    """
    # Store original before dropping
    original_count = df.shape[0]

    # Perform smart deduplication
    deduped_df = df.drop_duplicates(
        subset=['name', 'attack_text', 'hp', 'ability_text'],
        keep='first'
    ).reset_index(drop=True)

    new_count = deduped_df.shape[0]

    if log:
        print(f"🔵 Before deduplication: {original_count} records")
        print(f"🟢 After deduplication: {new_count} records")
        print(f"🧹 {original_count - new_count} duplicate records removed.\n")

    return deduped_df

In [35]:
df_pokemon_cards = smart_deduplicate_pokemon(df_pokemon_cards)

🔵 Before deduplication: 4170 records
🟢 After deduplication: 2609 records
🧹 1561 duplicate records removed.



In [36]:
df_pokemon_cards['damage_per_energy'] = np.where(
    df_pokemon_cards['attack_energy_cost'] == 0,
    np.nan,  # or 0 if you prefer
    round(df_pokemon_cards['attack_damage_amount'] / df_pokemon_cards['attack_energy_cost'], 2)
)

df_pokemon_cards['damage_per_energy'] = pd.to_numeric(df_pokemon_cards['damage_per_energy'], errors='coerce')

In [37]:
df_pokemon_cards.to_csv('pokemon_cleaned.csv', index=False)

In [38]:
df_pokemon_cards

Unnamed: 0,id,supertype,subtypes,name,stage,is_ex,is_tera,primary_type,evolvesFrom,hp,...,setup_time,resistance_type,resistance_value,weakness_type,weakness_value,is_immune_to_bench_damage,release_date,release_year,is_coin_flip,damage_per_energy
0,sv1-8,Pokémon,['Basic'],Scatterbug,Basic,0,0,Grass,,30.0,...,0,,,Fire,×2,0,2023-03-31,2023,False,10.00
1,sv1-1,Pokémon,['Basic'],Pineco,Basic,0,0,Grass,,60.0,...,0,,,Fire,×2,0,2023-03-31,2023,False,5.00
2,sv1-6,Pokémon,['Stage 1'],Cacturne,Stage 1,0,0,Grass,Cacnea,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,False,36.67
3,sv1-12,Pokémon,['Stage 1'],Gogoat,Stage 1,0,0,Grass,Skiddo,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,True,15.00
4,sv1-12,Pokémon,['Stage 1'],Gogoat,Stage 1,0,0,Grass,Skiddo,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,False,36.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2604,sv10-157,Pokémon,['Stage 1'],Swellow,Stage 1,0,0,Colorless,Taillow,100.0,...,1,Fighting,-30,Lightning,×2,0,2025-05-30,2025,False,
2605,sv10-157,Pokémon,['Stage 1'],Swellow,Stage 1,0,0,Colorless,Taillow,100.0,...,1,Fighting,-30,Lightning,×2,0,2025-05-30,2025,False,35.00
2606,sv10-158,Pokémon,['Basic'],Arven's Skwovet,Basic,0,0,Colorless,,60.0,...,0,,,Fighting,×2,0,2025-05-30,2025,False,10.00
2607,sv10-159,Pokémon,['Stage 1'],Arven's Greedent,Stage 1,0,0,Colorless,Arven's Skwovet,120.0,...,1,,,Fighting,×2,0,2025-05-30,2025,False,25.00
