In [75]:
import pandas as pd
import numpy as np
import ast
import re

In [76]:
data = pd.read_csv('cards.csv')

In [77]:
data.head(20).to_csv('cards_claude.csv',index=False)

In [78]:
data.shape

(5770, 26)

In [79]:
data.head().T

Unnamed: 0,0,1,2,3,4
abilities,,,,,
artist,Ryo Ueda,Ryo Ueda,Ryo Ueda,5ban Graphics,5ban Graphics
ancientTrait,,,,,
attacks,,,,,
cardmarket,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...
convertedRetreatCost,,,,,
evolvesFrom,,,,,
flavorText,,,,,
hp,,,,,
id,pop5-7,dv1-18,pop8-10,dv1-20,xy0-34


In [80]:
data.tail().T

Unnamed: 0,5765,5766,5767,5768,5769
abilities,,,"[{'name': 'Electric Streamer', 'text': ""As oft...","[{'name': 'Trade', 'text': 'You must discard a...",
artist,,,,,
ancientTrait,,,,,
attacks,"[{'name': 'Insta-Strike', 'cost': ['Colorless'...","[{'name': 'Wide Blast', 'cost': ['Fire', 'Colo...","[{'name': 'Thunderous Bolt', 'cost': ['Lightni...","[{'name': 'Night Joker', 'cost': ['Darkness', ...",
cardmarket,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...,{'url': 'https://prices.pokemontcg.io/cardmark...
convertedRetreatCost,2.0,2.0,2.0,2.0,
evolvesFrom,,Shelgon,Iono's Tadbulb,N's Zorua,
flavorText,,,,,
hp,230.0,320.0,280.0,280.0,
id,sv9-186,sv9-187,sv9-188,sv9-189,sv9-190


In [81]:
data = data.drop(columns=['artist', 'ancientTrait', 'cardmarket', 'flavorText',
                          'images', 'nationalPokedexNumbers', 'rarity',
                          'retreatCost', 'tcgplayer'])

In [82]:
data['standard_legality'] = data['legalities']\
                            .apply(ast.literal_eval)\
                            .apply(lambda d: d.get('standard'))

In [83]:
# Rearrange columns
data = data[['id',
 'supertype',
 'subtypes',
 'types',
 'name',
 'evolvesFrom',
 'hp',
 'convertedRetreatCost',
 'abilities',
 'attacks',
 'resistances',
 'weaknesses',
 'rules',
 'set',
 'number',
 'regulationMark',
 'legalities', #remove after testing
 'standard_legality']].copy()

In [84]:
df_pokemon_cards = data[(data['standard_legality']=='Legal')&\
                        (data['regulationMark']>='G')&
                        (data['supertype']=='Pokémon')].reset_index(drop=True)

In [85]:
df_pokemon_cards.head().T

Unnamed: 0,0,1,2,3,4
id,sv1-8,sv1-1,sv1-6,sv1-12,sv1-13
supertype,Pokémon,Pokémon,Pokémon,Pokémon,Pokémon
subtypes,['Basic'],['Basic'],['Stage 1'],['Stage 1'],['Basic']
types,['Grass'],['Grass'],['Grass'],['Grass'],['Grass']
name,Scatterbug,Pineco,Cacturne,Gogoat,Sprigatito
evolvesFrom,,,Cacnea,Skiddo,
hp,30.0,60.0,130.0,130.0,70.0
convertedRetreatCost,1.0,2.0,2.0,2.0,1.0
abilities,"[{'name': 'Adaptive Evolution', 'text': 'This ...",,"[{'name': 'Counterattack Quills', 'text': ""If ...",,
attacks,"[{'name': 'Tackle', 'cost': ['Grass', 'Colorle...","[{'name': 'Guard Press', 'cost': ['Colorless',...","[{'name': 'Spike Shot', 'cost': ['Colorless', ...","[{'name': 'Rising Lunge', 'cost': ['Colorless'...","[{'name': 'Scratch', 'cost': ['Colorless'], 'c..."


In [86]:
df_pokemon_cards.to_csv('pokemon.csv',index=False)

In [87]:
# Create 3 smaller datasets for Pokemanz, Trainer, and Energy cards
df_trainer_cards = data[data['supertype']=='Trainer'].reset_index(drop=True)
df_energy_cards = data[data['supertype']=='Energy'].reset_index(drop=True)

In [88]:
df_trainer_cards.to_csv('trainer.csv',index=False)
df_energy_cards.to_csv('energy.csv',index=False)

In [89]:
# Transform the subtypes column into a string
df_pokemon_cards['subtypes'] = df_pokemon_cards['subtypes'].apply(
    lambda x: str(sorted(ast.literal_eval(x))) if pd.notnull(x) else []
)

In [90]:
def extract_stage(subtypes):
    '''Extract the Stage of each Pokémon card'''
    if not isinstance(subtypes, str):
        return (None, None)
    if 'Basic' in subtypes:
        return ('Basic', 0)
    elif 'Stage 1' in subtypes:
        return ('Stage 1', 1)
    elif 'Stage 2' in subtypes:
        return ('Stage 2', 2)
    return (None, None)

# Apply the function to your DataFrame
df_pokemon_cards[['stage', 'setup_time']] = df_pokemon_cards['subtypes'].apply(extract_stage).apply(pd.Series)

In [91]:
df_pokemon_cards['is_ex'] = df_pokemon_cards['subtypes'].apply(lambda x: 1 if 'ex' in x else 0)
df_pokemon_cards['is_tera'] = df_pokemon_cards['subtypes'].apply(lambda x: 1 if 'Tera' in x else 0)

In [92]:
df_pokemon_cards['primary_type'] = df_pokemon_cards['types'].apply(
    lambda x: ast.literal_eval(x)[0] if pd.notnull(x) else x
)

In [93]:
df_pokemon_cards['abilities'] = df_pokemon_cards['abilities'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)
df_pokemon_cards['attacks'] = df_pokemon_cards['attacks'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)
df_pokemon_cards['set'] = df_pokemon_cards['set'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)

In [94]:
# Drop rows with missing abilities
df_abilities = df_pokemon_cards.dropna(subset=['abilities']).copy()

# Flatten the abilities list
df_abilities = df_abilities.explode('abilities')
df_abilities['ability_name'] = df_abilities['abilities'].apply(lambda x: x.get('name') if isinstance(x, dict) else None)
df_abilities['ability_text'] = df_abilities['abilities'].apply(lambda x: x.get('text') if isinstance(x, dict) else None)
df_abilities['ability_type'] = df_abilities['abilities'].apply(lambda x: x.get('type') if isinstance(x, dict) else None)

In [95]:
df_abilities = df_abilities[['id', 'ability_name', 'ability_text']].reset_index(drop=True)
df_abilities.to_csv('pokemon_abilities.csv', index=False)

In [96]:
# Drop rows with missing attacks
df_attacks = df_pokemon_cards.dropna(subset=['attacks']).copy()

# Flatten the attacks list
df_attacks = df_attacks.explode('attacks')
df_attacks['attack_name'] = df_attacks['attacks'].apply(lambda x: x.get('name') if isinstance(x, dict) else None)
df_attacks['attack_text'] = df_attacks['attacks'].apply(lambda x: x.get('text') if isinstance(x, dict) else None)
df_attacks['attack_damage'] = df_attacks['attacks'].apply(lambda x: x.get('damage') if isinstance(x, dict) else None)
df_attacks['attack_cost'] = df_attacks['attacks'].apply(lambda x: x.get('cost') if isinstance(x, dict) else None)
df_attacks['attack_energy_cost'] = df_attacks['attacks'].apply(lambda x: x.get('convertedEnergyCost') if isinstance(x, dict) else None)

In [97]:
df_attacks = df_attacks[['id', 'attack_name', 'attack_text', 'attack_damage', 'attack_cost', 'attack_energy_cost']].reset_index(drop=True)
df_attacks.to_csv('pokemon_attacks.csv', index=False)

Get set data

In [98]:
df_pokemon_cards['release_date'] = df_pokemon_cards['set'].apply(lambda x: x.get('releaseDate') if isinstance(x, dict) else None)
df_pokemon_cards['release_date'] = pd.to_datetime(df_pokemon_cards['release_date'], errors='coerce')

# Step 2: Extract the year
df_pokemon_cards['release_year'] = df_pokemon_cards['release_date'].dt.year

In [99]:
df_pokemon_cards['resistances'] = df_pokemon_cards['resistances'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else [])

In [100]:
df_pokemon_cards['resistance_type'] = df_pokemon_cards['resistances'].apply(lambda x: x[0]['type'] if len(x) > 0 else None)
df_pokemon_cards['resistance_value'] = df_pokemon_cards['resistances'].apply(lambda x: x[0]['value'] if len(x) > 0 else None)

In [101]:
df_pokemon_cards['weaknesses'] = df_pokemon_cards['weaknesses'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else [])
df_pokemon_cards['weakness_type'] = df_pokemon_cards['weaknesses'].apply(lambda x: x[0]['type'] if len(x) > 0 else None)
df_pokemon_cards['weakness_value'] = df_pokemon_cards['weaknesses'].apply(lambda x: x[0]['value'] if len(x) > 0 else None)

In [102]:
# Convert string to list if necessary
df_pokemon_cards['rules'] = df_pokemon_cards['rules'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) and isinstance(x, str) else x)

In [103]:
def extract_prize_value(rule):
    # Handle missing or empty rule values
    if rule is None or rule == '' or rule == []:
        return 1
    
    # If the rule is a stringified list, convert it to an actual list
    if isinstance(rule, str):
        try:
            rule = ast.literal_eval(rule)
        except Exception:
            return 1

    # At this point, rule should be a list
    if isinstance(rule, list):
        for r in rule:
            match = re.search(r'takes (\d+) Prize', r)
            if match:
                return int(match.group(1))

    return 1 

# Apply it to your dataframe
df_pokemon_cards['prize_card_value'] = df_pokemon_cards['rules'].apply(extract_prize_value)


In [104]:
df_pokemon_cards['prize_card_value'].value_counts()

prize_card_value
1    2018
2     526
Name: count, dtype: int64

In [105]:
# Create the feature flag
df_pokemon_cards['is_immune_to_bench_damage'] = df_pokemon_cards['rules'].apply(
    lambda x: int(any('As long as this Pokémon is on your Bench, prevent all damage done' in rule for rule in x)) if isinstance(x, list) else 0
)

In [106]:
df_pokemon_cards = df_pokemon_cards.merge(df_abilities, how='left', on='id')
df_pokemon_cards = df_pokemon_cards.merge(df_attacks, how='left', on='id')

In [107]:
df_pokemon_cards['attack_damage_amount'] = df_pokemon_cards['attack_damage'].str.extract('([0-9]*)')
df_pokemon_cards['attack_damage_modifier'] = df_pokemon_cards['attack_damage'].str.replace('([0-9])', '')

In [108]:
df_pokemon_cards['cards_needed_for_attack'] = df_pokemon_cards['setup_time'] + df_pokemon_cards['attack_energy_cost']

In [109]:
df_pokemon_cards.columns.to_list()

['id',
 'supertype',
 'subtypes',
 'types',
 'name',
 'evolvesFrom',
 'hp',
 'convertedRetreatCost',
 'abilities',
 'attacks',
 'resistances',
 'weaknesses',
 'rules',
 'set',
 'number',
 'regulationMark',
 'legalities',
 'standard_legality',
 'stage',
 'setup_time',
 'is_ex',
 'is_tera',
 'primary_type',
 'release_date',
 'release_year',
 'resistance_type',
 'resistance_value',
 'weakness_type',
 'weakness_value',
 'prize_card_value',
 'is_immune_to_bench_damage',
 'ability_name',
 'ability_text',
 'attack_name',
 'attack_text',
 'attack_damage',
 'attack_cost',
 'attack_energy_cost',
 'attack_damage_amount',
 'attack_damage_modifier',
 'cards_needed_for_attack']

In [110]:
cols_to_keep = ['id',
 'supertype',
 'subtypes',
 'name',
 'stage',
 'is_ex',
 'is_tera',
 'primary_type',
 'evolvesFrom',
 'hp',
 'ability_name',
 'ability_text',
 'attack_name',
 'attack_text',
 'attack_damage_amount',
 'attack_damage_modifier',
 'attack_cost',
 'cards_needed_for_attack',
 'attack_energy_cost',
 'convertedRetreatCost',
 'regulationMark',
 'prize_card_value',
 'setup_time',
 'resistance_type',
 'resistance_value',
 'weakness_type',
 'weakness_value',
 'is_immune_to_bench_damage',
 'release_date',
 'release_year'
]

In [111]:
df_pokemon_cards = df_pokemon_cards[cols_to_keep]

In [112]:
df_pokemon_cards['attack_damage_amount'] = pd.to_numeric(df_pokemon_cards['attack_damage_amount'], errors='coerce')
df_pokemon_cards['is_coin_flip'] = df_pokemon_cards['attack_text'].str.contains('coin')

In [113]:
def smart_deduplicate_pokemon(df, log=True):
    """
    Drops duplicate Pokémon cards based on gameplay differences.
    Logs details about what was dropped if log=True.
    """
    # Store original before dropping
    original_count = df.shape[0]

    # Perform smart deduplication
    deduped_df = df.drop_duplicates(
        subset=['name', 'attack_text', 'hp', 'ability_text'],
        keep='first'
    ).reset_index(drop=True)

    new_count = deduped_df.shape[0]

    if log:
        print(f"🔵 Before deduplication: {original_count} records")
        print(f"🟢 After deduplication: {new_count} records")
        print(f"🧹 {original_count - new_count} duplicate records removed.\n")

    return deduped_df

In [114]:
df_pokemon_cards = smart_deduplicate_pokemon(df_pokemon_cards)

🔵 Before deduplication: 3863 records
🟢 After deduplication: 2395 records
🧹 1468 duplicate records removed.



In [115]:
df_pokemon_cards['damage_per_energy'] = np.where(
    df_pokemon_cards['attack_energy_cost'] == 0,
    np.nan,  # or 0 if you prefer
    round(df_pokemon_cards['attack_damage_amount'] / df_pokemon_cards['attack_energy_cost'], 2)
)

df_pokemon_cards['damage_per_energy'] = pd.to_numeric(df_pokemon_cards['damage_per_energy'], errors='coerce')

In [116]:
df_pokemon_cards.to_csv('pokemon_cleaned.csv', index=False)

In [117]:
df_pokemon_cards.head(100).to_csv('pokemon_cards_claude.csv',index=False)

In [118]:
df_pokemon_cards

Unnamed: 0,id,supertype,subtypes,name,stage,is_ex,is_tera,primary_type,evolvesFrom,hp,...,setup_time,resistance_type,resistance_value,weakness_type,weakness_value,is_immune_to_bench_damage,release_date,release_year,is_coin_flip,damage_per_energy
0,sv1-8,Pokémon,['Basic'],Scatterbug,Basic,0,0,Grass,,30.0,...,0,,,Fire,×2,0,2023-03-31,2023,False,10.00
1,sv1-1,Pokémon,['Basic'],Pineco,Basic,0,0,Grass,,60.0,...,0,,,Fire,×2,0,2023-03-31,2023,False,5.00
2,sv1-6,Pokémon,['Stage 1'],Cacturne,Stage 1,0,0,Grass,Cacnea,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,False,36.67
3,sv1-12,Pokémon,['Stage 1'],Gogoat,Stage 1,0,0,Grass,Skiddo,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,True,15.00
4,sv1-12,Pokémon,['Stage 1'],Gogoat,Stage 1,0,0,Grass,Skiddo,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,False,36.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2390,sv9-137,Pokémon,['Basic'],Cramorant,Basic,0,0,Colorless,,110.0,...,0,Fighting,-30,Lightning,×2,0,2025-03-28,2025,True,25.00
2391,sv9-138,Pokémon,['Basic'],Hop's Cramorant,Basic,0,0,Colorless,,110.0,...,0,Fighting,-30,Lightning,×2,0,2025-03-28,2025,False,120.00
2392,sv9-140,Pokémon,['Stage 1'],Oinkologne,Stage 1,0,0,Colorless,Lechonk,120.0,...,1,,,Fighting,×2,0,2025-03-28,2025,True,25.00
2393,sv9-141,Pokémon,['Basic'],Squawkabilly,Basic,0,0,Colorless,,70.0,...,0,Fighting,-30,Lightning,×2,0,2025-03-28,2025,False,


## Evolution Relationships

In [119]:
# Clean up evolution relationships
evolution_edges = df_pokemon_cards[['name', 'evolvesFrom']].dropna()
evolution_edges.columns = ['target', 'source']  # Evolution is from → to
evolution_edges = evolution_edges.drop_duplicates()

evolution_edges['relationship'] = 'evolves_from'

evolution_edges.sort_values(by='target').reset_index(drop=True)

Unnamed: 0,target,source,relationship
0,Abomasnow,Snover,evolves_from
1,Accelgor,Shelmet,evolves_from
2,Aegislash,Doublade,evolves_from
3,Aegislash ex,Doublade,evolves_from
4,Aerodactyl,Antique Old Amber,evolves_from
...,...,...,...
477,Wugtrio ex,Wiglett,evolves_from
478,Xatu,Natu,evolves_from
479,Zebstrika,Blitzle,evolves_from
480,Zoroark,Zorua,evolves_from


In [120]:
from collections import defaultdict

evolution_groups = defaultdict(list)

# Create a mapping from name → evolvesFrom
name_to_evolves = df_pokemon_cards.set_index("name")["evolvesFrom"].dropna().to_dict()

def find_base(name):
    # Walk up the chain until there's no parent
    while name in name_to_evolves:
        name = name_to_evolves[name]
    return name

# Now assign each card to its base evolution family
for name in evolution_edges["target"]:
    base = find_base(name)
    evolution_groups[base].append(name)

In [121]:
evolution_edges = evolution_edges.reset_index(drop=True)

In [122]:
evolution_edges.to_csv('evolution_edges.csv', index=False)

## Trainer Data Wrangling

In [123]:
energy = pd.read_csv('energy.csv')
trainer = pd.read_csv('trainer.csv')

In [124]:
trainer

Unnamed: 0,id,supertype,subtypes,types,name,evolvesFrom,hp,convertedRetreatCost,abilities,attacks,resistances,weaknesses,rules,set,number,regulationMark,legalities,standard_legality
0,pop5-7,Trainer,['Item'],,Rare Candy,,,,,,,,['Choose 1 of your Basic Pokémon in play. If y...,"{'id': 'pop5', 'images': {'symbol': 'https://i...",7,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
1,dv1-18,Trainer,['Pokémon Tool'],,Exp. Share,,,,,,,,"[""Attach a Pokémon Tool to 1 of your Pokémon t...","{'id': 'dv1', 'images': {'symbol': 'https://im...",18,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
2,pop8-10,Trainer,['Item'],,Rare Candy,,,,,,,,['Choose 1 of your Basic Pokémon in play. If y...,"{'id': 'pop8', 'images': {'symbol': 'https://i...",10,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
3,dv1-20,Trainer,['Item'],,Super Rod,,,,,,,,['Shuffle 3 in any combination of Pokémon and ...,"{'id': 'dv1', 'images': {'symbol': 'https://im...",20,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
4,xy0-34,Trainer,['Item'],,Crushing Hammer,,,,,,,,"[""Flip a coin. If heads, discard an Energy att...","{'id': 'xy0', 'images': {'symbol': 'https://im...",34,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1030,sv9-157,Trainer,['Supporter'],,Ruffian,,,,,,,,"[""Discard a Pokémon Tool and a Special Energy ...","{'id': 'sv9', 'images': {'symbol': 'https://im...",157,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
1031,sv9-158,Trainer,['Item'],,Super Potion,,,,,,,,['Heal 60 damage from 1 of your Pokémon. If yo...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",158,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
1032,sv9-179,Trainer,['Supporter'],,Brock's Scouting,,,,,,,,['Search your deck for up to 2 Basic Pokémon o...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",179,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
1033,sv9-180,Trainer,['Supporter'],,Iris's Fighting Spirit,,,,,,,,['You can use this card only if you discard an...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",180,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal


In [125]:
trainer = trainer.drop(columns=['types', 'evolvesFrom', 'convertedRetreatCost', 'attacks', 'resistances', 'weaknesses' ])
trainer = trainer[trainer['regulationMark']>='G']

In [126]:
trainer.shape

(423, 12)

In [127]:
trainer = trainer[((trainer['regulationMark'].isna()==1)&(trainer['standard_legality']=='Legal'))|
                  ((trainer['regulationMark']>='G')&(trainer['standard_legality']=='Legal'))].reset_index(drop=True)

In [128]:
trainer['subtypes'] = trainer['subtypes'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)

In [129]:
trainer

Unnamed: 0,id,supertype,subtypes,name,hp,abilities,rules,set,number,regulationMark,legalities,standard_legality
0,sv1-166,Trainer,[Supporter],Arven,,,['Search your deck for an Item card and a Poké...,"{'id': 'sv1', 'images': {'symbol': 'https://im...",166,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
1,sv1-167,Trainer,[Stadium],Beach Court,,,"[""The Retreat Cost of each Basic Pokémon in pl...","{'id': 'sv1', 'images': {'symbol': 'https://im...",167,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
2,sv1-173,Trainer,[Item],Energy Switch,,,['Move a Basic Energy from 1 of your Pokémon t...,"{'id': 'sv1', 'images': {'symbol': 'https://im...",173,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
3,sv1-169,Trainer,[Pokémon Tool],Defiance Band,,,"[""If you have more Prize cards remaining than ...","{'id': 'sv1', 'images': {'symbol': 'https://im...",169,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
4,sv1-171,Trainer,[Item],Energy Retrieval,,,['Put up to 2 Basic Energy cards from your dis...,"{'id': 'sv1', 'images': {'symbol': 'https://im...",171,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
...,...,...,...,...,...,...,...,...,...,...,...,...
418,sv9-157,Trainer,[Supporter],Ruffian,,,"[""Discard a Pokémon Tool and a Special Energy ...","{'id': 'sv9', 'images': {'symbol': 'https://im...",157,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
419,sv9-158,Trainer,[Item],Super Potion,,,['Heal 60 damage from 1 of your Pokémon. If yo...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",158,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
420,sv9-179,Trainer,[Supporter],Brock's Scouting,,,['Search your deck for up to 2 Basic Pokémon o...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",179,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal
421,sv9-180,Trainer,[Supporter],Iris's Fighting Spirit,,,['You can use this card only if you discard an...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",180,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal


In [130]:
def extract_subtype(subtypes):
    '''Extract the Stage of each Pokémon card'''
    if not isinstance(subtypes, list):
        return None
    else:
        return subtypes[0]

# Apply the function to your DataFrame
trainer['subtypes_extracted'] = trainer['subtypes'].apply(extract_subtype)

In [131]:
trainer['is_ace_spec'] = trainer['subtypes'].apply(lambda x: 1 if 'ACE SPEC' in x else 0)
trainer['is_future'] = trainer['subtypes'].apply(lambda x: 1 if 'Future' in x else 0)
trainer['is_ancient'] = trainer['subtypes'].apply(lambda x: 1 if 'Ancient' in x else 0)

In [132]:
trainer

Unnamed: 0,id,supertype,subtypes,name,hp,abilities,rules,set,number,regulationMark,legalities,standard_legality,subtypes_extracted,is_ace_spec,is_future,is_ancient
0,sv1-166,Trainer,[Supporter],Arven,,,['Search your deck for an Item card and a Poké...,"{'id': 'sv1', 'images': {'symbol': 'https://im...",166,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Supporter,0,0,0
1,sv1-167,Trainer,[Stadium],Beach Court,,,"[""The Retreat Cost of each Basic Pokémon in pl...","{'id': 'sv1', 'images': {'symbol': 'https://im...",167,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Stadium,0,0,0
2,sv1-173,Trainer,[Item],Energy Switch,,,['Move a Basic Energy from 1 of your Pokémon t...,"{'id': 'sv1', 'images': {'symbol': 'https://im...",173,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Item,0,0,0
3,sv1-169,Trainer,[Pokémon Tool],Defiance Band,,,"[""If you have more Prize cards remaining than ...","{'id': 'sv1', 'images': {'symbol': 'https://im...",169,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Pokémon Tool,0,0,0
4,sv1-171,Trainer,[Item],Energy Retrieval,,,['Put up to 2 Basic Energy cards from your dis...,"{'id': 'sv1', 'images': {'symbol': 'https://im...",171,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Item,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
418,sv9-157,Trainer,[Supporter],Ruffian,,,"[""Discard a Pokémon Tool and a Special Energy ...","{'id': 'sv9', 'images': {'symbol': 'https://im...",157,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Supporter,0,0,0
419,sv9-158,Trainer,[Item],Super Potion,,,['Heal 60 damage from 1 of your Pokémon. If yo...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",158,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Item,0,0,0
420,sv9-179,Trainer,[Supporter],Brock's Scouting,,,['Search your deck for up to 2 Basic Pokémon o...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",179,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Supporter,0,0,0
421,sv9-180,Trainer,[Supporter],Iris's Fighting Spirit,,,['You can use this card only if you discard an...,"{'id': 'sv9', 'images': {'symbol': 'https://im...",180,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Supporter,0,0,0


## Energy Data Wrangling

In [133]:
energy = energy.drop(columns=['types', 'evolvesFrom', 'convertedRetreatCost',
                              'attacks', 'resistances', 'weaknesses', 'hp', 'abilities'])

In [134]:
energy = energy[((energy['regulationMark'].isna()==1)&(energy['standard_legality']=='Legal'))|
                ((energy['regulationMark']>='G')&(energy['standard_legality']=='Legal'))].reset_index(drop=True)

In [135]:
energy['subtypes'] = energy['subtypes'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)

In [136]:
def extract_subtype(subtypes):
    '''Extract the Stage of each Pokémon card'''
    if not isinstance(subtypes, list):
        return None
    else:
        return subtypes[0]

# Apply the function to your DataFrame
energy['subtypes_extracted'] = energy['subtypes'].apply(extract_subtype)

In [137]:
energy['is_ace_spec'] = energy['subtypes'].apply(lambda x: 1 if 'ACE SPEC' in x else 0)

In [138]:
energy

Unnamed: 0,id,supertype,subtypes,name,rules,set,number,regulationMark,legalities,standard_legality,subtypes_extracted,is_ace_spec
0,col1-88,Energy,[Basic],Grass Energy,,"{'id': 'col1', 'images': {'symbol': 'https://i...",88,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Basic,0
1,col1-89,Energy,[Basic],Fire Energy,,"{'id': 'col1', 'images': {'symbol': 'https://i...",89,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Basic,0
2,g1-75,Energy,[Basic],Grass Energy,,"{'id': 'g1', 'images': {'symbol': 'https://ima...",75,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Basic,0
3,g1-76,Energy,[Basic],Fire Energy,,"{'id': 'g1', 'images': {'symbol': 'https://ima...",76,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Basic,0
4,col1-90,Energy,[Basic],Water Energy,,"{'id': 'col1', 'images': {'symbol': 'https://i...",90,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Basic,0
...,...,...,...,...,...,...,...,...,...,...,...,...
188,sve-15,Energy,[Basic],Basic Darkness Energy,,"{'id': 'sve', 'images': {'symbol': 'https://im...",15,,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Basic,0
189,sv8-191,Energy,"[Special, ACE SPEC]",Enriching Energy,['As long as this card is attached to a Pokémo...,"{'id': 'sv8', 'images': {'symbol': 'https://im...",191,H,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Special,1
190,sv8-252,Energy,[Special],Jet Energy,['As long as this card is attached to a Pokémo...,"{'id': 'sv8', 'images': {'symbol': 'https://im...",252,G,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Special,0
191,sv9-159,Energy,[Special],Spiky Energy,"[""As long as this card is attached to a Pokémo...","{'id': 'sv9', 'images': {'symbol': 'https://im...",159,I,"{'unlimited': 'Legal', 'expanded': 'Legal', 's...",Legal,Special,0


## Card Mappings

In [139]:
df_pokemon_cards = pd.read_csv('pokemon_cleaned.csv')

In [140]:
def build_evolution_edges(cards_df: pd.DataFrame) -> pd.DataFrame:
    """
    Builds direct evolution mappings like 'Charmeleon → Charizard' or 'Charmander → Charmeleon'.
    """
    evolution_edges = []

    # Filter only Pokémon cards with evolution data and that are Standard-legal
    evolvers = cards_df[
        (cards_df['supertype'] == 'Pokémon') &
        (cards_df['evolvesFrom'].notnull())]

    for _, row in evolvers.iterrows():
        child = row['name']
        parent = row['evolvesFrom']

        evolution_edges.append({
            'From_Card': parent,
            'To_Card': child,
            'Interaction_Type': 'Evolves Into',
            'Strength': 0.9
        })

    return pd.DataFrame(evolution_edges)


In [141]:
df_pokemon_cards

Unnamed: 0,id,supertype,subtypes,name,stage,is_ex,is_tera,primary_type,evolvesFrom,hp,...,setup_time,resistance_type,resistance_value,weakness_type,weakness_value,is_immune_to_bench_damage,release_date,release_year,is_coin_flip,damage_per_energy
0,sv1-8,Pokémon,['Basic'],Scatterbug,Basic,0,0,Grass,,30.0,...,0,,,Fire,×2,0,2023-03-31,2023,False,10.00
1,sv1-1,Pokémon,['Basic'],Pineco,Basic,0,0,Grass,,60.0,...,0,,,Fire,×2,0,2023-03-31,2023,False,5.00
2,sv1-6,Pokémon,['Stage 1'],Cacturne,Stage 1,0,0,Grass,Cacnea,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,False,36.67
3,sv1-12,Pokémon,['Stage 1'],Gogoat,Stage 1,0,0,Grass,Skiddo,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,True,15.00
4,sv1-12,Pokémon,['Stage 1'],Gogoat,Stage 1,0,0,Grass,Skiddo,130.0,...,1,,,Fire,×2,0,2023-03-31,2023,False,36.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2390,sv9-137,Pokémon,['Basic'],Cramorant,Basic,0,0,Colorless,,110.0,...,0,Fighting,-30.0,Lightning,×2,0,2025-03-28,2025,True,25.00
2391,sv9-138,Pokémon,['Basic'],Hop's Cramorant,Basic,0,0,Colorless,,110.0,...,0,Fighting,-30.0,Lightning,×2,0,2025-03-28,2025,False,120.00
2392,sv9-140,Pokémon,['Stage 1'],Oinkologne,Stage 1,0,0,Colorless,Lechonk,120.0,...,1,,,Fighting,×2,0,2025-03-28,2025,True,25.00
2393,sv9-141,Pokémon,['Basic'],Squawkabilly,Basic,0,0,Colorless,,70.0,...,0,Fighting,-30.0,Lightning,×2,0,2025-03-28,2025,False,


In [142]:
def rare_candy_synergy(cards_df: pd.DataFrame) -> pd.DataFrame:
    rare_candy_edges = []

    # Filter for Pokémon only
    pokemon_df = cards_df[cards_df['supertype'] == 'Pokémon']

    # Group all cards by lowercase name
    name_to_rows = {}
    for _, row in pokemon_df.iterrows():
        name = row['name'].strip().lower()
        name_to_rows.setdefault(name, []).append(row)

    # All Stage 2s
    stage2s = pokemon_df[
        (pokemon_df['stage'] == 'Stage 2') &
        (pokemon_df['evolvesFrom'].notnull())
    ]

    for _, stage2 in stage2s.iterrows():
        stage1_name = stage2['evolvesFrom'].strip().lower()
        stage1_list = name_to_rows.get(stage1_name, [])

        if not stage1_list:
            continue

        # Pick first Stage 1 card matching the name
        stage1 = stage1_list[0]

        if pd.notna(stage1.get('evolvesFrom')):
            maybe_basic_name = stage1['evolvesFrom'].strip().lower()
            basic_list = name_to_rows.get(maybe_basic_name, [])

            for basic in basic_list:
                if basic.get('stage') == 'Basic':
                    rare_candy_edges.append({
                        'From_Card': 'Rare Candy',
                        'To_Card': stage2['name'],
                        'Interaction_Type': 'Rare Candy Evolves (Basic → Stage 2)',
                        'Strength': 1.0
                    })
                    break  # One match is enough

    return pd.DataFrame(rare_candy_edges)


In [143]:
evolution_edges = build_evolution_edges(df_pokemon_cards)
rare_candy_edges = rare_candy_synergy(df_pokemon_cards)
all_evolution_synergies = pd.concat([evolution_edges, rare_candy_edges], ignore_index=True)


In [144]:
rare_candy_edges

Unnamed: 0,From_Card,To_Card,Interaction_Type,Strength
0,Rare Candy,Meowscarada,Rare Candy Evolves (Basic → Stage 2),1.0
1,Rare Candy,Meowscarada,Rare Candy Evolves (Basic → Stage 2),1.0
2,Rare Candy,Arboliva,Rare Candy Evolves (Basic → Stage 2),1.0
3,Rare Candy,Vivillon,Rare Candy Evolves (Basic → Stage 2),1.0
4,Rare Candy,Vivillon,Rare Candy Evolves (Basic → Stage 2),1.0
...,...,...,...,...
216,Rare Candy,N's Klinklang,Rare Candy Evolves (Basic → Stage 2),1.0
217,Rare Candy,Hop's Corviknight,Rare Candy Evolves (Basic → Stage 2),1.0
218,Rare Candy,Hop's Corviknight,Rare Candy Evolves (Basic → Stage 2),1.0
219,Rare Candy,Salamence ex,Rare Candy Evolves (Basic → Stage 2),1.0


In [145]:
all_evolution_synergies.drop_duplicates()

Unnamed: 0,From_Card,To_Card,Interaction_Type,Strength
0,Cacnea,Cacturne,Evolves Into,0.9
1,Skiddo,Gogoat,Evolves Into,0.9
3,Floragato,Meowscarada,Evolves Into,0.9
5,Tarountula,Spidops ex,Evolves Into,0.9
6,Dolliv,Arboliva,Evolves Into,0.9
...,...,...,...,...
1297,Rare Candy,Alolan Golem,Rare Candy Evolves (Basic → Stage 2),1.0
1303,Rare Candy,Mamoswine ex,Rare Candy Evolves (Basic → Stage 2),1.0
1305,Rare Candy,N's Klinklang,Rare Candy Evolves (Basic → Stage 2),1.0
1307,Rare Candy,Hop's Corviknight,Rare Candy Evolves (Basic → Stage 2),1.0


In [146]:
all_evolution_synergies.to_csv('synergies.csv',index=False)

In [147]:
# Re-import necessary packages after code execution state reset
import pandas as pd
import networkx as nx
from pyvis.network import Network

# Load the cleaned synergy dataset
df = pd.read_csv("synergies.csv")  # Make sure it's in the same folder

# Build the graph
G = nx.DiGraph()
for _, row in df.iterrows():
    G.add_node(row['From_Card'], title=row['From_Card'])
    G.add_node(row['To_Card'], title=row['To_Card'])
    G.add_edge(row['From_Card'], row['To_Card'], title=row['Interaction_Type'], weight=row['Strength'])

# Visualize using PyVis
net = Network(height="750px", width="100%", directed=True)
net.from_nx(G)
net.repulsion(node_distance=200, central_gravity=0.3)

# Save and open the result
net.save_graph("pokemon_synergy_graph.html")


In [148]:
from pyvis.network import Network
import networkx as nx
import pandas as pd
import os
from IPython.display import display, IFrame

# Load data
df = pd.read_csv("synergies.csv")

# Build the graph
G = nx.DiGraph()
for _, row in df.iterrows():
    G.add_node(row['From_Card'], title=row['From_Card'])
    G.add_node(row['To_Card'], title=row['To_Card'])
    G.add_edge(row['From_Card'], row['To_Card'], title=row['Interaction_Type'], weight=row['Strength'])

# Create PyVis network
net = Network(height="600px", width="100%", directed=True)
net.from_nx(G)
net.repulsion(node_distance=200, central_gravity=0.3)

# Save the graph to HTML
file_path = "pokemon_synergy_graph.html"
net.save_graph(file_path)

# Display inside the notebook
display(IFrame(src=file_path, width="100%", height="600px"))


In [149]:
def build_synergy_graph(deck_dict, evolution_csv_path):
    # Load evolution edges
    df = pd.read_csv(evolution_csv_path)
    df.columns = df.columns.str.strip().str.lower()  # Clean column names
    print("CSV Columns:", df.columns.tolist())       # Debug check

    G = nx.DiGraph()

    for card in deck_dict:
        G.add_node(card)

    for _, row in df.iterrows():
        src = row['from'].strip()
        tgt = row['to'].strip()
        if src in deck_dict and tgt in deck_dict:
            G.add_edge(src, tgt, type='evolution')

    return G
