Dataset Exploration

In [None]:
# Show the actual structure of the first battle timeline.
import json

first_timeline = raw_train_df["battle_timeline"].iloc[0]
first_timeline_json = json.dumps(first_timeline, indent=2)
# Only the first 1500 characters are printed to keep the cell output short.
print(first_timeline_json[:1500])


In [None]:
# Let's look at every Pokémon that appears in the dataset.

# Collect the distinct (normalised) names of all Pokémon found in any timeline.
all_pokemon = set()

for timeline in raw_train_df["battle_timeline"]:
    # Rows whose timeline is not a list (e.g. missing values) are skipped.
    if not isinstance(timeline, list):
        continue
    for turn in timeline:
        # A non-dict turn entry has no .get(); skip it instead of crashing.
        if not isinstance(turn, dict):
            continue
        for side in ("p1_pokemon_state", "p2_pokemon_state"):
            state = turn.get(side)
            if not isinstance(state, dict):
                continue
            # `.get("name", "")` would still return None when the key is present
            # with a null value, so fall back explicitly and ignore non-strings.
            raw_name = state.get("name") or ""
            if not isinstance(raw_name, str):
                continue
            # Strip BEFORE capitalising: capitalize() only upper-cases the very
            # first character, so " pikachu" would otherwise end up as "pikachu"
            # and be stored as a different entry than "Pikachu".
            name = raw_name.strip().capitalize()
            if name:
                all_pokemon.add(name)

pokemon_list = sorted(all_pokemon)
print(f"Totale Pokémon trovati: {len(pokemon_list)}")
print(pokemon_list)


In [None]:
# Let's see how often each Pokémon shows up in the timelines.
# Note: a Pokémon is counted once for every turn (and side) in which it appears
# as the pokemon state, so these are turn-level occurrences, not battle counts.
from collections import Counter

pokemon_counter = Counter()

for timeline in raw_train_df["battle_timeline"]:
    # Rows whose timeline is not a list (e.g. missing values) are skipped.
    if not isinstance(timeline, list):
        continue
    for turn in timeline:
        # A non-dict turn entry has no .get(); skip it instead of crashing.
        if not isinstance(turn, dict):
            continue
        for side in ("p1_pokemon_state", "p2_pokemon_state"):
            state = turn.get(side)
            if not isinstance(state, dict):
                continue
            # `.get("name", "")` would still return None when the key is present
            # with a null value, so fall back explicitly and ignore non-strings.
            raw_name = state.get("name") or ""
            if not isinstance(raw_name, str):
                continue
            # Strip BEFORE capitalising: capitalize() only upper-cases the very
            # first character, so " pikachu" would otherwise be counted under
            # "pikachu" instead of "Pikachu".
            name = raw_name.strip().capitalize()
            if name:
                pokemon_counter[name] += 1

# (name, count) pairs ordered from most to least frequent.
pokemon_ranking = pokemon_counter.most_common()

# Conversion to a DataFrame
pokemon_df = pd.DataFrame(pokemon_ranking, columns=["Pokemon", "Occorrenze"])

# 20 most frequent
print(f"Totale Pokémon distinti: {len(pokemon_df)}")
display(pokemon_df.head(20))


# Dataset analysis

In [None]:
# Inspect only the first training battle to see the full record layout.
print("\n Structure of the first train battle: ")

# With an empty dataset only the header above is printed.
if train_data:
    first_battle = train_data[0]
    _first_battle_json = json.dumps(first_battle, indent=2)
    print(_first_battle_json)


In [None]:
# Check the levels: every Pokémon is expected to be level 100, so collect the
# level of each Pokémon for which that is not the case.
non_100_levels = []

for battle in train_data:
    # Check every Pokémon of player 1's team.
    # `or []` also covers a key that is present but null.
    for p in battle.get('p1_team_details') or []:
        if p.get('level') != 100:
            non_100_levels.append(p.get('level'))

    # Check the lead Pokémon of player 2.
    # Only inspect it when the details actually exist: with a `{}` fallback a
    # battle without a lead would yield level None and be reported as "not 100".
    p2_lead = battle.get('p2_lead_details')
    if p2_lead and p2_lead.get('level') != 100:
        non_100_levels.append(p2_lead.get('level'))

if non_100_levels:
    print(f" Found {len(non_100_levels)} Pokémon with a level different from 100")
    print("Examples:", set(non_100_levels))
else:
    print(" Every Pokémon has level = 100")

# only a small portion has a level different from 100, not important

Missing or null values

In [None]:
# Number of battles whose timeline is absent or falsy (None, empty list, ...).
missing_timeline = sum(not battle.get('battle_timeline') for battle in train_data)
print(f"Battaglie senza timeline: {missing_timeline}")

# Same check for player 1's team details.
missing_team = sum(not battle.get('p1_team_details') for battle in train_data)
print(f"Battaglie senza p1_team_details: {missing_team}")


# Types Analysis


In [None]:
# Distinct type labels seen across the dataset (a set, so duplicates collapse).
all_types = set()

for battle in train_data:
    # Player 1's whole team followed by player 2's lead Pokémon.
    roster = list(battle.get('p1_team_details', []))
    roster.append(battle.get('p2_lead_details', {}))
    for mon in roster:
        # Merge every type listed for this Pokémon into the global set.
        all_types |= set(mon.get('types', []))

print(f"Unique types found: {len(all_types)}")
for type_name in sorted(all_types):
    print(type_name)

In [None]:
# Names of the Pokémon whose two type slots are both the 'notype' placeholder.
pokemons_no_type = []

for battle in train_data:
    # Player 1's whole team followed by player 2's lead Pokémon.
    candidates = [
        *battle.get('p1_team_details', []),
        battle.get('p2_lead_details', {}),
    ]
    for mon in candidates:
        mon_types = mon.get('types', [])
        # Exactly two type entries, neither of which is a real type.
        if len(mon_types) == 2 and all(slot == 'notype' for slot in mon_types):
            pokemons_no_type.append(mon.get('name'))

# One name is stored per match, so the list length is the match count.
no_real_type_count = len(pokemons_no_type)

print(f" Pokémon without a real type (both 'notype'): {no_real_type_count}")


## Dictionary with the base stats of all the Pokémon (special stats)

In [None]:
# Base "spa" stat per Pokémon, keyed by lowercase species name.
# NOTE(review): chansey=35 and tauros=40 look like the modern Sp. Atk values; if
# this dataset uses the Gen 1 unified Special stat they would presumably be 105
# and 70 — confirm against the data before relying on these numbers.
pokemon_base_stats = {
    species: {"spa": special}
    for species, special in (
        ("starmie", 100),
        ("exeggutor", 125),
        ("alakazam", 135),
        ("jolteon", 110),
        ("chansey", 35),
        ("snorlax", 65),
        ("tauros", 40),
    )
}
