In [None]:
# IMPORT
# ======

import json
import copy
import pandas as pd
import os
from tqdm.notebook import tqdm
import numpy as np
from collections import Counter
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score

In [None]:
# CONSTANT
# ========

# List of Gen 1 OverUsed Pokémon
#
# Maps a lowercase Pokémon name to a template record with these keys:
#   name            - same string as the dictionary key
#   types           - list of one or two lowercase type names (keys/values of TYPE_CHART)
#   real_hp         - HP stat; used as the denominator when damage is expressed as a fraction of HP
#   real_atk/_def   - stats read for moves whose category is "PHYSICAL"
#   real_spa        - special stat
#   real_spe        - speed stat (read by extract_current_real_speed)
#   current_hp_pct  - per-battle field, starts at 1.0 and is overwritten by compute_remaining_hp
#   current_status  - per-battle field, starts at "nostatus" and is overwritten by compute_status
#   moves_used      - per-battle field, filled by extract_used_move ({move name: move details + count})
#
# The three per-battle fields are defaults only: code that tracks a battle should
# work on a copy of an entry so that this table keeps its initial values.
#
# NOTE(review): "alakazam", "gengar" and "golem" carry exactly the same five stat
# values (313/198/188/368/338) — this looks like a copy-paste; verify the stat
# lines of every entry against the source they were taken from.
POKEDEX_OU = {
    "alakazam": {
        "name" : "alakazam",
        "types": ["psychic"],
        "real_hp": 313, "real_atk": 198, "real_def": 188,
        "real_spa": 368, "real_spe": 338,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "chansey": {
        "name" : "chansey",
        "types": ["normal"],
        "real_hp": 703, "real_atk": 108, "real_def": 108,
        "real_spa": 308, "real_spe": 198,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "cloyster": {
        "name" : "cloyster",
        "types": ["water", "ice"],
        "real_hp": 303, "real_atk": 288, "real_def": 458,
        "real_spa": 268, "real_spe": 238,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "exeggutor": {
        "name" : "exeggutor",
        "types": ["grass", "psychic"],
        "real_hp": 393, "real_atk": 288, "real_def": 268,
        "real_spa": 348, "real_spe": 208,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "gengar": {
        "name" : "gengar",
        "types": ["ghost", "poison"],
        "real_hp": 313, "real_atk": 198, "real_def": 188,
        "real_spa": 368, "real_spe": 338,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "jolteon": {
        "name" : "jolteon",
        "types": ["electric"],
        "real_hp": 323, "real_atk": 228, "real_def": 218,
        "real_spa": 358, "real_spe": 318,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "jynx": {
        "name" : "jynx",
        "types": ["ice", "psychic"],
        "real_hp": 333, "real_atk": 198, "real_def": 168,
        "real_spa": 288, "real_spe": 288,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "rhydon": {
        "name" : "rhydon",
        "types": ["ground", "rock"],
        "real_hp": 413, "real_atk": 358, "real_def": 338,
        "real_spa": 188, "real_spe": 178,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "snorlax": {
        "name" : "snorlax",
        "types": ["normal"],
        "real_hp": 523, "real_atk": 318, "real_def": 228,
        "real_spa": 228, "real_spe": 158,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "starmie": {
        "name" : "starmie",
        "types": ["water", "psychic"],
        "real_hp": 323, "real_atk": 248, "real_def": 268,
        "real_spa": 298, "real_spe": 328,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "tauros": {
        "name" : "tauros",
        "types": ["normal"],
        "real_hp": 353, "real_atk": 298, "real_def": 288,
        "real_spa": 238, "real_spe": 318,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "zapdos": {
        "name" : "zapdos",
        "types": ["electric", "flying"],
        "real_hp": 383, "real_atk": 278, "real_def": 268,
        "real_spa": 348, "real_spe": 298,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "dragonite": {
        "name" : "dragonite",
        "types": ["dragon", "flying"],
        "real_hp": 325, "real_atk": 266, "real_def": 228,
        "real_spa": 238, "real_spe": 238,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "golem": {
        "name" : "golem",
        "types": ["rock", "ground"],
        "real_hp": 313, "real_atk": 198, "real_def": 188,
        "real_spa": 368, "real_spe": 338,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "charizard": {
        "name" : "charizard",
        "types": ["fire", "flying"],
        "real_hp": 363, "real_atk": 318, "real_def": 358,
        "real_spa": 208, "real_spe": 188,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "victreebel": {
        "name" : "victreebel",
        "types": ["grass", "poison"],
        "real_hp": 363, "real_atk": 308, "real_def": 228,
        "real_spa": 298, "real_spe": 238,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "lapras": {
        "name" : "lapras",
        "types": ["water", "ice"],
        "real_hp": 463, "real_atk": 268, "real_def": 258,
        "real_spa": 288, "real_spe": 218,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "slowbro": {
        "name" : "slowbro",
        "types": ["water", "psychic"],
        "real_hp": 393, "real_atk": 248, "real_def": 318,
        "real_spa": 258, "real_spe": 158,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "articuno": {
        "name" : "articuno",
        "types": ["ice", "flying"],
        "real_hp": 383, "real_atk": 268, "real_def": 298,
        "real_spa": 348, "real_spe": 268,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    },
    "persian": {
        "name" : "persian",
        "types": ["normal"],
        "real_hp": 333, "real_atk": 238, "real_def": 218,
        "real_spa": 228, "real_spe": 328,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "moves_used": {}
    }
}

# Type Effectiveness Map
#
# Layout: TYPE_CHART[attacking_type][defending_type] -> damage multiplier
# (2 = super effective, 0.5 = not very effective, 0 = no effect).
# Only non-neutral match-ups are listed; move_mult_calculator treats every
# pair that is missing here as a neutral 1.0.
# All type names are lowercase, matching the "types" lists in POKEDEX_OU.
TYPE_CHART = {
    "normal": {"rock": 0.5, "ghost": 0},
    "fire": {"fire": 0.5, "water": 0.5, "grass": 2, "ice": 2, "bug": 2, "rock": 0.5, "dragon": 0.5},
    "water": {"fire": 2, "water": 0.5, "grass": 0.5, "ground": 2, "rock": 2, "dragon": 0.5},
    "electric": {"water": 2, "electric": 0.5, "grass": 0.5, "ground": 0, "flying": 2, "dragon": 0.5},
    "grass": {"fire": 0.5, "water": 2, "grass": 0.5, "poison": 0.5, "ground": 2, "flying": 0.5, "bug": 0.5, "rock": 2, "dragon": 0.5},
    "ice": {"water": 0.5, "grass": 2, "ice": 0.5, "ground": 2, "flying": 2, "dragon": 2},
    "fighting": {"normal": 2, "ice": 2, "poison": 0.5, "flying": 0.5, "psychic": 0.5, "bug": 0.5, "rock": 2, "ghost": 0},
    "poison": {"grass": 2, "poison": 0.5, "ground": 0.5, "bug": 2, "rock": 0.5, "ghost": 0.5},
    "ground": {"fire": 2, "electric": 2, "grass": 0.5, "poison": 2, "flying": 0, "bug": 0.5, "rock": 2},
    "flying": {"electric": 0.5, "grass": 2, "fighting": 2, "bug": 2, "rock": 0.5},
    "psychic": {"fighting": 2, "poison": 2, "psychic": 0.5},
    "bug": {"fire": 0.5, "grass": 2, "fighting": 0.5, "poison": 2, "flying": 0.5, "psychic": 2, "ghost": 0.5},
    "rock": {"fire": 2, "ice": 2, "fighting": 0.5, "ground": 0.5, "flying": 2, "bug": 2},
    "ghost": {"normal": 0, "psychic": 0, "ghost": 2},
    "dragon": {"dragon": 2},
}

# Set of move names (lowercase, no separators) flagged as high-critical-hit moves.
# NOTE(review): not referenced by any code visible in this part of the notebook —
# confirm whether it is still needed.
HIGH_CRIT_MOVES = {"slash", "razorleaf"}

# Set of move names (lowercase, no separators) the author considers the strongest.
# NOTE(review): not referenced by any code visible in this part of the notebook —
# confirm whether it is still needed.
BEST_MOVES = {
    "thunderwave", "sleeppowder", "swordsdance", "hyperbeam", "blizzard", "earthquake",
    "psychic", "recover", "icebeam", "thunderbolt", "fireblast", "bodyslam",
    "confuseray", "stunspore", "softboiled", "explosion", "selfdestruct", "drillpeck",
    "slash", "razorleaf", "clamp", "agility", "counter", "hypnosis", "wrap"
}

In [None]:
# IMPORT AND FILE UPLOAD
# ======================

# Define the path to our data
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
DATA_PATH = os.path.join('../input', COMPETITION_NAME)

train_file_path = os.path.join(DATA_PATH, 'train.jsonl')
test_file_path = os.path.join(DATA_PATH, 'test.jsonl')

# battle_id of the training record that contains an error and must be excluded
CORRUPTED_BATTLE_ID = 4877

# Read the file line by line: every non-blank line is one JSON object (one battle)
print(f"Loading data from '{train_file_path}'...")
try:
    with open(train_file_path, 'r', encoding='utf-8') as f:
        # json.loads() parses one line (one JSON object) into a Python dictionary
        train_data = [json.loads(line) for line in f if line.strip()]
except FileNotFoundError:
    print(f"ERROR: Could not find the training file at '{train_file_path}'.")
    print("Please make sure you have added the competition data to this notebook.")
    # Nothing downstream can work without the training data: stop here instead of
    # continuing with an empty list and failing later with an unrelated error.
    raise

print(f"Successfully loaded {len(train_data)} battles.")

# Remove the line with the error
train_data = [battle for battle in train_data if battle.get("battle_id") != CORRUPTED_BATTLE_ID]

Loading data from '../input/fds-pokemon-battles-prediction-2025/train.jsonl'...
Successfully loaded 10000 battles.


In [None]:
# Get team 1 details from the dictionary
def extract_team_p1(b):
    """Build player 1's team from a battle record.

    Args:
        b: battle dictionary; its ``'p1_team_details'`` entry is a list of
            dictionaries that each have a ``"name"`` key.

    Returns:
        ``{pokemon_name: record}`` where every record is an independent deep
        copy of the matching ``POKEDEX_OU`` entry (an empty dict when the name
        is not in ``POKEDEX_OU``). An empty dict is returned when the battle
        has no team details.
    """
    team = b.get('p1_team_details') or []
    # Deep copy: the records are mutated while the timeline is replayed
    # (current_hp_pct, current_status, moves_used). Handing out the shared
    # POKEDEX_OU dictionaries would leak one battle's state into the next.
    return {p["name"]: copy.deepcopy(POKEDEX_OU.get(p["name"], {})) for p in team}

# Extends team 2 when it sees a new Pokemon in the timeline
def extend_team_p2(team, turn):
    """Add player 2's active Pokémon to ``team`` if unseen, then update its state.

    Args:
        team: ``{pokemon_name: record}`` of player 2's Pokémon revealed so far;
            modified in place.
        turn: one timeline entry; read for ``"p2_pokemon_state"`` and, through
            ``compute_team``, for ``"p2_move_details"``.

    Returns:
        The updated team. It is returned unchanged when the turn carries no
        player-2 state.
    """
    p2_state = turn.get("p2_pokemon_state")
    if not p2_state:
        return team

    p_name = p2_state.get("name")
    if p_name not in team:
        # Deep copy so that per-battle updates never write into the shared
        # POKEDEX_OU template (which would carry over to later battles).
        team[p_name] = copy.deepcopy(POKEDEX_OU.get(p_name, {}))
    return compute_team(team, turn, "p2")

# Applies one turn to a team: moves used, remaining HP and status
def compute_team(team, turn, player_key):
    """Update ``team`` with what ``turn`` reports for the given player.

    The player's active Pokémon state is read from
    ``turn[player_key + "_pokemon_state"]`` and passed, in this order, to
    ``extract_used_move``, ``compute_remaining_hp`` and ``compute_status``.

    Args:
        team: ``{pokemon_name: record}`` for that player.
        turn: one timeline entry.
        player_key: player prefix used in the turn keys (``"p1"`` or ``"p2"``
            in this notebook).

    Returns:
        The team as returned by the last update step.
    """
    active_state = turn.get(player_key + "_pokemon_state")
    after_moves = extract_used_move(team, turn, active_state, player_key)
    after_hp = compute_remaining_hp(after_moves, active_state)
    return compute_status(after_hp, active_state)

# Stores the status reported this turn on the Pokémon that is in the field
def compute_status(team, state):
    """Copy the active Pokémon's status into its team record.

    ``team[state["name"]]["current_status"]`` is overwritten with
    ``state.get("status")`` whatever its value is (``None`` when the state has
    no ``"status"`` key). No entry is ever removed from the team.

    Args:
        team: ``{pokemon_name: record}``; the matching record is modified in place.
        state: active Pokémon state with a ``"name"`` key.

    Returns:
        The same ``team`` object.
    """
    team[state["name"]]["current_status"] = state.get("status")
    return team

# Extracts and adds to a Pokemon's "moves_used" dictionary the move used this turn
def extract_used_move(team, turn, state, player_key):
    """Record the move used this turn in the active Pokémon's ``"moves_used"``.

    ``moves_used`` maps a move name to a copy of the move details plus a
    ``"count"`` key holding how many times the move has been recorded.

    Args:
        team: ``{pokemon_name: record}``; the active Pokémon's record is
            replaced by a deep copy before it is modified.
        turn: one timeline entry; read for ``player_key + "_move_details"``.
            The move dictionary itself is never modified.
        state: active Pokémon state with a ``"name"`` key.
        player_key: player prefix used in the turn keys.

    Returns:
        The same ``team`` object. It is returned untouched when the turn has
        no move details for this player.
    """
    move = turn.get(player_key + "_move_details")
    if not move:
        return team

    poke_name = state["name"]
    # Work on a private copy of the record so a dictionary shared with another
    # owner (e.g. a POKEDEX_OU template) is never written to.
    team[poke_name] = copy.deepcopy(team[poke_name])
    used = team[poke_name]["moves_used"]

    m_name = move["name"]
    if m_name not in used:
        # Store a copy: adding "count" to `move` directly would silently alter
        # the battle data the caller passed in.
        used[m_name] = {**move, "count": 0}
    used[m_name]["count"] += 1
    return team

# Stores the remaining HP reported this turn on the Pokémon that is in the field
def compute_remaining_hp(team, state):
    """Copy the active Pokémon's ``hp_pct`` into its team record.

    ``team[state["name"]]["current_hp_pct"]`` is overwritten with
    ``state.get("hp_pct")`` (``None`` when the state has no ``"hp_pct"`` key).

    Args:
        team: ``{pokemon_name: record}``; the matching record is modified in place.
        state: active Pokémon state with a ``"name"`` key.

    Returns:
        The same ``team`` object.
    """
    team[state["name"]]["current_hp_pct"] = state.get("hp_pct")
    return team

# Type-effectiveness multiplier of a move against a (possibly dual-typed) Pokemon
def move_mult_calculator(move_type, pokemon_type):
    """Return the combined type multiplier of ``move_type`` against a defender.

    Args:
        move_type: attacking type name, looked up as a key of ``TYPE_CHART``.
        pokemon_type: iterable of the defender's type names.

    Returns:
        The product of the ``TYPE_CHART`` multipliers for each defender type,
        starting from 1.0. Attacking types or match-ups missing from the chart
        count as 1.0.
    """
    matchups = TYPE_CHART.get(move_type, {})
    multiplier = 1.0
    for defender_type in pokemon_type:
        multiplier *= matchups.get(defender_type, 1.0)
    return multiplier

# Calculates the average effective damage of a move on a Pokemon
def move_dmg_calculator(base_power, atk_attacker, def_defender, stab, type):
    """Return the average damage of a move, in HP points.

    Computes ``(21 * base_power * atk / def / 25 + 2) * stab * type * random``.
    The 21/25 factor equals ``(2 * level / 5 + 2) / 50`` for level 100 —
    presumably the level assumed for every Pokémon here (TODO confirm).

    Args:
        base_power: base power of the move.
        atk_attacker: attacker's offensive stat for the move's category.
        def_defender: defender's defensive stat for the move's category;
            must be non-zero.
        stab: same-type attack bonus multiplier (1.5 or 1 at the call site).
        type: type-effectiveness multiplier (name kept for compatibility even
            though it shadows the builtin).

    Returns:
        The expected damage as a float; 0 when ``type`` is 0 (immunity).
    """
    # 0,9254901960784314 is avg value of random
    # random is realized as a multiplication by a random uniformly distributed integer
    # between 217 and 255 (inclusive), followed by an integer division by 255.
    avg_random = 0.9254901960784314

    # The "+ 2" belongs to the base damage: STAB, type effectiveness and the
    # random factor scale the whole amount, not only that constant.
    base_dmg = (21 * base_power * (atk_attacker / def_defender)) / 25 + 2
    return base_dmg * stab * type * avg_random
    
# Calculate the percentage of damage a move does to a Pokemon to determine whether that move is a critical or a miss.
def move_pct_dmg_calculator(turn, attacker_key, defender_key):
    """Return the expected damage of the attacker's move as a fraction of the defender's HP.

    Stats and types are taken from ``POKEDEX_OU`` for the two active Pokémon
    named in ``turn``. Moves whose category is ``"PHYSICAL"`` use
    ``real_atk`` against ``real_def``; every other category uses ``real_spa``
    on both sides (the only special stat stored in ``POKEDEX_OU``).

    Args:
        turn: one timeline entry holding ``"<key>_pokemon_state"`` for both
            players and ``"<attacker_key>_move_details"`` with ``"base_power"``,
            ``"category"`` and ``"type"``.
        attacker_key: player prefix of the attacker (``"p1"`` or ``"p2"``).
        defender_key: player prefix of the defender.

    Returns:
        ``move_dmg_calculator(...) / defender real_hp`` as a float.
    """
    attacker_name = turn.get(f"{attacker_key}_pokemon_state").get("name")
    defender_name = turn.get(f"{defender_key}_pokemon_state").get("name")
    attacker_poke = POKEDEX_OU.get(attacker_name)
    defender_poke = POKEDEX_OU.get(defender_name)

    move = turn.get(f"{attacker_key}_move_details")
    move_type = move.get("type").lower()

    if move.get("category") == "PHYSICAL":
        offensive_stat = attacker_poke.get("real_atk")
        defensive_stat = defender_poke.get("real_def")
    else:
        # Special moves are driven by the special stat. "real_spe" is the speed
        # stat and must not be used here.
        offensive_stat = attacker_poke.get("real_spa")
        defensive_stat = defender_poke.get("real_spa")

    # Same-type attack bonus when the move shares a type with its user
    stab = 1.5 if move_type in attacker_poke["types"] else 1

    mult = move_mult_calculator(move_type, defender_poke["types"])
    dmg = move_dmg_calculator(move.get("base_power"), offensive_stat, defensive_stat, stab, mult)
    return dmg / defender_poke.get("real_hp")

# Calculates the real speed of a Pokemon in that turn, also considering the status
def extract_current_real_speed(team, turn, player_key):
    """Return the effective speed of a player's active Pokémon for this turn.

    Args:
        team: ``{pokemon_name: record}`` as tracked so far; the record's
            ``"current_status"`` is the status used for the adjustment.
        turn: one timeline entry.
        player_key: player prefix used in the turn keys.

    Returns:
        * 0 when the player has no move details this turn;
        * 0 when the tracked status is ``"slp"`` or ``"frz"``;
        * a quarter of ``real_spe`` when the tracked status is ``"par"``;
        * ``real_spe`` from ``POKEDEX_OU`` otherwise, including when the
          Pokémon is not in ``team`` yet.
    """
    state = turn.get(f"{player_key}_pokemon_state")
    if not turn.get(f"{player_key}_move_details"):
        return 0

    poke_name = state.get("name")
    real_spd = POKEDEX_OU.get(poke_name).get("real_spe")

    pokemon = team.get(poke_name)
    # Team records store the status under "current_status" (see compute_status);
    # reading "status" always yielded None and disabled the branches below.
    got_status = pokemon.get("current_status") if pokemon else "nostatus"

    if got_status == "par":
        return real_spd * 0.25
    if got_status in ["slp", "frz"]:
        return 0
    return real_spd

# Counts the volatile effects reported on the Pokémon that is in the field
def count_volatile_effects(turn, effects_counter, player_key):
    """Add this turn's volatile effects of a player's active Pokémon to a counter.

    Args:
        turn: one timeline entry; read for ``player_key + "_pokemon_state"``
            and its optional ``"effects"`` list.
        effects_counter: mapping from lowercase effect name to count that
            supports ``counter[key] += 1``; updated in place.
        player_key: player prefix used in the turn keys.

    Returns:
        The same ``effects_counter`` object. Nothing is counted when the
        effects list contains ``"noeffect"`` or is missing.
    """
    active_effects = turn.get(player_key + "_pokemon_state").get("effects", [])
    if "noeffect" in active_effects:
        return effects_counter

    for effect in active_effects:
        effects_counter[effect.lower()] += 1
    return effects_counter

def create_features(data: list[dict]) -> pd.DataFrame:
    """Turn a list of battle records into one feature row per battle.

    For every battle the timeline is replayed turn by turn while both teams'
    HP, status and used moves are tracked. The resulting columns are:

    * ``battle_id`` and, when present in the record, ``player_won`` (as int);
    * ``avg_<stat>_diff`` for real_hp/atk/def/spa/spe: mean stat of player 1's
      team minus mean stat of the player-2 Pokémon revealed in the timeline
      (0.0 when either side has no Pokémon);
    * ``p1_current_hp_pct_mean`` / ``p2_current_hp_pct_mean``: mean remaining
      HP; unrevealed player-2 slots (up to 6) count as 1.0;
    * ``p<N>_status_<status>_count``: number of turns the active Pokémon had
      that status. These columns exist only for statuses that actually occur
      in ``data``, so two calls on different data can return different column
      sets — align them before feeding a model;
    * ``diff_starts``, ``diff_crit``, ``diff_miss``, ``diff_no_move``,
      ``diff_move_status``: player 1 minus player 2 counters;
    * ``p1_count_last_status`` / ``p2_count_last_status``: Pokémon whose last
      tracked status is neither ``"fnt"`` nor ``"nostatus"``.

    Args:
        data: battle dictionaries with ``"p1_team_details"`` and
            ``"battle_timeline"``.

    Returns:
        DataFrame with one row per battle; missing values are filled with 0.
    """
    # A hit dealing more than this multiple of the expected damage counts as critical
    crit_ratio = 1.3
    # Team size used to pad player 2's unrevealed Pokémon
    team_size = 6
    stat_names = ["real_hp", "real_atk", "real_def", "real_spa", "real_spe", "current_hp_pct"]

    feature_list = []

    for battle in tqdm(data, desc="Extracting features"):
        features = {}
        p1_starts = p2_starts = 0
        p1_crit = p2_crit = 0
        p1_miss = p2_miss = 0
        p1_no_move = p2_no_move = 0
        p1_move_status = p2_move_status = 0

        # Battle ID
        features['battle_id'] = battle.get('battle_id')

        # Match Winner
        if 'player_won' in battle:
            features['player_won'] = int(battle['player_won'])

        # Player 1 team is known up front, player 2 team is discovered turn by turn
        p1_team = extract_team_p1(battle)
        p2_team = {}

        # Timeline
        timeline = battle.get("battle_timeline") or []

        for turn in timeline:
            p1_state = turn.get("p1_pokemon_state")
            p2_state = turn.get("p2_pokemon_state")
            p1_move = turn.get("p1_move_details")
            p2_move = turn.get("p2_move_details")

            # Update starts counter
            p1_speed = extract_current_real_speed(p1_team, turn, "p1")
            p2_speed = extract_current_real_speed(p2_team, turn, "p2")
            if p1_speed > p2_speed:
                p1_starts += 1
            elif p2_speed > p1_speed:
                p2_starts += 1

            # P1 update crit and miss counters
            if not p1_move:
                p1_no_move += 1
            elif p1_move.get("category") == "STATUS":
                p1_move_status += 1
            else:
                # HP of the target before this turn; a Pokémon not seen yet is at full HP
                target = p2_team.get(p2_state.get("name"))
                hp_before = target.get("current_hp_pct") if target else 1.0
                expected_dmg = move_pct_dmg_calculator(turn, "p1", "p2")
                real_dmg = hp_before - p2_state.get("hp_pct")

                if real_dmg == 0:
                    p1_miss += 1
                elif expected_dmg * crit_ratio < real_dmg:
                    p1_crit += 1

            # P2 update crit and miss counters (same rule as for P1)
            if not p2_move:
                p2_no_move += 1
            elif p2_move.get("category") == "STATUS":
                p2_move_status += 1
            else:
                expected_dmg = move_pct_dmg_calculator(turn, "p2", "p1")
                hp_before = p1_team.get(p1_state.get("name")).get("current_hp_pct")
                real_dmg = hp_before - p1_state.get("hp_pct")

                if real_dmg == 0:
                    p2_miss += 1
                elif expected_dmg * crit_ratio < real_dmg:
                    p2_crit += 1

            # Only now fold this turn into the tracked teams: the checks above
            # need the state as it was before the turn.
            p1_team = compute_team(p1_team, turn, "p1")
            p2_team = extend_team_p2(p2_team, turn)

        p1_members = list(p1_team.values())
        p2_members = list(p2_team.values())

        for stat in stat_names:
            p1_vals = [p[stat] for p in p1_members]
            p2_vals = [p[stat] for p in p2_members]

            if stat == "current_hp_pct":
                features[f"p1_{stat}_mean"] = sum(p1_vals) / len(p1_vals) if p1_vals else 1.0
                # Add 1.0 for every missing slot
                p2_vals.extend([1.0] * (team_size - len(p2_vals)))
                features[f"p2_{stat}_mean"] = sum(p2_vals) / team_size
            elif p1_vals and p2_vals:
                p1_avg = sum(p1_vals) / len(p1_vals)
                p2_avg = sum(p2_vals) / len(p2_vals)
                features[f"avg_{stat}_diff"] = p1_avg - p2_avg
            else:
                # No Pokémon on one side (e.g. empty timeline): no measurable difference
                features[f"avg_{stat}_diff"] = 0.0

        # Statuses counts
        p1_statuses = Counter([t["p1_pokemon_state"]["status"] for t in timeline if t["p1_pokemon_state"]["status"] != "nostatus"])
        p2_statuses = Counter([t["p2_pokemon_state"]["status"] for t in timeline if t["p2_pokemon_state"]["status"] != "nostatus"])
        for s, c in p1_statuses.items():
            features[f"p1_status_{s}_count"] = c
        for s, c in p2_statuses.items():
            features[f"p2_status_{s}_count"] = c

        features["diff_starts"] = p1_starts - p2_starts
        features["diff_crit"] = p1_crit - p2_crit
        features["diff_miss"] = p1_miss - p2_miss
        features["diff_no_move"] = p1_no_move - p2_no_move
        features["diff_move_status"] = p1_move_status - p2_move_status

        p1_last_status = [p["current_status"] for p in p1_members if p["current_status"] not in ["fnt", "nostatus"]]
        p2_last_status = [p["current_status"] for p in p2_members if p["current_status"] not in ["fnt", "nostatus"]]

        features["p1_count_last_status"] = len(p1_last_status)
        features["p2_count_last_status"] = len(p2_last_status)

        feature_list.append(features)

    return pd.DataFrame(feature_list).fillna(0)

# Build the feature tables: first the training set, then the test set
print("Processing training data...")
train_df = create_features(train_data)

print("\nProcessing test data...")
# The test file has one JSON object (one battle) per line
with open(test_file_path, 'r') as f:
    test_data = [json.loads(line) for line in f]
test_df = create_features(test_data)

print("\nTraining features preview:")
# Show every column of the wide feature table without truncation
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 2000)
display(train_df.head())

Processing training data...


Extracting features:   0%|          | 0/10000 [00:00<?, ?it/s]


Processing test data...


Extracting features:   0%|          | 0/5000 [00:00<?, ?it/s]


Training features preview:


Unnamed: 0,battle_id,player_won,avg_real_hp_diff,avg_real_atk_diff,avg_real_def_diff,avg_real_spa_diff,avg_real_spe_diff,p1_current_hp_pct_mean,p2_current_hp_pct_mean,p1_status_par_count,p1_status_slp_count,p1_status_fnt_count,p2_status_frz_count,p2_status_slp_count,p2_status_fnt_count,p2_status_par_count,diff_starts,diff_crit,diff_miss,diff_no_move,diff_move_status,p1_count_last_status,p2_count_last_status,p1_status_frz_count,p1_status_tox_count,p1_status_psn_count,p2_status_tox_count,p2_status_psn_count,p2_status_brn_count,p1_status_brn_count
0,0,1,-50.833333,2.5,6.666667,2.5,35.0,0.645469,0.44125,5.0,1.0,1.0,11.0,1.0,1.0,4.0,10,0,-2,-11,7,2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1,15.0,0.0,5.0,-20.0,-36.666667,0.263333,0.428333,6.0,2.0,3.0,0.0,3.0,0.0,2.0,-1,2,1,0,-1,0,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,1,29.166667,60.833333,43.333333,-40.833333,-65.0,0.696667,0.798333,8.0,6.0,1.0,0.0,10.0,0.0,4.0,-14,4,2,-5,6,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,1,39.166667,-52.5,-28.333333,28.333333,0.0,0.34,0.476667,17.0,0.0,3.0,0.0,5.0,0.0,0.0,-11,4,-3,2,3,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,1,-27.666667,-2.333333,24.333333,9.0,1.0,0.626667,0.525,7.0,1.0,1.0,0.0,1.0,0.0,21.0,10,7,2,0,-6,2,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# TRAINING LOGISTIC REGRESSION MODEL
# ==================================

# Every column except the identifier and the target is a model input
features = [col for col in train_df.columns if col not in ['battle_id', 'player_won']]
X_train = train_df[features]
y_train = train_df['player_won']

# Status-count columns exist only for statuses that occur in the data, so the
# test table may lack some training columns (or have extra ones). Align it to
# the training columns; a column missing from the test set means "count 0".
X_test = test_df.reindex(columns=features, fill_value=0)

print("Training model...")
model = make_pipeline(StandardScaler(), LogisticRegression(
    random_state=42,
    C=0.11408772250991077,
    max_iter=2500,
    solver='lbfgs',
    penalty='l2')
)

model.fit(X_train, y_train)
print("Model training complete.")

# Cross-validation test (cross_val_score fits clones, the model above is untouched)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X_train, y_train, cv=cv, scoring="accuracy", n_jobs=-1)
print("LR 5-fold CV accuracy: %.4f ± %.4f" % (scores.mean(), scores.std()))

Training model...
Model training complete.
LR 5-fold CV accuracy: 0.8456 ± 0.0050


In [None]:
# MAKE PREDICTION ON THE TEST DATA
# ================================

print("Generating predictions on the test set...")
test_predictions = model.predict(X_test)

# Submission table: the battle identifiers next to the predicted outcome
submission_df = test_df[['battle_id']].assign(player_won=test_predictions)

# Write the table to a .csv file without the index column
submission_df.to_csv('submission.csv', index=False)
print("\n'submission.csv' file created successfully!")

Generating predictions on the test set...

'submission.csv' file created successfully!
