In [2]:
# This code has been produced by the group "team_pk" group, composed of Leonardo Suriano, Riccardo Pugliese and Mariana Dos Campos.
# In the code we have provided some comments, that aims to help the reader moving around the code and get what the code is doing.
# The whole explanation has been given for each step of the code, from the building features code to the final predictive model. 
# Moreover, we decided to import libraries not all at once, but to import in every cell the libraries that the cell is using. This choice 
# has been made in order to make clear which library has been used in that specific cell.
# In case the comments we added are not enough to satisfy your curiosity, and in case you may need further clarification about function
# taken from libraries, please refer to the documentation of the respective libraries.
# In case you need further clarification about function we created from scratch in our code or about how the libraries functions has
# been used, please feel free to contact us. We will be more than happy to answer all your doubt!!!




# AI assistance disclaimer
# Parts of this code (in particular some comments, the iterative feature search, and minor implementation details) may have been drafted or refined 
# with the help of AI-based tools. The use of AI was strictly limited to these aspects. All core ideas, modeling choices, and logical structures 
# implemented in the code and in the models were entirely conceived and designed by the members of the group, without external intellectual 
# contribution, relying solely on online documentation, our own knowledge and the insights provided by the course lectures.


# ============================================================
# In this first cell, we are doing the following operations:
#   - read the raw battle logs from train.jsonl and test.jsonl
#   - define Pokémon stats (species) and types
#   - define the type chart (which type is strong/weak vs which other type)
#   - create helper functions for:
#       1) safe division
#       2) type effectiveness scoring
#       3) counting statuses, switches, damage, etc.
#   - define and call build_features(dataset) to create:
#       * train_df (features + target player_won)
#       * test_df  (features only, we must predict player_won)
# ============================================================

from pathlib import Path
from collections import Counter
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import json
import warnings
warnings.filterwarnings("ignore")

# =========================
# Train and test dataset input paths
# =========================

DATA_DIR   = Path(r"C:\Users\2003l\OneDrive\Documenti\fds-pokemon-battles-prediction-2025")
TRAIN_FILE = DATA_DIR / "train.jsonl"   
TEST_FILE  = DATA_DIR / "test.jsonl" 

# Small helper: read a .jsonl file (one JSON per line) into a Python list.
# Each line in the file is a JSON object = one battle.
def load_jsonl(path: Path):
    with path.open("r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]

# Load train and test battles into memory as Python lists of dicts.
train_data = load_jsonl(TRAIN_FILE)
test_data  = load_jsonl(TEST_FILE)

# ------------------------------------------------------------
# Below, we are creating a dictionary of base stats for each Pokémon in our battles.
# For each species we store:
#   hp  = base HP
#   atk = attack
#   def = defense
#   spa = special attack
#   spd = special defense
#   spe = speed
# We will use these numbers to compute the features that need "edges",
# like speed advantage, attack advantage, defense advantage, etc.
# ------------------------------------------------------------

species = {
    'alakazam': {'hp': 55, 'atk': 50, 'def': 45, 'spa': 135, 'spd': 135, 'spe': 120},
    'articuno': {'hp': 90, 'atk': 85, 'def': 100, 'spa': 125, 'spd': 125, 'spe': 85},
    'chansey': {'hp': 250, 'atk': 5, 'def': 5, 'spa': 105, 'spd': 105, 'spe': 50},
    'charizard': {'hp': 78, 'atk': 84, 'def': 78, 'spa': 85, 'spd': 85, 'spe': 100},
    'cloyster': {'hp': 50, 'atk': 95, 'def': 180, 'spa': 85, 'spd': 85, 'spe': 70},
    'dragonite': {'hp': 91, 'atk': 134, 'def': 95, 'spa': 100, 'spd': 100, 'spe': 80},
    'exeggutor': {'hp': 95, 'atk': 95, 'def': 85, 'spa': 125, 'spd': 125, 'spe': 55},
    'gengar': {'hp': 60, 'atk': 65, 'def': 60, 'spa': 130, 'spd': 130, 'spe': 110},
    'golem': {'hp': 80, 'atk': 110, 'def': 130, 'spa': 55, 'spd': 55, 'spe': 45},
    'jolteon': {'hp': 65, 'atk': 65, 'def': 60, 'spa': 110, 'spd': 110, 'spe': 130},
    'jynx': {'hp': 65, 'atk': 50, 'def': 35, 'spa': 95, 'spd': 95, 'spe': 95},
    'lapras': {'hp': 130, 'atk': 85, 'def': 80, 'spa': 95, 'spd': 95, 'spe': 60},
    'persian': {'hp': 65, 'atk': 70, 'def': 60, 'spa': 65, 'spd': 65, 'spe': 115},
    'rhydon': {'hp': 105, 'atk': 130, 'def': 120, 'spa': 45, 'spd': 45, 'spe': 40},
    'slowbro': {'hp': 95, 'atk': 75, 'def': 110, 'spa': 80, 'spd': 80, 'spe': 30},
    'snorlax': {'hp': 160, 'atk': 110, 'def': 65, 'spa': 65, 'spd': 65, 'spe': 30},
    'starmie': {'hp': 60, 'atk': 75, 'def': 85, 'spa': 100, 'spd': 100, 'spe': 115},
    'tauros': {'hp': 75, 'atk': 100, 'def': 95, 'spa': 70, 'spd': 70, 'spe': 110},
    'victreebel': {'hp': 80, 'atk': 105, 'def': 65, 'spa': 100, 'spd': 100, 'spe': 70},
    'zapdos': {'hp': 90, 'atk': 90, 'def': 85, 'spa': 125, 'spd': 125, 'spe': 100},
}

# For each Pokémon we are storing here its typing (primary and secondary type).
# Please nota that "notype" means "no second type" in this simplified setup.

types_map = {
    "alakazam": ["notype", "psychic"],
    "articuno": ["flying", "ice"],
    "chansey": ["normal", "notype"],
    "charizard": ["fire", "flying"],
    "cloyster": ["ice", "water"],
    "dragonite": ["dragon", "flying"],
    "exeggutor": ["grass", "psychic"],
    "gengar": ["ghost", "poison"],
    "golem": ["ground", "rock"],
    "jolteon": ["electric", "notype"],
    "jynx": ["ice", "psychic"],
    "lapras": ["ice", "water"],
    "persian": ["normal", "notype"],
    "rhydon": ["ground", "rock"],
    "slowbro": ["psychic", "water"],
    "snorlax": ["normal", "notype"],
    "starmie": ["psychic", "water"],
    "tauros": ["normal", "notype"],
    "victreebel": ["grass", "poison"],
    "zapdos": ["electric", "flying"]
}

# ------------------------------------------------------------
# Type chart: this tells us how strong one attacking type is
# against another defending type. This is due to the fact that 
# pokemon of one specific type may be really strong on some 
# specific type of pokemon but ineffective on others.
# An example has been provided:
#   effectiveness['fire']['grass'] = 2   (super effective)
#   effectiveness['normal']['rock'] = 0.5 (not very effective)
#   effectiveness['ghost']['psychic'] = 0 (no effect)
# ------------------------------------------------------------

# The following table may have been taken from internet, formatted as below and 
# copy pasted here.

effectiveness = {
    'normal':   {'rock': 0.5, 'ghost': 0, 'notype': 1},
    'fire':     {'grass': 2, 'ice': 2, 'bug': 2, 'rock': 0.5, 'fire': 0.5, 'water': 0.5, 'dragon': 0.5},
    'water':    {'fire': 2, 'rock': 2, 'ground': 2, 'water': 0.5, 'grass': 0.5, 'dragon': 0.5},
    'electric': {'water': 2, 'flying': 2, 'ground': 0, 'electric': 0.5, 'grass': 0.5, 'dragon': 0.5},
    'grass':    {'water': 2, 'rock': 2, 'ground': 2, 'fire': 0.5, 'grass': 0.5, 'poison': 0.5, 'flying': 0.5, 'dragon': 0.5},
    'ice':      {'grass': 2, 'ground': 2, 'flying': 2, 'dragon': 2, 'fire': 0.5, 'ice': 0.5, 'water': 0.5},
    'poison':   {'grass': 2, 'poison': 0.5, 'ground': 0.5, 'rock': 0.5, 'ghost': 0.5},
    'ground':   {'fire': 2, 'electric': 2, 'poison': 2, 'rock': 2, 'grass': 0.5, 'flying': 0},
    'flying':   {'grass': 2, 'fighting': 2, 'bug': 2, 'rock': 0.5, 'electric': 0.5},
    'psychic':  {'poison': 2, 'fighting': 2, 'psychic': 0.5},
    'bug':      {'grass': 2, 'psychic': 2, 'poison': 0.5, 'fire': 0.5, 'flying': 0.5},
    'rock':     {'fire': 2, 'ice': 2, 'flying': 2, 'bug': 2, 'ground': 0.5},
    'ghost':    {'ghost': 2, 'psychic': 0},
    'dragon':   {'dragon': 2},
    'notype':   {}
}

# Given two types, we turn the type-chart into a simple score:
#   +1  if attacking_type is super effective vs defending_type
#   -1  if it is not very effective
#   -2  if it does no damage (immune)
#    0  if it is neutral.

def type_match(attacking_type, defending_type):
    v = effectiveness.get(attacking_type, {}).get(defending_type, 1)
    if v == 0:
        return -2
    if v < 1:
        return -1
    if v > 1:
        return 1
    return 0

# ------------------------------------------------------------
# =========================
# Status severity mapping (since pokemon status is a string, here we are mapping it to numeric).
# The bigger the number, the "worse" the status.
#
# - 0 = no status or faint (we treat faint separately in other logic)
# - 1 = mild status (paralysis, burn, poison)
# - 2 = severe (toxic, freeze)
# - 3 = sleep (very strong in Gen 1)
#
# The following table may have been taken from internet, formatted as below and 
# copy pasted here.
# =========================
# ------------------------------------------------------------

MAP_STATUS = {'nostatus':0,'par':1,'brn':1,'psn':1,'tox':2,'frz':2,'slp':3,'fnt':0}

# Sets of move names for special categories:
RECOVERY_MOVES = {'recover','softboiled','rest'}
HIGH_CRIT      = {'slash','crabhammer','razorleaf','karatechop'}
TRAPS          = {'wrap','clamp','firespin'}


# ------------------------------------------------------------
# MAIN FEATURE GENERATOR
# ------------------------------------------------------------
# This is the main feature function.
# It takes the raw list of battles and, for each battle, builds a big
# dictionary of numeric features. Then it returns a DataFrame with one
# row per battle and one column per feature (plus battle_id and player_won).
# ------------------------------------------------------------
# This is the main feature function.
# It takes the raw list of battles and, for each battle, builds a big
# dictionary of numeric features. Then it returns a DataFrame with one
# row per battle and one column per feature (plus battle_id and player_won).
def build_features(dataset):
    """
    Build the full feature set from the raw battle logs.

    Parameters
    ----------
    dataset : list[dict]
        Raw battles from train.jsonl or test.jsonl. Each battle is a dict
        containing at least:
          - 'battle_id'
          - 'battle_timeline' (list of turns; each turn has p1/p2 states and moves)
          - 'player_won' (only in train)

    Returns
    -------
    pd.DataFrame
        One row per battle, with:
          - all features (engineered below)
          - 'battle_id'
          - 'player_won' (only for train).

    General convention
    ------------------
    Almost all features are defined as a *difference* between player 2 and
    player 1 or between (p1 - p2). The sign tells us who is advantaged:
      - Positive value for a "p2 - p1" feature  -> advantage for player 2
      - Negative value for a "p2 - p1" feature  -> advantage for player 1
      - Positive value for a "p1 - p2" feature  -> advantage for player 1
    """

    rows = []

    # We loop over each battle in the dataset
    for battle in tqdm(dataset, desc="Building features"):
        # feats will be a dictionary: key = feature name, value = number
        feats = {}

        # Get the timeline: list of turns (each is a dict with states + moves)
        timeline = battle.get('battle_timeline', []) or []
        den = len(timeline)  # number of turns in the battle

        # --------------------------------------------------------
        # Basic sequences that we extract from the timeline
        # - p1n / p2n: active Pokémon name each turn
        # - p1s / p2s: status of the active Pokémon each turn
        # - p1hp / p2hp: HP% of the active Pokémon each turn
        # - md1 / md2: move details used each turn by each player
        # - eff1 / eff2: set of "effects" currently active (substitute, reflect...)
        # These sequences are the raw material for most of the features below.
        # --------------------------------------------------------
        p1n = [str(t['p1_pokemon_state'].get('name','')).lower() for t in timeline] if den else []
        p2n = [str(t['p2_pokemon_state'].get('name','')).lower() for t in timeline] if den else []
        p1s = [str(t['p1_pokemon_state'].get('status','nostatus')).lower() for t in timeline] if den else []
        p2s = [str(t['p2_pokemon_state'].get('status','nostatus')).lower() for t in timeline] if den else []
        p1hp = [float(t['p1_pokemon_state'].get('hp_pct',0.0)) for t in timeline] if den else []
        p2hp = [float(t['p2_pokemon_state'].get('hp_pct',0.0)) for t in timeline] if den else []
        md1 = [(t.get('p1_move_details') or {}) for t in timeline] if den else []
        md2 = [(t.get('p2_move_details') or {}) for t in timeline] if den else []
        eff1 = [set(t['p1_pokemon_state'].get('effects') or []) for t in timeline] if den else []
        eff2 = [set(t['p2_pokemon_state'].get('effects') or []) for t in timeline] if den else []

        # --------------------------------------------------------
        # Final HP and final status per species (for both players)
        # We keep track of the last HP and last status seen for each species.
        # These are then used to build "end-of-battle" features.
        # --------------------------------------------------------
        hp_last_p1, hp_last_p2 = {}, {}
        st_last_p1, st_last_p2 = {}, {}
        for t in timeline:
            n1 = str(t['p1_pokemon_state'].get('name','')).lower()
            n2 = str(t['p2_pokemon_state'].get('name','')).lower()
            hp_last_p1[n1] = float(t['p1_pokemon_state'].get('hp_pct',0.0))
            hp_last_p2[n2] = float(t['p2_pokemon_state'].get('hp_pct',0.0))
            st_last_p1[n1] = str(t['p1_pokemon_state'].get('status','nostatus')).lower()
            st_last_p2[n2] = str(t['p2_pokemon_state'].get('status','nostatus')).lower()

        # --------------------------------------------------------
        # hp_edge_final:
        #   FINAL HP edge = (mean HP of player 2 team) - (mean HP of player 1 team)
        #   > 0 : on average p2's remaining mons are healthier at the end
        #   < 0 : advantage in HP for p1 at the end
        # --------------------------------------------------------
        mean_hp_p1 = float(np.mean(list(hp_last_p1.values()))) if hp_last_p1 else 0.0
        mean_hp_p2 = float(np.mean(list(hp_last_p2.values()))) if hp_last_p2 else 0.0
        feats['hp_edge_final'] = float(mean_hp_p2 - mean_hp_p1)

        # --------------------------------------------------------
        # p1_alive_final:
        #   Number of p1 Pokémon that still have hp > 0 at the end.
        #   Rough proxy for how many "resources" remain for p1.
        # --------------------------------------------------------
        feats['p1_alive_final'] = int(sum(hp > 0 for hp in hp_last_p1.values()))

        # --------------------------------------------------------
        # p1_status_mean_final:
        #   Average status severity over p1's team at the end.
        #
        # status_severity_gap_final:
        #   FINAL status severity difference = (p2 mean severity) - (p1 mean severity)
        #   If status_severity_gap_final > 0: p2 is "worse off" in terms of status than p1.
        # --------------------------------------------------------
        p1_status_mean_final = float(np.mean([MAP_STATUS.get(s,0) for s in st_last_p1.values()])) if st_last_p1 else 0.0
        p2_status_mean_final = float(np.mean([MAP_STATUS.get(s,0) for s in st_last_p2.values()])) if st_last_p2 else 0.0
        feats['p1_status_mean_final'] = p1_status_mean_final
        feats['status_severity_gap_final'] = float(p2_status_mean_final - p1_status_mean_final)

        # --------------------------------------------------------
        # revealed_count_diff:
        #   (# of distinct species that appeared on the field for p1)
        # - (# of distinct species that appeared on the field for p2).
        #   Positive: p1 revealed more different Pokémon than p2.
        # --------------------------------------------------------
        revealed_p1 = set(p1n)
        revealed_p2 = set(p2n)
        feats['revealed_count_diff'] = int(len(revealed_p1) - len(revealed_p2))

        # --------------------------------------------------------
        # status_turns_advantage:
        #   Sum over turns of (status severity for p1 - status severity for p2).
        #   Positive: across the whole battle p1 suffered "worse" status conditions
        #   for more turns than p2.
        # --------------------------------------------------------
        p1_status_series = [MAP_STATUS.get(s,0) for s in p1s]
        p2_status_series = [MAP_STATUS.get(s,0) for s in p2s]
        feats['status_turns_advantage'] = int(sum(p1_status_series) - sum(p2_status_series))

        # --------------------------------------------------------
        # tox_ratio_diff:
        #   For each player we look at all poison-related turns and compute:
        #      tox_ratio = (# toxic turns) / (# poison+toxic turns)
        #   Then tox_ratio_diff = (p1 tox_ratio - p2 tox_ratio).
        #   Positive: p1 is "more often" under severe toxic than normal poison,
        #   relative to p2.
        # --------------------------------------------------------
        p1_psn_cnt = sum(1 for s in p1s if s in {'psn','tox'})
        p2_psn_cnt = sum(1 for s in p2s if s in {'psn','tox'})
        p1_tox_cnt = sum(1 for s in p1s if s == 'tox')
        p2_tox_cnt = sum(1 for s in p2s if s == 'tox')
        p1_tox_ratio = (p1_tox_cnt / p1_psn_cnt) if p1_psn_cnt else 0.0
        p2_tox_ratio = (p2_tox_cnt / p2_psn_cnt) if p2_psn_cnt else 0.0
        feats['tox_ratio_diff'] = float(p1_tox_ratio - p2_tox_ratio)

        # --------------------------------------------------------
        # HP gap across time (per turn)
        # hp_gap[t] = p2_hp[t] - p1_hp[t]
        #   > 0 : p2 is overall ahead in HP at that turn
        #   < 0 : p1 is ahead
        # --------------------------------------------------------
        hp_gap = (np.array(p2hp) - np.array(p1hp)) if den else np.array([])

        # We split the battle into thirds (early, mid, late) based on number of turns.
        E = max(1, den//3) if den else 1
        mid_start, mid_end = E, max(E*2, min(den, E*2))

        # --------------------------------------------------------
        # Helper to find switch indices (turns when the active mon changes)
        # --------------------------------------------------------
        def switches(seq):
            idx = []
            for i in range(1, len(seq)):
                if seq[i] != seq[i-1]:
                    idx.append(i)
            return idx

        # sw1/sw2: indices of player 1 and 2 switches.
        sw1 = switches(p1n) if den else []
        sw2 = switches(p2n) if den else []

        # --------------------------------------------------------
        # forced_switch_share_diff:
        #   We call a switch "forced" if, right after the switch, the HP gap
        #   moves in the direction suggesting a bad position:
        #     - For p1: hp_gap[i] - hp_gap[i-1] > 0  (gap moves towards p2)
        #     - For p2: hp_gap[i] - hp_gap[i-1] < 0  (gap moves towards p1)
        #
        #   forced_switch_share_diff = (forced switches share for p2) -
        #                              (forced switches share for p1)
        #   Positive: p2 is more often switching from a worse position.
        # --------------------------------------------------------
        forced_p1 = 0
        forced_p2 = 0
        if den and len(hp_gap) > 1:
            for i in sw1:
                if i-1 >= 0 and float(hp_gap[i] - hp_gap[i-1]) > 0:
                    forced_p1 += 1
            for i in sw2:
                if i-1 >= 0 and float(hp_gap[i] - hp_gap[i-1]) < 0:
                    forced_p2 += 1
        p1_forced_share = (forced_p1 / len(sw1)) if sw1 else 0.0
        p2_forced_share = (forced_p2 / len(sw2)) if sw2 else 0.0
        feats['forced_switch_share_diff'] = float(p2_forced_share - p1_forced_share)

        # --------------------------------------------------------
        # active_entropy_diff:
        #   Entropy of the active Pokémon usage distribution for each player.
        #   High entropy -> many different mons used in a balanced way.
        #   active_entropy_diff = H(p1_active) - H(p2_active)
        #   Positive: p1's usage is more diverse than p2's.
        # --------------------------------------------------------
        def entropy(counter):
            tot = float(sum(counter.values()))
            if tot <= 0: return 0.0
            p = [c/tot for c in counter.values()]
            return float(-sum(pi*np.log(max(pi,1e-12)) for pi in p))

        c1 = Counter(p1n); c2 = Counter(p2n)
        feats['active_entropy_diff'] = float(entropy(c1) - entropy(c2))

        # --------------------------------------------------------
        # Damage and healing sequences:
        # p1_loss / p2_loss: HP lost per turn (strictly positive when HP decreases)
        # p1_heal / p2_heal: HP recovered per turn (strictly positive when HP increases)
        # These support damage/healing related features.
        # --------------------------------------------------------
        p1_loss = [max(0.0, p1hp[i-1]-p1hp[i]) for i in range(1, den)] if den>1 else []
        p2_loss = [max(0.0, p2hp[i-1]-p2hp[i]) for i in range(1, den)] if den>1 else []
        p1_heal = [max(0.0, p1hp[i]-p1hp[i-1]) for i in range(1, den)] if den>1 else []
        p2_heal = [max(0.0, p2hp[i]-p2hp[i-1]) for i in range(1, den)] if den>1 else []

        # --------------------------------------------------------
        # p2_late_damage:
        #   Total HP lost by p2 in the late third of the game.
        #   High values: p2 is taking a lot of damage late-game.
        # --------------------------------------------------------
        feats['p2_late_damage'] = float(sum(p2_loss[-E:]) if p2_loss else 0.0)

        # --------------------------------------------------------
        # attacks_rate_diff:
        #   (p1 attacks per turn) - (p2 attacks per turn).
        #   Here an "attack" is any turn where the move has a non-empty name.
        #   Positive: p1 is clicking more moves per turn than p2.
        # --------------------------------------------------------
        p1_attacks = sum(1 for m in md1 if m.get('name'))
        p2_attacks = sum(1 for m in md2 if m.get('name'))
        feats['attacks_rate_diff'] = float((p1_attacks / den if den else 0.0) - (p2_attacks / den if den else 0.0))

        # --------------------------------------------------------
        # bp_mean_p2:
        #   Average base power of moves used by p2 (only non-null moves).
        #   Rough proxy for how "strong" p2's typical move is.
        # --------------------------------------------------------
        feats['bp_mean_p2'] = float(
            np.mean([float(m.get('base_power',0.0)) for m in md2
                     if m.get('name') and m.get('base_power') is not None]) if p2_attacks else 0.0
        )

        # --------------------------------------------------------
        # HP-based global features:
        #   hp_gap_peak           : max(hp_gap[t]) = best HP advantage for p2
        #   hp_gap_peak_turn_share: (turn index of that peak + 1) / total_turns
        #                            (where in the battle that best moment happens)
        #   hp_gap_var            : variance of hp_gap across the battle
        #   hp_gap_autocorr       : correlation between hp_gap[t] and hp_gap[t+1]
        #                           (how persistent the advantage is from turn to turn)
        #   hp_gap_sign_flips     : number of times the sign of hp_gap changes
        #                           (how often the lead changes between players)
        # --------------------------------------------------------
        if den and len(hp_gap):
            feats['hp_gap_peak'] = float(np.max(hp_gap))
            feats['hp_gap_peak_turn_share'] = float((int(np.argmax(hp_gap))+1) / den)
            feats['hp_gap_var'] = float(np.var(hp_gap))

            if len(hp_gap) >= 2 and np.var(hp_gap)>0 and np.var(hp_gap[:-1])>0 and np.var(hp_gap[1:])>0:
                feats['hp_gap_autocorr'] = float(np.corrcoef(hp_gap[:-1], hp_gap[1:])[0,1])
            else:
                feats['hp_gap_autocorr'] = 0.0

            sgn = np.sign(hp_gap)
            feats['hp_gap_sign_flips'] = int(
                sum(1 for a,b in zip(sgn, sgn[1:])
                    if a!=0 and b!=0 and a!=b)
            )
        else:
            feats['hp_gap_peak'] = 0.0
            feats['hp_gap_peak_turn_share'] = 0.0
            feats['hp_gap_var'] = 0.0
            feats['hp_gap_autocorr'] = 0.0
            feats['hp_gap_sign_flips'] = 0

        # --------------------------------------------------------
        # lead_type_edge:
        #   Type matchup edge of the lead (turn 1) position:
        #   we compute how much p2's lead types are good vs p1's lead types,
        #   minus how much p1's lead types are good vs p2's.
        #
        # lead_def_edge:
        #   Base Defense stat difference of the leads = (p2_def - p1_def).
        # --------------------------------------------------------
        if den:
            lead1 = p1n[0]; lead2 = p2n[0]
            t1 = [t for t in types_map.get(lead1, ["notype","notype"]) if t!='notype']
            t2 = [t for t in types_map.get(lead2, ["notype","notype"]) if t!='notype']
            lead_edge = 0
            for a in t2:
                for b in t1:
                    lead_edge += type_match(a,b)
                    lead_edge -= type_match(b,a)
            feats['lead_type_edge'] = int(lead_edge)

            d1 = species.get(lead1,{}).get('def',0); d2 = species.get(lead2,{}).get('def',0)
            feats['lead_def_edge'] = float(d2 - d1)
        else:
            feats['lead_type_edge'] = 0
            feats['lead_def_edge'] = 0.0

        # --------------------------------------------------------
        # types_last_round:
        #   Type edge at the very last turn based on the last two active Pokémon:
        #       sum(type_match(p1_type, p2_type)) - sum(type_match(p2_type, p1_type))
        #   Positive: final board position is type-favorable to p1.
        # --------------------------------------------------------
        if den:
            last1 = p1n[-1]; last2 = p2n[-1]
            tp1 = types_map.get(last1, ["notype","notype"])
            tp2 = types_map.get(last2, ["notype","notype"])
            scf = 0
            for a in tp1:
                for b in tp2:
                    scf += type_match(a,b)
                    scf -= type_match(b,a)
            feats['types_last_round'] = int(scf)
        else:
            feats['types_last_round'] = 0

        # --------------------------------------------------------
        # rs_hit_share_diff:
        #   For each player, we count hits that are:
        #     - super effective (se)
        #     - resisted (rs)
        #     - immune (im)
        #   rs_hit_share_diff = (resisted hits share for p1) - (resisted hits share for p2).
        #
        # immune_count_diff:
        #   (number of times p1 hits into an immunity) - (number of times p2 does).
        #
        # p1_immune_count:
        #   Raw count of p1's hits into immunity.
        # --------------------------------------------------------
        se1=rs1=im1=act1=0
        se2=rs2=im2=act2=0
        for i in range(den):
            m1 = md1[i]; m2 = md2[i]
            if m1.get('name'):
                mv_t = str(m1.get('type','') or '').lower()
                if mv_t:
                    on = p2n[i] if i < len(p2n) else ''
                    for ot in [x for x in types_map.get(on, ["notype","notype"]) if x!='notype']:
                        v = type_match(mv_t, ot)
                        if v > 0: se1 += 1
                        elif v < 0 and v != -2: rs1 += 1
                        elif v == -2: im1 += 1
                    act1 += 1
            if m2.get('name'):
                mv_t = str(m2.get('type','') or '').lower()
                if mv_t:
                    on = p1n[i] if i < len(p1n) else ''
                    for ot in [x for x in types_map.get(on, ["notype","notype"]) if x!='notype']:
                        v = type_match(mv_t, ot)
                        if v > 0: se2 += 1
                        elif v < 0 and v != -2: rs2 += 1
                        elif v == -2: im2 += 1
                    act2 += 1
        rs_share1 = (rs1/act1) if act1 else 0.0
        rs_share2 = (rs2/act2) if act2 else 0.0
        feats['rs_hit_share_diff'] = float(rs_share1 - rs_share2)
        feats['p1_immune_count'] = int(im1)
        feats['immune_count_diff'] = int((im1) - (im2))

        # --------------------------------------------------------
        # boom_count_diff:
        #   (# of boom moves used by p1) - (# used by p2),
        #   where boom moves are Explosion / Selfdestruct.
        #   Positive: p1 uses more self-sacrificing moves.
        # --------------------------------------------------------
        feats['boom_count_diff'] = int(
            sum(1 for m in md1 if str(m.get('name','')).lower() in {'explosion','selfdestruct'})
            - sum(1 for m in md2 if str(m.get('name','')).lower() in {'explosion','selfdestruct'})
        )

        # --------------------------------------------------------
        # counter_count_diff:
        #   (# of "Counter" uses by p1) - (# by p2).
        # --------------------------------------------------------
        feats['counter_count_diff'] = int(
            sum(1 for m in md1 if str(m.get('name','')).lower()=='counter')
            - sum(1 for m in md2 if str(m.get('name','')).lower()=='counter')
        )

        # --------------------------------------------------------
        # move_diversity_p1:
        #   Number of distinct moves used by p1 in the battle.
        #   Measures how many different tools p1 actually clicked.
        # --------------------------------------------------------
        feats['move_diversity_p1'] = int(len({str(m.get('name')) for m in md1 if m.get('name')}))

        # --------------------------------------------------------
        # Usage-weighted base stats:
        # c1 / c2 count how many turns each species was active for p1/p2.
        # wmean(counter, key): weighted average of a base stat (key) over the
        #                      mons in the counter, using "# of turns on field" as weights.
        # --------------------------------------------------------
        c1 = Counter(p1n); c2 = Counter(p2n)
        def wmean(counter, key):
            S = 0.0; W = 0.0
            for nm,w in counter.items():
                if nm in species:
                    S += float(species[nm].get(key,0)) * w
                    W += w
            return (S/W) if W else 0.0

        # --------------------------------------------------------
        # used_mean_spe_diff:
        #   (avg speed of Pokémon used by p1, weighted by turns on field)
        # - (avg speed of Pokémon used by p2, weighted by turns on field).
        #   Positive: p1 tends to have faster mons on the field.
        #
        # p2_used_count:
        #   Number of distinct Pokémon that p2 actually used (took the field).
        # --------------------------------------------------------
        p1_mean_spe_used = wmean(c1,'spe')
        p2_mean_spe_used = wmean(c2,'spe')
        feats['used_mean_spe_diff'] = float(p1_mean_spe_used - p2_mean_spe_used)
        feats['p2_used_count'] = int(len(c2))

        # --------------------------------------------------------
        # eff_speed_adv_share_p2 / eff_speed_edge_avg:
        #   We compute "effective speed" each turn combining:
        #     - base speed
        #     - speed boosts (Gen 1 style multipliers)
        #     - paralysis modifier (0.25 if PAR)
        #
        #   eff_speed_adv_share_p2:
        #       share of turns where p2 effective speed >= p1 effective speed.
        #
        #   eff_speed_edge_avg:
        #       average (p2_effective_speed - p1_effective_speed) over all turns.
        # --------------------------------------------------------
        eff_adv = 0
        edge_sum = 0.0
        for i in range(den):
            s1 = timeline[i]['p1_pokemon_state']; s2 = timeline[i]['p2_pokemon_state']
            n1 = str(s1.get('name','')).lower(); n2 = str(s2.get('name','')).lower()
            base1 = species.get(n1,{})
            base2 = species.get(n2,{})
            b1 = (s1.get('boosts') or {})
            b2 = (s2.get('boosts') or {})

            # Effective speed = base_speed * boost_multiplier
            try:
                sp1 = float(base1.get('spe',0)) * (
                    (2.0 + int(b1.get('spe',0)))/2.0 if int(b1.get('spe',0))>=0
                    else 2.0/(2.0-int(b1.get('spe',0)))
                )
            except:
                sp1 = float(base1.get('spe',0))
            try:
                sp2 = float(base2.get('spe',0)) * (
                    (2.0 + int(b2.get('spe',0)))/2.0 if int(b2.get('spe',0))>=0
                    else 2.0/(2.0-int(b2.get('spe',0)))
                )
            except:
                sp2 = float(base2.get('spe',0))

            # Paralysis penalty
            if str(s1.get('status','')).lower()=='par': sp1 *= 0.25
            if str(s2.get('status','')).lower()=='par': sp2 *= 0.25

            # p2 is faster or equal
            if sp2 >= sp1: eff_adv += 1
            edge_sum += (sp2 - sp1)

        feats['eff_speed_adv_share_p2'] = float((eff_adv/den) if den else 0.0)
        feats['eff_speed_edge_avg'] = float((edge_sum/den) if den else 0.0)

        # --------------------------------------------------------
        # initiative_early_diff / initiative_late_diff:
        #   For early turns and late turns separately, we look at
        #   how often p1's base speed >= p2's base speed.
        #
        #   For a window [start,end):
        #     - let f = # of turns where p1 base speed >= p2 base speed
        #     - L = length of the window
        #     - share_for_p1 = f/L
        #     - share_for_p2 = (L-f)/L
        #   init_share = share_for_p1 - share_for_p2.
        #
        #   initiative_early_diff = init_share in the early third of the battle
        #   initiative_late_diff  = init_share in the final third of the battle
        # --------------------------------------------------------
        def init_share(seq_len, start, end):
            if end <= start or den==0: return 0.0
            f = 0
            for i in range(start, end):
                if i >= den: break
                n1 = p1n[i] if i < len(p1n) else ''
                n2 = p2n[i] if i < len(p2n) else ''
                s1 = species.get(n1,{}).get('spe',0)
                s2 = species.get(n2,{}).get('spe',0)
                if s1 >= s2: f += 1
            L = max(1, end-start)
            return float(f/L) - float((L-f)/L)

        feats['initiative_early_diff'] = float(init_share(den, 0, min(E, den)))
        feats['initiative_late_diff']  = float(init_share(den, max(0, den-E), den))

        # --------------------------------------------------------
        # last_switch_turn_p1:
        #   Last turn index (1-based) on which p1 changes the active Pokémon.
        #   If p1 never switches, we set it to den+1 (a "virtual" late switch).
        # --------------------------------------------------------
        last_sw = 0
        for i in range(1, den):
            if p1n[i] != p1n[i-1]:
                last_sw = i+1
        feats['last_switch_turn_p1'] = int(last_sw if last_sw else den+1)

        # --------------------------------------------------------
        # Ping-pong switching patterns:
        #   Recurring pattern A -> B -> A (returns to the same mon after
        #   exactly one different mon).
        #
        # p1_pingpong_switches:
        #   Number of ping-pong patterns for p1.
        #
        # pingpong_switches_diff:
        #   p1_pingpong_switches - p2_pingpong_switches.
        # --------------------------------------------------------
        def pingpong(seq):
            c = 0
            for i in range(2, len(seq)):
                if seq[i]==seq[i-2] and seq[i]!=seq[i-1]:
                    c += 1
            return c

        feats['p1_pingpong_switches'] = int(pingpong(p1n))
        feats['pingpong_switches_diff'] = int(pingpong(p1n) - pingpong(p2n))

        # --------------------------------------------------------
        # both_switched_share:
        #   Share of turns (from turn 2 onward) where BOTH players switch
        #   between turn i-1 and i.
        # --------------------------------------------------------
        both_sw = 0
        for i in range(1, den):
            if p1n[i]!=p1n[i-1] and p2n[i]!=p2n[i-1]:
                both_sw += 1
        feats['both_switched_share'] = float(both_sw / max(1, (den-1)))

        # --------------------------------------------------------
        # p1_switch_late_share:
        #   Among the last third of turns, how many are p1 switch turns,
        #   normalized by the number of turns in that late window.
        #   Measures how much p1 is "shuffling" late-game.
        # --------------------------------------------------------
        late_sw1 = 0
        for i in range(max(1, den-E), den):
            if i < len(p1n) and i-1 >= 0 and p1n[i] != p1n[i-1]:
                late_sw1 += 1
        feats['p1_switch_late_share'] = float(late_sw1 / max(1, min(E, den-1)))

        # --------------------------------------------------------
        # severe_status_early_share:
        #   Share of early turns where at least one side has a status of
        #   severity >= 2 (e.g. serious conditions like sleep, freeze,
        #   badly poisoned, depending on MAP_STATUS).
        #   It ignores which side, only checks "does somebody suffer a bad status?".
        # --------------------------------------------------------
        severe = [(a>=2 or b>=2) for a,b in zip(p1_status_series, p2_status_series)]
        feats['severe_status_early_share'] = float(
            sum(1 for x in severe[:E] if x) / max(1,E)
        ) if den else 0.0

        # --------------------------------------------------------
        # status_diversity_p1:
        #   Number of different non-"nostatus" statuses p1 has experienced.
        #
        # status_diversity_diff:
        #   status_diversity_p1 - status_diversity_p2.
        # --------------------------------------------------------
        sd1 = len({s for s in p1s if s!='nostatus'})
        sd2 = len({s for s in p2s if s!='nostatus'})
        feats['status_diversity_p1'] = int(sd1)
        feats['status_diversity_diff'] = int(sd1 - sd2)

        # --------------------------------------------------------
        # status_late_share_diff:
        #   In the late third of the battle, we compute:
        #       share of STATUS-category moves for p2
        #     - share of STATUS-category moves for p1
        #   Positive: p2 is using relatively more status moves late-game.
        # --------------------------------------------------------
        def share_status_late(md):
            late = md[max(0, den-E):den]
            tot = sum(1 for m in late if m.get('name'))
            sts = sum(1 for m in late if str(m.get('category','')).upper()=='STATUS' and m.get('name'))
            return (sts/tot) if tot else 0.0

        feats['status_late_share_diff'] = float(share_status_late(md2) - share_status_late(md1))

        # --------------------------------------------------------
        # rec_share_diff:
        #   share of recovery moves used by p1 minus share of recovery moves used by p2.
        #   "Recovery" moves are taken from the RECOVERY_MOVES list.
        # --------------------------------------------------------
        def rec_share(md):
            tot = sum(1 for m in md if m.get('name'))
            rec = sum(1 for m in md if str(m.get('name','')).lower() in RECOVERY_MOVES)
            return (rec/tot) if tot else 0.0

        feats['rec_share_diff'] = float(rec_share(md1) - rec_share(md2))

        # --------------------------------------------------------
        # same_move_streak_max_diff:
        #   For each player we compute the longest streak of repeating the same
        #   move (same name) on consecutive turns. Then:
        #     same_move_streak_max_diff = max_streak_p1 - max_streak_p2
        #   Positive: p1 "spams" the same move more than p2.
        # --------------------------------------------------------
        def same_move_streak_max(md):
            best=cur=0; prev=None
            for m in md:
                nm = str(m.get('name','')).lower() if m.get('name') else None
                if not nm:
                    cur = 0; prev = None
                else:
                    if nm==prev: cur += 1
                    else: cur = 1
                    best = max(best, cur); prev = nm
            return best

        feats['same_move_streak_max_diff'] = int(same_move_streak_max(md1) - same_move_streak_max(md2))

        # --------------------------------------------------------
        # confusion_late_share_diff:
        #   In the late third, share of turns where p1 is confused minus
        #   share of turns where p2 is confused.
        #
        # substitute_late_share_diff:
        #   In the late third, share of turns where p1 has Substitute active
        #   minus share where p2 has Substitute.
        #
        # reflect_early_share_diff:
        #   In the early third, share of turns where p1 has Reflect active
        #   minus share where p2 has Reflect.
        # --------------------------------------------------------
        def eff_share(eff, tag, slc):
            seg = eff[slc]
            return float(sum(1 for s in seg if tag in s) / max(1, len(seg))) if den else 0.0

        early_slice = slice(0, min(E, den))
        late_slice  = slice(max(0, den-E), den)
        feats['confusion_late_share_diff'] = float(
            eff_share(eff1,'confusion', late_slice) - eff_share(eff2,'confusion', late_slice)
        )
        feats['substitute_late_share_diff'] = float(
            eff_share(eff1,'substitute', late_slice) - eff_share(eff2,'substitute', late_slice)
        )
        feats['reflect_early_share_diff'] = float(
            eff_share(eff1,'reflect', early_slice) - eff_share(eff2,'reflect', early_slice)
        )

        # --------------------------------------------------------
        # confusion_turns_diff:
        #   (# of turns p1 is under confusion) - (# of turns p2 is under confusion).
        # --------------------------------------------------------
        feats['confusion_turns_diff'] = int(
            sum(1 for s in eff1 if 'confusion' in s) - sum(1 for s in eff2 if 'confusion' in s)
        )

        # --------------------------------------------------------
        # p1_sleep_streak_max:
        #   Longest consecutive number of turns where p1 is sleeping.
        #
        # sleep_streak_max_diff:
        #   p1_sleep_streak_max - p2_sleep_streak_max.
        # --------------------------------------------------------
        def max_streak(seq, tag):
            b=0; c=0
            for s in seq:
                if s==tag: c+=1; b=max(b,c)
                else: c=0
            return b

        feats['p1_sleep_streak_max'] = int(max_streak(p1s,'slp'))
        feats['sleep_streak_max_diff'] = int(max_streak(p1s,'slp') - max_streak(p2s,'slp'))

        # --------------------------------------------------------
        # p1_turns_par:
        #   Total number of turns where p1 is paralyzed.
        #
        # p2_turns_brn:
        #   Total number of turns where p2 is burned.
        # --------------------------------------------------------
        feats['p1_turns_par'] = int(sum(1 for s in p1s if s=='par'))
        feats['p2_turns_brn'] = int(sum(1 for s in p2s if s=='brn'))

        # --------------------------------------------------------
        # Type coverage / diversity:
        #
        # seen_types(names):
        #   Set of types seen on the field for the given list of Pokémon names.
        #
        # type_seen_count_diff:
        #   (# of distinct types seen from p1's revealed mons)
        # - (# of distinct types seen from p2's revealed mons).
        #
        # p2_seen_type_count:
        #   Raw count of distinct types seen for p2.
        # --------------------------------------------------------
        def seen_types(names):
            st = set()
            for nm in names:
                for tp in types_map.get(nm, ["notype","notype"]):
                    if tp != 'notype':
                        st.add(tp)
            return st

        st1 = seen_types(revealed_p1)
        st2 = seen_types(revealed_p2)
        feats['type_seen_count_diff'] = int(len(st1) - len(st2))
        feats['p2_seen_type_count'] = int(len(st2))

        # --------------------------------------------------------
        # Expected damage features
        #
        # For each move we estimate "expected damage" as:
        #   base power * accuracy * STAB * type multiplier
        #
        #   where:
        #     - STAB = 1.5 if move type in user's types, else 1.0
        #     - type multiplier = product of effectiveness vs opponent's types
        #
        # exp_dmg_stabtype_avg_diff:
        #   (mean expected damage of p2 moves) - (mean expected damage of p1 moves)
        # --------------------------------------------------------
        def types_of(name):
            return [x for x in types_map.get(name, ["notype","notype"]) if x!='notype']

        def type_multiplier(mv_type, opp_name):
            if not mv_type: return 1.0
            mult = 1.0
            for ot in types_of(opp_name):
                mult *= float(effectiveness.get(mv_type, {}).get(ot, 1.0))
            return float(mult)

        exp1 = []
        exp2 = []
        for i in range(den):
            m1 = md1[i]; m2 = md2[i]
            if m1.get('name'):
                bp  = float(m1.get('base_power',0.0) or 0.0)
                acc = float(m1['accuracy']) if (m1.get('accuracy') is not None) else 1.0
                mv  = str(m1.get('type','') or '').lower()
                mon = p1n[i] if i < len(p1n) else ''
                stab = 1.5 if (mv and mv in types_of(mon)) else 1.0
                opp = p2n[i] if i < len(p2n) else ''
                tm  = type_multiplier(mv, opp)
                exp1.append(bp * acc * stab * tm)
            else:
                exp1.append(0.0)
            if m2.get('name'):
                bp  = float(m2.get('base_power',0.0) or 0.0)
                acc = float(m2['accuracy']) if (m2.get('accuracy') is not None) else 1.0
                mv  = str(m2.get('type','') or '').lower()
                mon = p2n[i] if i < len(p2n) else ''
                stab = 1.5 if (mv and mv in types_of(mon)) else 1.0
                opp = p1n[i] if i < len(p1n) else ''
                tm  = type_multiplier(mv, opp)
                exp2.append(bp * acc * stab * tm)
            else:
                exp2.append(0.0)

        exp1 = np.array(exp1) if den else np.array([])
        exp2 = np.array(exp2) if den else np.array([])
        feats['exp_dmg_stabtype_avg_diff'] = float(
            (np.mean(exp2)-np.mean(exp1)) if den else 0.0
        )

        # --------------------------------------------------------
        # switch_delta_exp_damage_diff:
        #   For each player, consider only the turns where that player switches.
        #   For such a turn i, compute:
        #       delta = expected_damage[i] - expected_damage[i-1]
        #   Then take the mean over all their switches.
        #
        #   switch_delta_exp_damage_diff =
        #       mean_delta_for_p2_switches - mean_delta_for_p1_switches
        #
        #   Positive: p2's switches, on average, improve their offensive potential
        #   more than p1's switches improve p1's.
        # --------------------------------------------------------
        def switch_delta(exp_vals, sw_idx):
            vals=[]
            for i in sw_idx:
                prev = exp_vals[i-1] if i-1 >= 0 else 0.0
                vals.append(float(exp_vals[i] - prev))
            return float(np.mean(vals)) if vals else 0.0

        feats['switch_delta_exp_damage_diff'] = float(
            switch_delta(exp2, sw2) - switch_delta(exp1, sw1)
        )

        # --------------------------------------------------------
        # confusion_exp_dmg_ratio_diff:
        #   For each player we compute:
        #       ratio_when_confused = mean(expected damage WHEN confused)
        #                             / mean(expected damage when NOT confused)
        #
        #   confusion_exp_dmg_ratio_diff =
        #       ratio_for_p2 - ratio_for_p1.
        #
        #   This tells us how much confusion "depresses" expected damage for each side.
        # --------------------------------------------------------
        def ratio_when(eff, exp_vals, tag):
            with_tag  = [exp_vals[i] for i,s in enumerate(eff) if tag in s]
            without   = [exp_vals[i] for i,s in enumerate(eff) if tag not in s]
            mw = float(np.mean(with_tag)) if with_tag else 0.0
            mo = float(np.mean(without))  if without else 0.0
            return float(mw / (mo if mo!=0.0 else 1e-9))

        feats['confusion_exp_dmg_ratio_diff'] = float(
            ratio_when(eff2, exp2, 'confusion') - ratio_when(eff1, exp1, 'confusion')
        )

        # --------------------------------------------------------
        # substitute_break_rate_diff:
        #   For each player we count:
        #     - sub_t = number of turns where Substitute is up
        #     - sub_b = number of times a Substitute disappears from one turn
        #              to the next (i.e. is broken).
        #   break_rate = sub_b / sub_t
        #
        #   substitute_break_rate_diff = break_rate_p1 - break_rate_p2.
        #
        #   Positive: p1's Substitutes get broken relatively more often than p2's
        #   (conditional on having a sub up).
        # --------------------------------------------------------
        def breaks(eff):
            return sum(1 for a,b in zip(eff, eff[1:]) if ('substitute' in a and 'substitute' not in b))

        p1_sub_t = sum(1 for s in eff1 if 'substitute' in s)
        p2_sub_t = sum(1 for s in eff2 if 'substitute' in s)
        p1_sub_b = breaks(eff1) if den else 0
        p2_sub_b = breaks(eff2) if den else 0
        r1 = (p1_sub_b / p1_sub_t) if p1_sub_t else 0.0
        r2 = (p2_sub_b / p2_sub_t) if p2_sub_t else 0.0
        feats['substitute_break_rate_diff'] = float(r1 - r2)

        # --------------------------------------------------------
        # Healing efficiency:
        #
        # heal_efficiency_diff:
        #   For each side:
        #     - total_loss = sum of damage taken
        #     - total_heal = sum of healing done (based on HP recovery)
        #     - heal_eff   = total_heal / total_loss
        #
        #   heal_efficiency_diff = heal_eff_p2 - heal_eff_p1.
        #
        # heal_mid_diff / heal_late_diff:
        #   In the mid / late segments of the game we compute:
        #     (healing done by p2) - (healing done by p1)
        # --------------------------------------------------------
        p1_loss_sum = float(sum(p1_loss)) if p1_loss else 0.0
        p2_loss_sum = float(sum(p2_loss)) if p2_loss else 0.0
        p1_heal_sum = float(sum(p1_heal)) if p1_heal else 0.0
        p2_heal_sum = float(sum(p2_heal)) if p2_heal else 0.0
        p1_he_eff = float(p1_heal_sum / (p1_loss_sum if p1_loss_sum!=0.0 else 1e-9))
        p2_he_eff = float(p2_heal_sum / (p2_loss_sum if p2_loss_sum!=0.0 else 1e-9))
        feats['heal_efficiency_diff'] = float(p2_he_eff - p1_he_eff)
        feats['heal_mid_diff'] = float(
            (sum(p2_heal[E:mid_end]) - sum(p1_heal[E:mid_end])) if den>1 else 0.0
        )
        feats['heal_late_diff'] = float(
            (sum(p2_heal[-E:]) - sum(p1_heal[-E:])) if den>1 else 0.0
        )

        # --------------------------------------------------------
        # p2_max_boost_sum:
        #   For every turn we sum all boost stages in p2's "boosts" dict
        #   (e.g. +2 atk, +1 spe, etc.), then take the maximum over the battle.
        #   Positive and large values indicate that at some point p2 was heavily boosted.
        # --------------------------------------------------------
        max_b2 = 0
        for t in timeline:
            b = (t['p2_pokemon_state'].get('boosts') or {})
            s = int(sum(int(v) for v in b.values())) if b else 0
            if s > max_b2:
                max_b2 = s
        feats['p2_max_boost_sum'] = int(max_b2)

        # --------------------------------------------------------
        # atk_edge_used:
        #   p1_mean_atk_team:
        #       average base Attack of p1's whole team (from team details).
        #
        #   p2_mean_atk_used:
        #       usage-weighted average Attack of p2's mons that actually
        #       came on the field (weighted by turns on field).
        #
        #   atk_edge_used = p2_mean_atk_used - p1_mean_atk_team.
        #
        #   Positive: p2's used attackers are, on average, stronger than
        #   p1's team average Attack.
        # --------------------------------------------------------
        p1_team = battle.get('p1_team_details', []) or []
        if p1_team:
            p1_mean_atk_team = float(np.mean([int(p.get('base_atk',0)) for p in p1_team]))
        else:
            p1_mean_atk_team = 0.0
        p2_mean_atk_used = wmean(c2, 'atk')
        feats['atk_edge_used'] = float(p2_mean_atk_used - p1_mean_atk_team)

        # --------------------------------------------------------
        # hp_gap_slope_jump:
        #   We look around the first faint event ("first blood") on any side.
        #   We approximate the slope (trend) of hp_gap in a small window
        #   BEFORE and AFTER the first faint turn; hp_gap_slope_jump is:
        #
        #       slope_post - slope_pre
        #
        #   Positive: after the first faint, hp_gap tends to increase more
        #   (i.e. p2's advantage grows or p1's disadvantage increases).
        # --------------------------------------------------------
        fb_p1 = None
        fb_p2 = None
        for i in range(1, len(p2s)):
            if p2s[i-1]!='fnt' and p2s[i]=='fnt':
                fb_p1 = i+1; break
        for i in range(1, len(p1s)):
            if p1s[i-1]!='fnt' and p1s[i]=='fnt':
                fb_p2 = i+1; break
        fb_turns = [x for x in (fb_p1, fb_p2) if x is not None]
        if den and len(hp_gap)>=2 and fb_turns:
            fb = int(min(fb_turns))
            pre_s = max(0, fb-1-5); pre_e = max(0, fb-1)
            post_s = max(0, fb-1);  post_e = min(den, fb-1+5)
            def slope(arr):
                if len(arr) < 2: return 0.0
                x = np.arange(len(arr))
                try:
                    return float(np.polyfit(x, arr, 1)[0])
                except:
                    return 0.0
            slope_pre  = slope(hp_gap[pre_s:pre_e]) if pre_e-pre_s>=2 else 0.0
            slope_post = slope(hp_gap[post_s:post_e]) if post_e-post_s>=2 else 0.0
            feats['hp_gap_slope_jump'] = float(slope_post - slope_pre)
        else:
            feats['hp_gap_slope_jump'] = 0.0

        # --------------------------------------------------------
        # comeback_time_share_diff:
        #   We look for "sustained" comebacks in hp_gap:
        #   A comeback for one side is when hp_gap flips sign and then
        #   stays in that sign for at least min_len turns.
        #
        #   sustained_share(arr, want_positive=True):
        #       If want_positive=True, we look for a switch from negative to
        #       positive hp_gap that persists; the returned value is a
        #       fraction in [0,1] describing how "late" in the battle the
        #       sustained positive run starts (relative to battle length).
        #
        #   p1_cb: comeback for p1, seen as sustained NEGATIVE hp_gap run
        #   p2_cb: comeback for p2, seen as sustained NEGATIVE (-hp_gap) run
        #
        #   comeback_time_share_diff = p1_cb - p2_cb.
        #   Positive: p1's comeback tends to happen later / more impressively
        #   than p2's (in this crude metric).
        # --------------------------------------------------------
        def sustained_share(arr, want_positive=True, min_len=3):
            if len(arr)==0: return 0.0
            signs = np.sign(arr)
            target = 1 if want_positive else -1
            start=None
            for i in range(1, len(signs)):
                if (signs[i-1] == -target) and (signs[i] == target):
                    start = i
                    break
            if start is None: return 0.0
            streak=0
            for j in range(start, len(signs)):
                if signs[j] == target:
                    streak += 1
                    if streak >= min_len:
                        return float((len(signs)-j) / len(signs))
                else:
                    break
            return 0.0

        p1_cb = float(sustained_share(hp_gap, want_positive=False))
        p2_cb = float(sustained_share(-hp_gap, want_positive=False))
        feats['comeback_time_share_diff'] = float(p1_cb - p2_cb)

        # --------------------------------------------------------
        # Redundant assignments, but kept to preserve exact behaviour:
        #   These lines simply re-assign already computed features.
        #   They do not change any value, but they were present in the
        #   original notebook and are left here to keep everything identical.
        # --------------------------------------------------------
        feats['status_diversity_p1'] = int(sd1)
        feats['p2_seen_type_count']  = int(len(st2))
        feats['bp_mean_p2'] = feats['bp_mean_p2']

        # --------------------------------------------------------
        # Finally, we attach battle_id and (if present) player_won
        # so that the model can use this table as input/output.
        # --------------------------------------------------------
        feats['battle_id'] = battle.get('battle_id')
        if 'player_won' in battle:
            feats['player_won'] = int(battle['player_won'])

        rows.append(feats)

    # Turn the list of feature dicts into a DataFrame
    # and replace any missing values with 0
    df = pd.DataFrame(rows).fillna(0)
    return df


# =========================
# Our code is basically creating two dataset, a train and a test one, based on the train and test sets provided by the teachers. 
# In the train set there are 66 columns (features), that are: "revealed_count_diff","hp_edge_final","status_severity_gap_final","p1_alive_final",
# "active_entropy_diff","status_turns_advantage","tox_ratio_diff","forced_switch_share_diff",
# "rs_hit_share_diff","boom_count_diff","counter_count_diff","move_diversity_p1",
# "used_mean_spe_diff","p2_late_damage","p1_status_mean_final","attacks_rate_diff",
# "bp_mean_p2","hp_gap_peak_turn_share","eff_speed_adv_share_p2","types_last_round",
# "atk_edge_used","last_switch_turn_p1","lead_type_edge","hp_gap_slope_jump",
# "initiative_late_diff","initiative_early_diff","comeback_time_share_diff",
# "severe_status_early_share","rec_share_diff","switch_delta_exp_damage_diff",
# "substitute_break_rate_diff","confusion_late_share_diff","status_diversity_p1",
# "hp_gap_autocorr","status_late_share_diff","pingpong_switches_diff",
# "same_move_streak_max_diff","substitute_late_share_diff","p1_sleep_streak_max",
# "immune_count_diff","heal_efficiency_diff","exp_dmg_stabtype_avg_diff","hp_gap_var",
# "both_switched_share","p1_switch_late_share","p2_max_boost_sum","status_diversity_diff",
# "hp_gap_sign_flips","reflect_early_share_diff","lead_def_edge","heal_mid_diff",
# "confusion_exp_dmg_ratio_diff","type_seen_count_diff","eff_speed_edge_avg",
# "heal_late_diff","sleep_streak_max_diff","p1_turns_par","p2_used_count",
# "hp_gap_peak","p2_turns_brn","p1_immune_count","p2_seen_type_count",
# "p1_pingpong_switches","confusion_turns_diff"

# For an easy understanding of what these variables actually means, we have here a better description:

# ============================================================
# FEATURE GLOSSARY FOR build_features()
# ============================================================
#
# 1. End-of-battle state (HP & status)
# ------------------------------------
# hp_edge_final
#   Final HP edge: mean final HP% of player 2’s revealed Pokémon
#   minus mean final HP% of player 1’s revealed Pokémon.
#   > 0  -> p2 has more HP on average at the end.
#
# p1_alive_final
#   Number of player 1’s Pokémon with HP% > 0 at the end of the battle.
#
# p1_status_mean_final
#   Mean final status severity of player 1’s team, using the MAP_STATUS mapping.
#
# status_severity_gap_final
#   Final status severity difference:
#   (mean status severity of player 2’s team) − (mean status severity of player 1’s team).
#   > 0  -> p2 is, on average, in a worse status condition than p1 at the end.
#
# ------------------------------------------------------------
# 2. Status over time
# ------------------------------------------------------------
# status_turns_advantage
#   Sum over turns of (status severity for p1 − status severity for p2).
#   > 0  -> p1 spends more turns under bad statuses than p2.
#
# tox_ratio_diff
#   For each player we compute:
#     tox_ratio = (# toxic turns) / (# poison+toxic turns).
#   tox_ratio_diff = tox_ratio_p1 − tox_ratio_p2.
#   > 0  -> relative to p2, p1 is more often under TOX (badly poisoned) when poisoned at all.
#
# severe_status_early_share
#   Share of early turns (first third of the battle) where at least one side
#   has a “severe” status (severity ≥ 2).
#
# status_diversity_p1
#   Number of distinct non-"nostatus" conditions experienced by player 1.
#
# status_diversity_diff
#   (status_diversity_p1) − (status diversity of p2).
#
# p1_turns_par
#   Number of turns where player 1’s active Pokémon is paralyzed.
#
# p2_turns_brn
#   Number of turns where player 2’s active Pokémon is burned.
#
# confusion_turns_diff
#   (# of turns p1 is under confusion) − (# of turns p2 is under confusion).
#
# p1_sleep_streak_max
#   Longest consecutive sequence of turns in which p1’s active Pokémon is asleep.
#
# sleep_streak_max_diff
#   (p1’s longest sleep streak) − (p2’s longest sleep streak).
#
# confusion_late_share_diff
#   In the last third of the battle:
#   (share of turns p1 is confused) − (share of turns p2 is confused).
#
# confusion_exp_dmg_ratio_diff
#   For each player we compute:
#     ratio_confused = mean(expected damage when confused)
#                      / mean(expected damage when not confused).
#   Feature = ratio_confused_p2 − ratio_confused_p1.
#   It measures how strongly confusion depresses expected damage for each side.
#
# ------------------------------------------------------------
# 3. Team revelation and type coverage
# ------------------------------------------------------------
# revealed_count_diff
#   (# of distinct Pokémon that appeared on the field for p1)
#   − (# for p2).
#
# type_seen_count_diff
#   (# of distinct types seen on p1’s revealed Pokémon)
#   − (# of distinct types seen on p2’s revealed Pokémon).
#
# p2_seen_type_count
#   Number of distinct types seen on player 2’s revealed Pokémon.
#
# ------------------------------------------------------------
# 4. HP gap dynamics and damage/healing
# ------------------------------------------------------------
# hp_gap(t) = HP_p2(t) − HP_p1(t)
#   > 0  -> p2 is ahead in HP at turn t
#   < 0  -> p1 is ahead in HP at turn t
#
# p2_late_damage
#   Total HP lost by player 2 in the last third of the battle.
#
# hp_gap_peak
#   Maximum value of hp_gap over all turns.
#   Measures p2’s best HP advantage during the battle.
#
# hp_gap_peak_turn_share
#   (index_of_turn_with_max_hp_gap + 1) / total_turns.
#   Where in the battle (0–1) the peak advantage for p2 occurs.
#
# hp_gap_var
#   Variance of hp_gap over the whole battle.
#   High variance means very swingy / volatile games.
#
# hp_gap_autocorr
#   Autocorrelation of hp_gap between consecutive turns:
#   corr(hp_gap[t], hp_gap[t+1]).
#   High values mean advantages persist from turn to turn.
#
# hp_gap_sign_flips
#   Number of times the sign of hp_gap changes (excluding zeros).
#   Roughly: how often the lead switches between players.
#
# hp_gap_slope_jump
#   Around the first faint (“first blood”), we estimate:
#     slope_pre  = slope of hp_gap in the window BEFORE the faint.
#     slope_post = slope of hp_gap in the window AFTER the faint.
#   hp_gap_slope_jump = slope_post − slope_pre.
#   > 0  -> after the first faint, hp_gap tends to increase more (p2 advantage grows).
#
# comeback_time_share_diff
#   We look for “sustained” comebacks (sign change in hp_gap that persists).
#   For each side we compute a share in [0,1] describing how late this
#   sustained run happens.
#   comeback_time_share_diff = comeback_share_p1 − comeback_share_p2.
#   > 0  -> p1’s comeback tends to occur later / more pronounced than p2’s.
#
# heal_efficiency_diff
#   For each player:
#     loss = total damage taken
#     heal = total healing done
#     heal_eff = heal / loss
#   Feature = heal_eff_p2 − heal_eff_p1.
#
# heal_mid_diff
#   In the middle third of the battle:
#   (total healing by p2) − (total healing by p1).
#
# heal_late_diff
#   In the last third of the battle:
#   (total healing by p2) − (total healing by p1).
#
# ------------------------------------------------------------
# 5. Move usage and offensive power
# ------------------------------------------------------------
# attacks_rate_diff
#   (attacks per turn for p1) − (attacks per turn for p2),
#   where an “attack” is any turn with a non-empty move name.
#
# bp_mean_p2
#   Mean base power of moves used by player 2
#   (only counting turns where a move is used and base_power is not None).
#
# exp_dmg_stabtype_avg_diff
#   Expected damage is defined as:
#     base_power × accuracy × STAB × type_multiplier
#   where STAB = 1.5 if move type is among user’s types, else 1.0.
#   Feature = (mean expected damage for p2) − (mean expected damage for p1).
#
# switch_delta_exp_damage_diff
#   For switch turns only, we consider:
#     delta = expected_damage[i] − expected_damage[i-1]
#   For each player, we take the mean delta over all their switch turns.
#   Feature = mean_delta_for_p2_switches − mean_delta_for_p1_switches.
#   > 0  -> p2’s switches improve expected damage more than p1’s.
#
# same_move_streak_max_diff
#   For each player we compute the longest streak of consecutive turns
#   where they use the same move.
#   Feature = max_streak_p1 − max_streak_p2.
#
# move_diversity_p1
#   Number of distinct moves used by player 1 in the battle.
#
# rec_share_diff
#   share of recovery moves for p1 − share of recovery moves for p2,
#   where “recovery” moves are defined in RECOVERY_MOVES.
#
# boom_count_diff
#   (# of Explosion/Selfdestruct used by p1) − (# used by p2).
#
# counter_count_diff
#   (# of “Counter” used by p1) − (# used by p2).
#
# ------------------------------------------------------------
# 6. Speed, initiative, and boosts
# ------------------------------------------------------------
# used_mean_spe_diff
#   Usage-weighted speed difference:
#   (mean base Speed of p1’s used Pokémon, weighted by turns on field)
#   − (mean base Speed of p2’s used Pokémon, weighted by turns on field).
#
# eff_speed_adv_share_p2
#   Share of turns where p2’s “effective speed” is at least as high as p1’s.
#   Effective speed combines:
#     - base speed
#     - boosts (Gen 1 style multipliers)
#     - paralysis penalty (× 0.25 if PAR).
#
# eff_speed_edge_avg
#   Mean (effective_speed_p2 − effective_speed_p1) over all turns.
#
# initiative_early_diff
#   In the early third of the battle:
#   we measure how often p1 base speed ≥ p2 base speed, turn by turn,
#   and convert it to a “difference share” (share_p1 − share_p2).
#
# initiative_late_diff
#   Same as initiative_early_diff but computed on the last third of the battle.
#
# last_switch_turn_p1
#   Last turn index (1-based) where p1 switches.
#   If p1 never switches, set to number_of_turns + 1.
#
# p2_max_boost_sum
#   Over all turns, we look at p2’s boosts dict (stat stages) and sum them.
#   p2_max_boost_sum is the maximum of this sum over the battle.
#   Large positive values mean p2 was heavily boosted at some point.
#
# ------------------------------------------------------------
# 7. Switching behaviour
# ------------------------------------------------------------
# forced_switch_share_diff
#   A switch is considered "forced" if, right after the switch, the HP gap
#   moves against the switching player:
#     - for p1: hp_gap[i] − hp_gap[i−1] > 0 (gap moves towards p2)
#     - for p2: hp_gap[i] − hp_gap[i−1] < 0 (gap moves towards p1)
#   We compute the share of forced switches for each player.
#   Feature = share_forced_p2 − share_forced_p1.
#
# p1_pingpong_switches
#   Number of “ping-pong” patterns in p1’s active sequence:
#   A → B → A (return to the same mon after exactly one different mon).
#
# pingpong_switches_diff
#   (ping-pong count for p1) − (ping-pong count for p2).
#
# both_switched_share
#   Share of turns (from turn 2 onwards) where BOTH players switch
#   between turn i−1 and i.
#
# p1_switch_late_share
#   In the last third of the battle:
#   (number of turns where p1 switches) / (length of the late window).
#
# ------------------------------------------------------------
# 8. Field effects and immunities
# ------------------------------------------------------------
# rs_hit_share_diff
#   For each player, we classify hits as:
#     - super effective (se)
#     - resisted (rs)
#     - immune (im)
#   rs_hit_share = rs_hits / total_effective_hits.
#   Feature = rs_hit_share_p1 − rs_hit_share_p2.
#
# p1_immune_count
#   Number of times p1’s moves hit into an immunity (no damage).
#
# immune_count_diff
#   (# of immunities triggered by p1’s moves) − (# of immunities triggered by p2’s moves).
#
# substitute_late_share_diff
#   In the last third of the battle:
#   (share of turns with Substitute active on p1’s side)
#   − (share of turns with Substitute on p2’s side).
#
# substitute_break_rate_diff
#   For each player:
#     sub_t = # of turns with Substitute active
#     sub_b = # of times Substitute disappears between consecutive turns
#   break_rate = sub_b / sub_t.
#   Feature = break_rate_p1 − break_rate_p2.
#
# reflect_early_share_diff
#   In the early third of the battle:
#   (share of turns with Reflect on p1’s side)
#   − (share of turns with Reflect on p2’s side).
#
# ------------------------------------------------------------
# 9. Type matchups and team strength
# ------------------------------------------------------------
# lead_type_edge
#   Type matchup edge on turn 1:
#   aggregate advantage of p2’s lead types over p1’s lead types
#   minus the reverse (p1’s types over p2’s).
#   > 0  -> lead position is type-favorable to p2.
#
# lead_def_edge
#   Base Defense difference between leads:
#   (Defense of p2’s lead) − (Defense of p1’s lead).
#
# types_last_round
#   Type edge on the final turn:
#   sum of type advantages of p1’s active Pokémon vs p2’s
#   minus the reverse (p2 vs p1).
#   > 0  -> final board type advantage for p1.
#
# atk_edge_used
#   Attack edge between teams:
#     p1_mean_atk_team  = average base Attack of p1’s team from team details.
#     p2_mean_atk_used  = usage-weighted mean base Attack of p2’s Pokémon that actually played.
#   Feature = p2_mean_atk_used − p1_mean_atk_team.
#
# p2_used_count
#   Number of distinct Pokémon actually used by player 2
#   (i.e., took the field at least once).
#
# active_entropy_diff
#   Entropy of the distribution of active Pokémon names for each player.
#   High entropy means more diverse and balanced usage.
#   Feature = H(p1_active) − H(p2_active).
#   > 0  -> p1 used a more diverse mix of Pokémon than p2.
#
# ============================================================
# End of feature glossary for build_features()
# ============================================================


# In the test set there are 65 columns (features): the features in train set but player_won.
# Player_won is only in the train set because is the target variable.
# Belowe we are actually computing the features we built above for each line of our jsonl datasets (each line is a pokemon battle).
# We are calling build_features(...) on train and test to create
# the final tables that we will feed into the models.
# train_df has both features and the target player_won.
# test_df has only features (we will predict player_won for each row).
# ----------------------------------------------------------
train_df = build_features(train_data)
test_df  = build_features(test_data)

print(f"[FINAL] train_df: {train_df.shape}")
print(f"[FINAL] test_df : {test_df.shape}")


Building features:   0%|          | 0/10000 [00:00<?, ?it/s]

Building features:   0%|          | 0/5000 [00:00<?, ?it/s]

[FINAL] train_df: (10000, 66)
[FINAL] test_df : (5000, 65)


In [3]:
# ============================================================
# In this second cell, we build X, y, X_test from the features table.
# ------------------------------------------------------------
# At this point we already have:
#   - train_df: a big table of features for each battle, plus:
#         * 'battle_id'   (identifier of the battle)
#         * 'player_won'  (our target: 1 if player 1 won, 0 otherwise)
#   - test_df : same features and 'battle_id', but WITHOUT 'player_won'
#
# In this small cell we:
#   - find the feature columns that appear in BOTH train_df and test_df
#   - drop 'battle_id' and 'player_won' (beacuse they are not input features)
#   - we build:
#       * X      -> training features (numpy array)
#       * y      -> training labels (0/1)
#       * X_test -> test features (numpy array)

# In this cell we have taken our variables, we have sorted them and we have insert them in X and X_test.
# X is the numpy array computed from the train dataset (this is why it has 10k rows)
# X_test is the numpy array computed from the test dataset (this is why it has 5k rows).

# We have stored the target variable in a numpy array as well, called y.

# At the end of the cell we have added a sanity check, that prints the shape of X, X_test and y
# ============================================================

# Find all column names that are common between train and test
cols = sorted(set(train_df.columns) & set(test_df.columns))

# Feature columns are all common columns EXCEPT the identifiers/target.
# We do not want to use 'battle_id' or 'player_won' as numeric features.
f_cols = [c for c in cols if c not in ("battle_id", "player_won")]

# y is the target vector (the thing we want to predict).
# It is 1D: for each battle, 1 if player 1 won, 0 otherwise.
y = train_df["player_won"].astype(int).to_numpy()

# X is the matrix of features for training.
# Rows = battles, columns = features in f_cols.
X = train_df[f_cols].to_numpy(dtype=float)

# X_test is the same, but for the test set (where we do not know the labels).
X_test = test_df[f_cols].to_numpy(dtype=float)


# Sanity check: shapes of training and test matrices
print(f"X_train shape: {X.shape},\ny_train shape: {y.shape}")
print(f"X_test  shape: {X_test.shape}")


X_train shape: (10000, 64),
y_train shape: (10000,)
X_test  shape: (5000, 64)


In [4]:
# ============================================================
# 3) MODELS, HYPERPARAMETER SEARCH, ENSEMBLE, SUBMISSION
# ------------------------------------------------------------
# In this cell we:
#   - define three base models:
#       * Logistic Regression
#       * AdaBoost (with tiny decision trees as weak learners)
#       * XGBoost (gradient boosting)
#   - set up hyperparameter grids for each model
#   - run:
#       * GridSearchCV for Logistic Regression as suggested by the professor and his TA. 
#       * GridSearchCV for AdaBoost, again as suggested by the professor and his TA. 
#       * RandomizedSearchCV for XGBoost
#   - build a soft voting ensemble of the three tuned models
#     (using their CV accuracy to weight them)
#   - train the final ensemble on ALL the training data
#   - generate predictions for the test set
#   - save a CSV file we can submit to Kaggle / the competition.
# You can think of this cell as the "model playground".
# ============================================================

from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier  
import warnings
warnings.filterwarnings("ignore")


# Global configuration for our experiments:
# We have decided to set the main starting parameters here at the start of our model.
# The parameters are:
# - SEED: for enanching reproducible randomness
# - CV_FOLDS: how many cross-validation splits
# - N_JOBS: how many CPU cores to use (-1 means "all cores" available)
# - SCORING: which metric we use to judge models (here: accuracy)

SEED       = 42
CV_FOLDS   = 5
N_JOBS     = -1
SCORING    = "accuracy"

np.random.seed(SEED)
cv5 = StratifiedKFold(n_splits=CV_FOLDS, shuffle=True, random_state=SEED)

def cv_mean_acc(model, X, y):
    """Run cross-validation for this model and return the mean accuracy.
    It takes a model (like LogisticRegression or a Pipeline),
    trains and evaluates it across the folds in cv5, and then
    returns the average accuracy.
    """
    scores = cross_val_score(model, X, y, cv=cv5, scoring=SCORING, n_jobs=N_JOBS)
    return float(np.mean(scores))


# PIPELINES 
# Here we build 3 different machine learning pipelines.
# A pipeline is like a small factory that takes in raw features
# and spits out predictions. It can contain multiple steps
# (for example: scaling, then a classifier).


# Logistic Regression
# Here we run GridSearchCV on the Logistic Regression pipeline.
# It will try all combinations in param_grid_lr, using cross-validation,
# and pick the hyperparameters with the best mean accuracy.

# This is a linear model. We put it inside a Pipeline
# with a StandardScaler to make the features more comparable in scale.

pipe_lr = Pipeline([
    ("scale", StandardScaler(with_mean=False)),
    ("clf",   LogisticRegression(random_state=SEED))
])

# AdaBoost
# Same idea as above, but now for the AdaBoost pipeline.
# We try different numbers of estimators, learning rates and tree depths.

# AdaBoost is an ensemble of many small decision trees ("weak learners").
# Each tiny tree is not very powerful on its own, but together they can
# capture non-linear patterns. Here we use max_depth=1 trees (decision stumps).

pipe_ada = Pipeline([
    ("ada", AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=1, random_state=SEED),
        random_state=SEED
    )),
])

# XGBoost
# XGBoost 
# For XGBoost the grid is big, so we randomly sample a subset
# of the combinations (n_iter) instead of exploring all of them.
# This avoid being stack running the code for hours.
# In particular we are implementing a powerful gradient boosting model that builds trees one by one,
# each new tree trying to correct the mistakes of the previous ones.
# It is usually very strong on tabular data like this.

pipe_xgb = Pipeline([
    ("xgb", XGBClassifier(
        objective="binary:logistic",
        eval_metric="logloss",
        n_jobs=N_JOBS,
        random_state=SEED
    ))
])


# Hyperparameter grid for Logistic Regression:
# - solver, penalty: we focus on liblinear + L1 here
# - C: strength of regularization (higher C = weaker regularization)
# - tol: convergence tolerance for the optimizer
# - class_weight: we optionally let the model handle class imbalance
# - max_iter: how many iterations the solver is allowed to run


param_grid_lr = {
    "clf__solver":       ["liblinear"],
    "clf__penalty":      ["l1"],
    "clf__C":            [2.50, 3.00, 3.25, 3.50, 4.00],
    "clf__tol":          [1e-6, 5e-6, 1e-5, 5e-5, 1e-4],
    "clf__class_weight": [None, "balanced"],
    "clf__max_iter":     [6000, 10000],
}

# Hyperparameter grid for AdaBoost:
# - n_estimators: how many small trees
# - learning_rate: how fast we update the ensemble
# - estimator: the base decision tree (here we try depths 1 and 2)

param_grid_ada = {
    "ada__algorithm":      ["SAMME.R"],
    "ada__n_estimators":   [270, 300, 330, 350],
    "ada__learning_rate":  [0.15, 0.20, 0.26, 0.38],
    "ada__estimator":      [DecisionTreeClassifier(max_depth=d, random_state=SEED) for d in [1, 2]],
}

# Hyperparameter grid for XGBoost:
# - n_estimators: number of boosting rounds (trees)
# - max_depth: depth of each tree
# - learning_rate: step size shrinkage
# - subsample / colsample_bytree: how much data/features each tree sees
# - min_child_weight, reg_alpha, reg_lambda, gamma: regularization terms

param_grid_xgb = {
    "xgb__n_estimators":     [800, 850, 900, 950, 1000],
    "xgb__max_depth":        [4, 5],
    "xgb__learning_rate":    [0.015, 0.020, 0.022],
    "xgb__subsample":        [0.55, 0.60],
    "xgb__colsample_bytree": [0.60, 0.65],
    "xgb__min_child_weight": [2, 3],
    "xgb__reg_alpha":        [0.00, 0.03, 0.05],
    "xgb__reg_lambda":       [0.80, 1.00],
    "xgb__gamma":            [0.10, 0.20, 0.30],
}

def grid_size(grid: dict) -> int:
    """Return how many total combinations are in a parameter grid.
    We just multiply the lengths of the value lists in the dict.
    """

    n = 1
    for v in grid.values():
        n *= len(v)
    return n


# TUNING
# In this section we search for good hyperparameters for each model.
# - For Logistic Regression and AdaBoost we do a full GridSearchCV.
# - For XGBoost we do a RandomizedSearchCV because the grid is large.


# Logistic Regression
gs_lr = GridSearchCV(
    estimator=pipe_lr,
    param_grid=param_grid_lr,
    cv=cv5,
    scoring=SCORING,
    n_jobs=N_JOBS,
    verbose=1
)
gs_lr.fit(X, y)
lr_best      = gs_lr.best_estimator_
lr_best_acc  = gs_lr.best_score_

print("\n Logistic Regression")
print("CV accuracy:", round(lr_best_acc, 4))

# AdaBoost
gs_ada = GridSearchCV(
    estimator=pipe_ada,
    param_grid=param_grid_ada,
    cv=cv5,
    scoring=SCORING,
    n_jobs=N_JOBS,
    verbose=1
)
gs_ada.fit(X, y)
ada_best     = gs_ada.best_estimator_
ada_best_acc = gs_ada.best_score_

print("\n AdaBoost")
print("CV accuracy:", round(ada_best_acc, 4))

# XGBoost 
total_xgb = grid_size(param_grid_xgb)
n_iter_xgb = min(200, total_xgb)

rs_xgb = RandomizedSearchCV(
    estimator=pipe_xgb,
    param_distributions=param_grid_xgb,
    n_iter=n_iter_xgb,
    cv=cv5,
    scoring=SCORING,
    n_jobs=N_JOBS,
    verbose=1,
    random_state=SEED
)
rs_xgb.fit(X, y)
xgb_best     = rs_xgb.best_estimator_
xgb_best_acc = rs_xgb.best_score_

print("\n XGBoost")
print("CV accuracy:", round(xgb_best_acc, 4))

estimators = [
    ("lr",  lr_best),
    ("ada", ada_best),
    ("xgb", xgb_best),
]

# SOFT VOTING 
# Once we have the best version of each base model (LR, Ada, XGB),
# we combine them into a soft voting ensemble.
# "Soft" means that we average the predicted probabilities from each model,
# instead of just voting on hard class labels 0/1.
# We also give each model a weight based on its CV accuracy, so better
# models have more influence in the final decision.

cv_scores = {}
for name, mdl in estimators:
    acc = cv_mean_acc(mdl, X, y)
    cv_scores[name] = acc
    print(f"[CV] {name}: acc={acc:.4f}")

weights = [max(1e-6, cv_scores[name] ** 2) for name, _ in estimators]
vote = VotingClassifier(
    estimators=estimators,
    voting="soft",
    weights=weights,
    n_jobs=N_JOBS
)
vote_acc = cv_mean_acc(vote, X, y)
print(f"CV voting(soft, weighted): acc={vote_acc:.4f} , weights={dict(zip([n for n, _ in estimators], weights))}")


best_name  = "voting"
best_model = vote
best_cv    = vote_acc
print(f"\n>>> Best model: {best_name} (CV acc={best_cv:.4f})")

# SUBMISSION 
# Here we train the chosen model (the voting ensemble) on ALL the training data,
# Here we are building the submission DataFrame for the competition/evaluation.

best_model.fit(X, y)
train_pred = best_model.predict(X)
train_acc  = accuracy_score(y, train_pred)
print(f"\nTrain accuracy: {train_acc:.4f}")

proba = best_model.predict_proba(X_test)[:, 1]
pred  = (proba >= 0.5).astype(int)

sub = pd.DataFrame({
    "battle_id": test_df["battle_id"],
    "player_won": pred
})

out_path = (Path.cwd() / "submission_ens.csv").resolve()
sub.to_csv(out_path, index=False)
print(f"submission saved to: {out_path}")

# SUMMARY
# Finally, we print a short summary of the cross-validation scores for the
# individual models and for the ensemble, plus the training accuracy.

print("\n SUMMARY ")
for name, acc in cv_scores.items():
    print(f"{name}: CV accuracy = {acc:.4f}")
print(f"Voting ensemble (soft, weighted): CV accuracy = {vote_acc:.4f}")
print(f"Training accuracy (full data, voting ensemble): {train_acc:.4f}")


Fitting 5 folds for each of 100 candidates, totalling 500 fits

 Logistic Regression
CV accuracy: 0.8563
Fitting 5 folds for each of 32 candidates, totalling 160 fits

 AdaBoost
CV accuracy: 0.8497
Fitting 5 folds for each of 200 candidates, totalling 1000 fits

 XGBoost
CV accuracy: 0.8513
[CV] lr: acc=0.8563
[CV] ada: acc=0.8497
[CV] xgb: acc=0.8513
CV voting(soft, weighted): acc=0.8538 , weights={'lr': 0.7332496900000001, 'ada': 0.72199009, 'xgb': 0.7247116899999999}

>>> Best model: voting (CV acc=0.8538)

Train accuracy: 0.8726
submission saved to: C:\Users\2003l\OneDrive\Documenti\fds-pokemon-battles-prediction-2025\submission_ens.csv

 SUMMARY 
lr: CV accuracy = 0.8563
ada: CV accuracy = 0.8497
xgb: CV accuracy = 0.8513
Voting ensemble (soft, weighted): CV accuracy = 0.8538
Training accuracy (full data, voting ensemble): 0.8726
