# Feature Engineering

## Features based on advantage

In [None]:
import pandas as pd

def create_special_team_advantage_feature(data: list[dict], base_stats: dict) -> pd.DataFrame:
    """
    Compute the Special-stat advantage of player 1 over player 2 for each battle.

    special_team_advantage =
        (sum of "spa" over every Pokemon in P1's team)
        - (sum of "spa" over the distinct Pokemon P2 showed in the timeline)

    Names are matched against ``base_stats`` after lower-casing; names that are
    missing, not strings, or unknown to ``base_stats`` contribute 0.

    Args:
        data: Battle records. Each record may carry "battle_id",
            "p1_team_details" (list of dicts with a "name") and
            "battle_timeline" (list of turns with a "p2_pokemon_state").
        base_stats: Mapping from lower-cased Pokemon name to a stats dict
            that contains the key "spa".

    Returns:
        One row per battle with the columns battle_id, p1_special_sum,
        p2_special_sum and special_team_advantage. The columns are present
        even when ``data`` is empty.
    """
    columns = ["battle_id", "p1_special_sum", "p2_special_sum", "special_team_advantage"]

    def special_of(name) -> int:
        # A None / non-string name would otherwise crash on .lower()
        if not isinstance(name, str):
            return 0
        key = name.lower()
        return base_stats[key]["spa"] if key in base_stats else 0

    rows = []

    for battle in data:
        battle_id = battle.get("battle_id")

        # 1. Special sum over P1's team (duplicates count once per team slot)
        p1_team = battle.get("p1_team_details", [])
        p1_special_sum = 0
        if isinstance(p1_team, list):
            for mon in p1_team:
                if isinstance(mon, dict):
                    p1_special_sum += special_of(mon.get("name", ""))

        # 2. Collect every distinct Pokemon P2 showed during the battle
        timeline = battle.get("battle_timeline", [])
        p2_used_names = set()
        if isinstance(timeline, list):
            for turn in timeline:
                state = turn.get("p2_pokemon_state") if isinstance(turn, dict) else None
                name = state.get("name") if isinstance(state, dict) else None
                if isinstance(name, str) and name:
                    p2_used_names.add(name.lower())

        # 3. Special sum over the Pokemon P2 actually used
        p2_special_sum = sum(special_of(name) for name in p2_used_names)

        # 4. Final difference
        rows.append({
            "battle_id": battle_id,
            "p1_special_sum": p1_special_sum,
            "p2_special_sum": p2_special_sum,
            "special_team_advantage": p1_special_sum - p2_special_sum,
        })

    return pd.DataFrame(rows, columns=columns)


## Feature definitions - first (base) set


In [None]:
from tqdm.notebook import tqdm
import numpy as np

def create_simple_features(data: list[dict]) -> pd.DataFrame:
    """
    Build the baseline feature table: P1 team averages and P2 lead base stats.

    For every battle the row holds:
      * p1_mean_<stat>: mean of base_<stat> over P1's team (only when the team
        is non-empty),
      * p2_lead_<stat>: base_<stat> of P2's lead (only when lead details exist),
      * battle_id, and player_won (as int) when the record carries it.

    Cells that were not produced for a battle are filled with 0.
    """
    # Order matters: it fixes the column order of the resulting frame.
    stat_names = ("hp", "spe", "atk", "def", "spa")

    records = []
    for battle in tqdm(data, desc="Extracting features"):
        record = {}

        # Player 1: average each base stat across the whole team
        team = battle.get('p1_team_details', [])
        if team:
            for stat in stat_names:
                values = [member.get(f'base_{stat}', 0) for member in team]
                record[f'p1_mean_{stat}'] = np.mean(values)

        # Player 2: only the lead Pokemon is known up front
        lead = battle.get('p2_lead_details')
        if lead:
            for stat in stat_names:
                record[f'p2_lead_{stat}'] = lead.get(f'base_{stat}', 0)

        # Identifier and, when available, the target variable
        record['battle_id'] = battle.get('battle_id')
        if 'player_won' in battle:
            record['player_won'] = int(battle.get('player_won'))

        records.append(record)

    return pd.DataFrame(records).fillna(0)



In [None]:
def _strategic_player_stats(timeline: list, player: str, opponent: str) -> dict:
    """Compute momentum, damage efficiency and risk ratio for one side.

    Args:
        timeline: Non-empty sequence of turn dicts from "battle_timeline".
        player: Key prefix of the side being scored ("p1" or "p2").
        opponent: Key prefix of the other side.

    Returns:
        Dict with the keys "momentum" (share of turns with higher hp_pct than
        the opponent), "damage_efficiency" (mean hp_pct drop of the opponent
        over the turns where a drop occurred) and "risk_ratio" (share of the
        player's moves whose accuracy is below 0.8).
    """
    own_state_key = f"{player}_pokemon_state"
    opp_state_key = f"{opponent}_pokemon_state"
    move_key = f"{player}_move_details"

    hp_advantage_turns = 0
    total_damage = 0
    damage_moves = 0
    risky_moves = 0
    total_moves = 0
    prev_opp_state = None  # opponent state of the previous turn (None on turn 0)

    for turn in timeline:
        own_state = turn.get(own_state_key)
        opp_state = turn.get(opp_state_key)
        move = turn.get(move_key)

        # Momentum: turns where the player's active Pokemon has more HP left
        if own_state and opp_state:
            if (own_state.get("hp_pct") or 0) > (opp_state.get("hp_pct") or 0):
                hp_advantage_turns += 1

        # Damage efficiency: HP lost by the opponent since the previous turn.
        # NOTE(review): the drop is measured on the active slot, so a switch to
        # a less healthy Pokemon is also counted as damage.
        if prev_opp_state and opp_state:
            dmg = max(0, (prev_opp_state.get("hp_pct") or 0) - (opp_state.get("hp_pct") or 0))
            if dmg > 0:
                total_damage += dmg
                damage_moves += 1
        prev_opp_state = opp_state

        # Risk ratio: a missing accuracy is treated as a sure hit
        if move is not None:
            total_moves += 1
            accuracy = move.get("accuracy", 1)
            if accuracy is not None and accuracy < 0.8:
                risky_moves += 1

    total_turns = len(timeline)
    return {
        "momentum": hp_advantage_turns / total_turns if total_turns > 0 else 0,
        "damage_efficiency": total_damage / damage_moves if damage_moves > 0 else 0,
        "risk_ratio": risky_moves / total_moves if total_moves > 0 else 0,
    }


def create_strategic_features(data: list[dict]) -> pd.DataFrame:
    """
    Build per-battle strategic difference features (P1 minus P2).

    Columns: battle_id, momentum_diff, efficiency_diff, risk_diff, player_won.
    Battles with an empty or missing "battle_timeline" are skipped.

    The timeline keys are prefixed with "p1"/"p2". The previous implementation
    derived the prefix by slicing "player_1"[0:2], which yields "pl", so no key
    ever matched and all three differences were always 0.

    The label is taken from "player_won" when the record has it, otherwise from
    the legacy "winner" field ("player_1" -> 1, "player_2" -> 0); it is None
    when neither is usable.

    Raises:
        KeyError: if a battle with a timeline has no "battle_id".
    """
    from tqdm import tqdm

    feature_list = []

    for battle in tqdm(data, desc="Calcolo strategic features"):
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        battle_id = battle["battle_id"]

        if battle.get("player_won") is not None:
            player_won = int(battle["player_won"])
        else:
            winner = battle.get("winner")
            player_won = 1 if winner == "player_1" else 0 if winner == "player_2" else None

        p1 = _strategic_player_stats(timeline, "p1", "p2")
        p2 = _strategic_player_stats(timeline, "p2", "p1")

        feature_list.append({
            "battle_id": battle_id,
            "momentum_diff": p1["momentum"] - p2["momentum"],
            "efficiency_diff": p1["damage_efficiency"] - p2["damage_efficiency"],
            "risk_diff": p1["risk_ratio"] - p2["risk_ratio"],
            "player_won": player_won,
        })

    return pd.DataFrame(feature_list)

In [None]:
def create_static_features(data: list[dict]) -> pd.DataFrame:
    """
    Extract additional static features that create_simple_features does not cover.

    The focus is on the spread inside P1's team and on aggregate statistics:
    per-stat sum and standard deviation, the grand total of all base stats,
    the number of distinct types (ignoring 'notype'), and two offence/defence
    ratios. battle_id is always set; player_won is added when present.
    Missing cells are filled with 0.
    """
    # Order matters: it fixes the column order of the resulting frame.
    stat_keys = ('base_hp', 'base_atk', 'base_def', 'base_spa', 'base_spd', 'base_spe')

    def team_mean(team, key):
        return np.mean([member.get(key, 0) for member in team])

    records = []

    for battle in tqdm(data, desc="Estrazione feature statiche (ottimizzata)"):
        record = {}

        # Player 1: whole team
        team = battle.get('p1_team_details', [])
        if team:
            for key in stat_keys:
                column = [member.get(key, 0) for member in team]
                short = key[5:]  # drop the leading "base_"
                record[f'p1_sum_{short}'] = np.sum(column)
                record[f'p1_std_{short}'] = np.std(column)

            per_member_totals = [
                sum(member.get(key, 0) for key in stat_keys) for member in team
            ]
            record['p1_total_stats'] = np.sum(per_member_totals)

            # Unique types across the team
            distinct_types = {
                t for member in team for t in member.get('types', []) if t != 'notype'
            }
            record['p1_num_types'] = len(distinct_types)

            # The small epsilon keeps the ratios finite when a mean is 0
            record['atk_def_ratio'] = team_mean(team, 'base_atk') / (team_mean(team, 'base_def') + 1e-5)
            record['spa_spd_ratio'] = team_mean(team, 'base_spa') / (team_mean(team, 'base_spd') + 1e-5)

        # Target and identifier
        record['battle_id'] = battle.get('battle_id')
        if 'player_won' in battle:
            record['player_won'] = int(battle['player_won'])

        records.append(record)

    return pd.DataFrame(records).fillna(0)


In [None]:
# Status conditions treated as "negative" by the dynamic features.
_DYN_NEGATIVE_STATUSES = frozenset({"brn", "psn", "tox", "par", "slp", "frz"})


def _dyn_state(turn: dict, key: str) -> dict:
    """Return the Pokemon-state dict stored under *key*, or {} if missing/None."""
    state = turn.get(key)
    return state if isinstance(state, dict) else {}


def _dyn_negative_status_count(status) -> int:
    """Count the negative statuses in *status* (a string, a list, or anything else)."""
    if isinstance(status, str):
        return 1 if status in _DYN_NEGATIVE_STATUSES else 0
    if isinstance(status, list):
        return sum(1 for s in status if s in _DYN_NEGATIVE_STATUSES)
    return 0


def _dyn_hp_drop(prev_turn: dict, curr_turn: dict, state_key: str) -> float:
    """HP percentage lost on *state_key* between two turns (0 if unavailable)."""
    try:
        return max(0, prev_turn[state_key]["hp_pct"] - curr_turn[state_key]["hp_pct"])
    except (KeyError, TypeError):
        # Missing state / hp_pct, or a None value: best effort, count no damage
        return 0


def _dyn_switched(prev_turn: dict, curr_turn: dict, state_key: str) -> bool:
    """True when the active Pokemon name on *state_key* changed between two turns."""
    try:
        return curr_turn[state_key]["name"] != prev_turn[state_key]["name"]
    except (KeyError, TypeError):
        return False


def _dyn_mean_accuracy(timeline: list, move_key: str) -> float:
    """Mean "accuracy" over the turns where *move_key* holds a move with one (else 0.0)."""
    values = []
    for turn in timeline:
        move = turn.get(move_key)
        if move and move.get("accuracy") is not None:
            values.append(move["accuracy"])
    return sum(values) / len(values) if values else 0.0


def create_dynamic_features(data: list[dict]) -> pd.DataFrame:
    """
    Extract dynamic features from each battle's "battle_timeline".

    Uses HP, status conditions, Pokemon switches, moves and KOs. Every battle
    produces a row; battles without a timeline only carry battle_id (and
    player_won when present), so the remaining cells are NaN for them.

    A status may be a single string or a list of strings; both forms are
    accepted by every status-based feature.

    Raises:
        KeyError: if a battle has no "battle_id".
    """
    feature_list = []

    for battle in tqdm(data, desc="Estrazione feature dinamiche"):
        features = {"battle_id": battle["battle_id"]}

        # Target (if it exists)
        if "player_won" in battle:
            features["player_won"] = int(battle["player_won"])

        timeline = battle.get("battle_timeline", [])
        if not timeline:
            feature_list.append(features)
            continue

        turn_pairs = list(zip(timeline, timeline[1:]))

        # 1. Base information
        features["num_turns"] = len(timeline)

        p1_moves = sum(1 for t in timeline if t.get("p1_move_details") is not None)
        p2_moves = sum(1 for t in timeline if t.get("p2_move_details") is not None)
        features["p1_move_count"] = p1_moves
        features["p2_move_count"] = p2_moves
        features["move_count_diff"] = p1_moves - p2_moves

        # 2. Inflicted damage: HP lost by the opposing active slot turn over turn
        p1_dmg, p2_dmg = 0.0, 0.0
        for prev, curr in turn_pairs:
            p1_dmg += _dyn_hp_drop(prev, curr, "p2_pokemon_state")
            p2_dmg += _dyn_hp_drop(prev, curr, "p1_pokemon_state")
        features["p1_total_damage"] = p1_dmg
        features["p2_total_damage"] = p2_dmg
        features["damage_diff"] = p1_dmg - p2_dmg

        # 3. Turns in which the opposing active Pokemon has a negative status
        p1_neg_counts = [
            _dyn_negative_status_count(_dyn_state(t, "p1_pokemon_state").get("status"))
            for t in timeline
        ]
        p2_neg_counts = [
            _dyn_negative_status_count(_dyn_state(t, "p2_pokemon_state").get("status"))
            for t in timeline
        ]
        p1_status_inflicted = sum(1 for c in p2_neg_counts if c >= 1)
        p2_status_inflicted = sum(1 for c in p1_neg_counts if c >= 1)
        features["p1_status_inflicted"] = p1_status_inflicted
        features["p2_status_inflicted"] = p2_status_inflicted
        features["status_diff"] = p1_status_inflicted - p2_status_inflicted

        # 4. Number of Pokemon switches (each side is evaluated independently)
        p1_switches = sum(1 for prev, curr in turn_pairs if _dyn_switched(prev, curr, "p1_pokemon_state"))
        p2_switches = sum(1 for prev, curr in turn_pairs if _dyn_switched(prev, curr, "p2_pokemon_state"))
        features["p1_switches"] = p1_switches
        features["p2_switches"] = p2_switches
        features["switch_diff"] = p1_switches - p2_switches

        # 5. KOs: turns where the opposing active Pokemon is at 0 HP
        p1_kos = sum(
            1 for t in timeline
            if _dyn_state(t, "p2_pokemon_state").get("hp_pct", 1.0) == 0
        )
        p2_kos = sum(
            1 for t in timeline
            if _dyn_state(t, "p1_pokemon_state").get("hp_pct", 1.0) == 0
        )
        features["p1_kos"] = p1_kos
        features["p2_kos"] = p2_kos
        features["ko_diff"] = p1_kos - p2_kos

        # 6. Number of turns with a negative status for player 1
        # (same count as p2_status_inflicted; kept as its own column)
        features["p1_negative_status_count"] = sum(1 for c in p1_neg_counts if c >= 1)

        # 7. Number of turns with at least two negative statuses for player 1
        # (only possible when the status is given as a list)
        features["p1_double_negative_status"] = sum(1 for c in p1_neg_counts if c >= 2)

        # 8. Mean move accuracy for P1 (0.0 when no move carries an accuracy)
        features["p1_move_accuracy_avg"] = _dyn_mean_accuracy(timeline, "p1_move_details")

        # 9. Mean move accuracy for P2 (0.0 when no move carries an accuracy)
        features["p2_move_accuracy_avg"] = _dyn_mean_accuracy(timeline, "p2_move_details")

        feature_list.append(features)

    return pd.DataFrame(feature_list)


In [None]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd


# Type chart
# type_chart[attacking_type][defending_type] -> damage multiplier.
# Pairs that are not listed are neutral (1.0).
# NOTE(review): there are no Dark/Steel/Fairy rows, which suggests
# early-generation rules, yet Ghost->Psychic = 2.0, Bug->Poison = 0.5 and
# Ice->Fire = 0.5 look like later-generation values - confirm which
# generation the battle data follows before relying on these entries.
type_chart = {
    "Normal":    {"Rock": 0.5, "Ghost": 0.0},
    "Fire":      {"Fire": 0.5, "Water": 0.5, "Grass": 2.0, "Ice": 2.0, "Bug": 2.0, "Rock": 0.5, "Dragon": 0.5},
    "Water":     {"Fire": 2.0, "Water": 0.5, "Grass": 0.5, "Ground": 2.0, "Rock": 2.0, "Dragon": 0.5},
    "Electric":  {"Water": 2.0, "Electric": 0.5, "Grass": 0.5, "Ground": 0.0, "Flying": 2.0, "Dragon": 0.5},
    "Grass":     {"Fire": 0.5, "Water": 2.0, "Grass": 0.5, "Poison": 0.5, "Ground": 2.0, "Flying": 0.5, "Bug": 0.5, "Rock": 2.0, "Dragon": 0.5},
    # Ice resists Ice; the entry was missing, so Ice-on-Ice was scored as neutral.
    "Ice":       {"Fire": 0.5, "Water": 0.5, "Grass": 2.0, "Ice": 0.5, "Ground": 2.0, "Flying": 2.0, "Dragon": 2.0},
    "Fighting":  {"Normal": 2.0, "Ice": 2.0, "Poison": 0.5, "Flying": 0.5, "Psychic": 0.5, "Bug": 0.5, "Rock": 2.0, "Ghost": 0.0},
    "Poison":    {"Grass": 2.0, "Poison": 0.5, "Ground": 0.5, "Rock": 0.5, "Ghost": 0.5},
    "Ground":    {"Fire": 2.0, "Electric": 2.0, "Grass": 0.5, "Poison": 2.0, "Flying": 0.0, "Bug": 0.5, "Rock": 2.0},
    "Flying":    {"Electric": 0.5, "Grass": 2.0, "Fighting": 2.0, "Bug": 2.0, "Rock": 0.5},
    "Psychic":   {"Fighting": 2.0, "Poison": 2.0, "Psychic": 0.5},
    "Bug":       {"Fire": 0.5, "Grass": 2.0, "Fighting": 0.5, "Poison": 0.5, "Flying": 0.5, "Psychic": 2.0, "Ghost": 0.5},
    "Rock":      {"Fire": 2.0, "Ice": 2.0, "Fighting": 0.5, "Ground": 0.5, "Flying": 2.0, "Bug": 2.0},
    "Ghost":     {"Normal": 0.0, "Psychic": 2.0, "Ghost": 2.0},
    "Dragon":    {"Dragon": 2.0},
}

def get_type_effectiveness(attacker_types, defender_types):
    """
    Return the combined multiplier of the attacker's types against the defender's.

    The result is the product of ``type_chart`` entries over every
    (attacker type, defender type) pair; names are matched after
    ``str.capitalize()``. Unknown types and unlisted pairs count as 1.0, so an
    empty list on either side yields 1.0.

    Args:
        attacker_types: Iterable of type names of the attacking Pokemon.
        defender_types: Iterable of type names of the defending Pokemon.

    Returns:
        The product of the multipliers as a float.
    """
    total = 1.0
    for atk_type in attacker_types:
        matchups = type_chart.get(atk_type.capitalize(), {})
        for def_type in defender_types:
            total *= matchups.get(def_type.capitalize(), 1.0)
    return total



def create_type_effectiveness_feature(data):
    """
    Score the type match-up between P1's team and P2's lead for each battle.

    p1_effectiveness is the mean of get_type_effectiveness(member types, lead
    types) over P1's team; p2_effectiveness is the mean of the reverse
    match-up. Both default to 1.0 (neutral) when the team or the lead's types
    are unavailable. type_eff_diff is p1_effectiveness - p2_effectiveness.

    Args:
        data: Iterable of battle records.

    Returns:
        DataFrame with the columns battle_id, p1_effectiveness,
        p2_effectiveness, type_eff_diff and player_won (0 when the record has
        no label). The columns are present even when ``data`` is empty.
    """
    columns = ["battle_id", "p1_effectiveness", "p2_effectiveness", "type_eff_diff", "player_won"]
    eff_columns = ["p1_effectiveness", "p2_effectiveness", "type_eff_diff"]

    def clean_types(entry):
        # Tolerate a missing entry and "types": None for both players
        raw = (entry.get("types") or []) if isinstance(entry, dict) else []
        return [t.capitalize() for t in raw if isinstance(t, str)]

    rows = []
    for battle in tqdm(data, desc="Calcolo type effectiveness (team-based)"):
        p1_team = battle.get("p1_team_details", []) or []
        p2_types = clean_types(battle.get("p2_lead_details"))
        p1_types_team = [clean_types(member) for member in p1_team]

        if p1_types_team and p2_types:
            p1_eff = float(np.mean([get_type_effectiveness(types, p2_types) for types in p1_types_team]))
            p2_eff = float(np.mean([get_type_effectiveness(p2_types, types) for types in p1_types_team]))
        else:
            p1_eff, p2_eff = 1.0, 1.0

        rows.append({
            "battle_id": battle.get("battle_id"),
            "p1_effectiveness": p1_eff,
            "p2_effectiveness": p2_eff,
            "type_eff_diff": p1_eff - p2_eff,
            "player_won": int(battle.get("player_won", 0)),
        })

    df = pd.DataFrame(rows, columns=columns)
    # Fill only the effectiveness columns: a blanket fillna(1.0) would turn a
    # missing battle_id into the id 1.0.
    df[eff_columns] = df[eff_columns].fillna(1.0)
    return df



In [None]:
def create_timeline_strategy_features(data: list[dict]) -> pd.DataFrame:
    """
    Extract strategy features from the battle_timeline.

    Only player_1 minus player_2 differences are returned:
      - total_boosts_diff: non-zero boost entries summed over all turns
      - avg_priority_diff: mean "priority" of the moves used
      - avg_power_diff: mean "base_power" of PHYSICAL/SPECIAL moves
      - num_status_moves_diff: number of STATUS moves

    Battles without a timeline are skipped. player_won is added when the
    record carries it, and missing cells are filled with 0.
    """

    def summarize(timeline, side):
        move_key = f"{side}_move_details"
        state_key = f"{side}_pokemon_state"

        used_moves = []
        boost_count = 0
        for turn in timeline:
            if turn.get(move_key) is not None:
                used_moves.append(turn[move_key])

            state = turn.get(state_key)
            if state:
                boosts = state.get("boosts")
                if isinstance(boosts, dict):
                    boost_count += sum(1 for value in boosts.values() if value != 0)

        attacks = [move for move in used_moves if move.get("category") in ["PHYSICAL", "SPECIAL"]]
        status_count = sum(1 for move in used_moves if move.get("category") == "STATUS")

        if used_moves:
            mean_priority = np.mean([move.get("priority", 0) for move in used_moves])
        else:
            mean_priority = 0

        if attacks:
            mean_power = np.mean([move.get("base_power", 0) for move in attacks])
        else:
            mean_power = 0

        return {
            "boosts": boost_count,
            "priority": mean_priority,
            "power": mean_power,
            "status": status_count,
        }

    records = []

    for battle in tqdm(data, desc="Estrazione timeline diff features"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        first = summarize(timeline, "p1")
        second = summarize(timeline, "p2")

        record = {
            "battle_id": battle_id,
            "total_boosts_diff": first["boosts"] - second["boosts"],
            "avg_priority_diff": first["priority"] - second["priority"],
            "avg_power_diff": first["power"] - second["power"],
            "num_status_moves_diff": first["status"] - second["status"],
        }

        if "player_won" in battle:
            record["player_won"] = int(battle["player_won"])

        records.append(record)

    return pd.DataFrame(records).fillna(0)


In [None]:
def create_freeze_features(data: list[dict]) -> pd.DataFrame:
    """
    Build features about the 'frz' (freeze) status from the battle_timeline.

    Per battle (battles without a timeline are skipped):
      - freeze_turn_diff: turns P1 was frozen minus turns P2 was frozen
      - freeze_event_diff: turns P2's status equals "frz" minus turns P1's does
      - freeze_turn_ratio: freeze_turn_diff divided by the number of turns
      - freeze_impact_score: 0.7 / 0.3 blend of the two diffs per turn

    The returned frame never contains player_won.

    NOTE(review): the "inflicted" counters test the same per-turn condition as
    the frozen-turn counters (plain-string status only), so for string
    statuses freeze_event_diff is exactly -freeze_turn_diff. Confirm whether
    freeze onsets were meant to be counted instead.
    """
    from tqdm.notebook import tqdm
    import pandas as pd
    import numpy as np

    def is_frozen(status):
        # A status may be a list of conditions or a single value
        if isinstance(status, list):
            return "frz" in status
        return status == "frz"

    records = []

    for battle in tqdm(data, desc="Estrazione freeze features"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        frozen_turns = {"p1": 0, "p2": 0}
        inflicted = {"p1": 0, "p2": 0}

        for turn in timeline:
            status_1 = turn.get("p1_pokemon_state", {}).get("status")
            status_2 = turn.get("p2_pokemon_state", {}).get("status")

            if is_frozen(status_1):
                frozen_turns["p1"] += 1
            if is_frozen(status_2):
                frozen_turns["p2"] += 1

            # Credited to the opponent of the frozen Pokemon
            if status_2 == "frz":
                inflicted["p1"] += 1
            if status_1 == "frz":
                inflicted["p2"] += 1

        turn_count = len(timeline)
        turn_diff = frozen_turns["p1"] - frozen_turns["p2"]
        event_diff = inflicted["p1"] - inflicted["p2"]

        record = {
            "battle_id": battle_id,
            "freeze_turn_diff": turn_diff,
            "freeze_event_diff": event_diff,
            "freeze_turn_ratio": turn_diff / (turn_count + 1e-5),
            "freeze_impact_score": (
                0.7 * turn_diff + 0.3 * event_diff
            ) / (turn_count + 1e-5),
        }

        # Converted here (so malformed labels still raise) and dropped below
        if "player_won" in battle:
            record["player_won"] = int(battle["player_won"])

        records.append(record)

    return (
        pd.DataFrame(records)
        .fillna(0)
        .replace([np.inf, -np.inf], 0)
        .drop(columns=["player_won"], errors="ignore")
    )

In [None]:
def create_paralysis_features(data: list[dict]) -> pd.DataFrame:
    """
    Build features about the 'par' (paralysis) status from the timeline.

    The expected number of skipped turns is modelled as 25% of the paralysed
    turns. Per battle (battles without a timeline are skipped):
      - p1_par_turns / p2_par_turns: turns each side was paralysed
      - par_turn_diff: p1_par_turns - p2_par_turns
      - par_event_diff: turns P2's status equals "par" minus turns P1's does
      - expected_skip_diff: 0.25 * p1_par_turns - 0.25 * p2_par_turns
      - par_turn_ratio: par_turn_diff divided by the number of turns
    player_won is added when present; missing cells are filled with 0.

    NOTE(review): the "event" counters test the same per-turn condition as the
    paralysed-turn counters (plain-string status only), so for string statuses
    par_event_diff is exactly -par_turn_diff. Confirm the intended meaning.
    """
    from tqdm.notebook import tqdm
    import pandas as pd

    def is_paralysed(status):
        # A status may be a list of conditions or a single value
        if isinstance(status, list):
            return "par" in status
        return status == "par"

    records = []

    for battle in tqdm(data, desc="Estrazione paralysis features"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        par_turns_1 = par_turns_2 = 0
        par_events_1 = par_events_2 = 0

        for turn in timeline:
            status_1 = turn.get("p1_pokemon_state", {}).get("status")
            status_2 = turn.get("p2_pokemon_state", {}).get("status")

            if is_paralysed(status_1):
                par_turns_1 += 1
            if is_paralysed(status_2):
                par_turns_2 += 1

            # Credited to the opponent of the paralysed Pokemon
            if status_2 == "par":
                par_events_1 += 1
            if status_1 == "par":
                par_events_2 += 1

        turn_count = len(timeline)

        record = {
            "battle_id": battle_id,
            "p1_par_turns": par_turns_1,
            "p2_par_turns": par_turns_2,
            "par_turn_diff": par_turns_1 - par_turns_2,
            "par_event_diff": par_events_1 - par_events_2,
            "expected_skip_diff": 0.25 * par_turns_1 - 0.25 * par_turns_2,
            "par_turn_ratio": (par_turns_1 - par_turns_2) / (turn_count + 1e-5),
        }

        if "player_won" in battle:
            record["player_won"] = int(battle["player_won"])

        records.append(record)

    return pd.DataFrame(records).fillna(0)


In [None]:
def create_critical_hit_features(data: list[dict]) -> pd.DataFrame:
    """
    Compute critical-hit potential features from base Speed.

    Uses crit_prob = base_spe / 512 for every Pokemon and compares the sides:
      - p1_mean_crit_ratio: mean over P1's team (0.0 without a team)
      - p2_mean_crit_ratio: mean over P2's team when "p2_team_details" is
        available, otherwise the value of P2's lead (0.0 without a lead)
      - crit_ratio_diff: p1_mean_crit_ratio - p2_mean_crit_ratio
    player_won is added when present; missing cells are filled with 0.
    """
    from tqdm.notebook import tqdm
    import numpy as np
    import pandas as pd

    def team_crit_mean(team):
        return float(np.mean([member.get("base_spe", 0) / 512 for member in team]))

    records = []

    for battle in tqdm(data, desc="Estrazione critical hit features"):
        battle_id = battle.get("battle_id")

        # Player 1
        team_1 = battle.get("p1_team_details", [])
        crit_1 = team_crit_mean(team_1) if team_1 else 0.0

        # Player 2: full team if known, otherwise fall back to the lead
        team_2 = battle.get("p2_team_details", [])
        if team_2:
            crit_2 = team_crit_mean(team_2)
        else:
            lead = battle.get("p2_lead_details", {})
            if lead:
                crit_2 = lead.get("base_spe", 0) / 512
            else:
                crit_2 = 0.0

        record = {
            "battle_id": battle_id,
            "p1_mean_crit_ratio": crit_1,
            "p2_mean_crit_ratio": crit_2,
            "crit_ratio_diff": crit_1 - crit_2,
        }

        if "player_won" in battle:
            record["player_won"] = int(battle["player_won"])

        records.append(record)

    return pd.DataFrame(records).fillna(0)


In [None]:
def create_rng_features(data: list[dict]) -> pd.DataFrame:
    """
    Estimate a 'luck' (RNG impact) index from chance-driven events.

    Components, all evaluated from the second turn of the timeline onwards:
      - rng_accuracy_diff: expected misses of P2 minus expected misses of P1,
        where a move contributes ``1 - accuracy`` (assumes accuracy is a
        probability in [0, 1]; the value is clamped to that range, and a
        missing or None accuracy counts as a sure hit)
      - rng_dmg_var_diff: variance of P1's positive damage values minus the
        variance of P2's (an empty list counts as variance 0.0)
      - rng_status_diff: turns P2 is frozen/paralysed/asleep minus turns P1 is
      - rng_global_index: tanh-squashed weighted blend of the three

    The previous version counted misses with ``locals()[name] += 1`` on a
    random draw; writes through ``locals()`` do not reach function variables,
    so the counts were always 0, and the draw itself was not reproducible.
    The deterministic expectation replaces it and uses only move accuracy,
    not the battle outcome.

    NOTE(review): rng_accuracy_diff is positive when P2 is expected to miss
    more, yet it enters rng_global_index with a minus sign while
    rng_status_diff (positive = good for P1) enters with a plus sign. The
    formula is kept as written - confirm the intended sign.

    Battles without a timeline are skipped. player_won is 0 when the record
    has no label.

    Raises:
        KeyError: if a battle with a timeline has no "battle_id".
    """
    from tqdm.notebook import tqdm
    import numpy as np
    import pandas as pd

    status_set = {"frz", "par", "slp"}

    def expected_miss(turn, move_key):
        # Probability that the move used this turn (if any) misses
        move = turn.get(move_key)
        if not move:
            return 0.0
        accuracy = move.get("accuracy", 1.0)
        if accuracy is None:
            accuracy = 1.0
        return min(max(1.0 - accuracy, 0.0), 1.0)

    def has_rng_status(turn, state_key):
        # Status may be a single string or a list of strings
        state = turn.get(state_key)
        status = state.get("status") if isinstance(state, dict) else None
        if isinstance(status, list):
            return any(s in status_set for s in status)
        return isinstance(status, str) and status in status_set

    def hp_drop(prev_turn, curr_turn, state_key):
        try:
            return max(0, prev_turn[state_key]["hp_pct"] - curr_turn[state_key]["hp_pct"])
        except (KeyError, TypeError):
            # Missing state / hp_pct, or a None value: best effort, no damage
            return 0

    def variance(values):
        # np.var([]) is NaN and emits a RuntimeWarning
        return float(np.var(values)) if values else 0.0

    feature_list = []

    for battle in tqdm(data, desc="Estrazione RNG features"):
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        battle_id = battle["battle_id"]

        p1_misses = p2_misses = 0.0
        p1_damages, p2_damages = [], []
        p1_lucky_events = p2_lucky_events = 0

        for prev, curr in zip(timeline, timeline[1:]):
            p1_misses += expected_miss(curr, "p1_move_details")
            p2_misses += expected_miss(curr, "p2_move_details")

            dmg = hp_drop(prev, curr, "p2_pokemon_state")
            if dmg > 0:
                p1_damages.append(dmg)

            dmg = hp_drop(prev, curr, "p1_pokemon_state")
            if dmg > 0:
                p2_damages.append(dmg)

            # A chance-based status on the opponent counts as luck for the player
            if has_rng_status(curr, "p2_pokemon_state"):
                p1_lucky_events += 1
            if has_rng_status(curr, "p1_pokemon_state"):
                p2_lucky_events += 1

        rng_accuracy_diff = p2_misses - p1_misses
        rng_dmg_var_diff = variance(p1_damages) - variance(p2_damages)
        rng_status_diff = p1_lucky_events - p2_lucky_events

        rng_global_index = (0.4 * np.tanh(rng_status_diff)
                            + 0.3 * np.tanh(rng_dmg_var_diff / 100)
                            + 0.3 * np.tanh(-rng_accuracy_diff / 10))

        feature_list.append({
            "battle_id": battle_id,
            "rng_accuracy_diff": rng_accuracy_diff,
            "rng_dmg_var_diff": rng_dmg_var_diff,
            "rng_status_diff": rng_status_diff,
            "rng_global_index": rng_global_index,
            "player_won": int(battle.get("player_won", 0)),
        })

    return pd.DataFrame(feature_list).fillna(0)


## Sleep features and top-tier features

In [None]:
# SLEEP STATUS FEATURES
def create_sleep_features(data: list[dict]) -> pd.DataFrame:
    """
    Extract features about the SLEEP status (slp) from the battle_timeline.

    Counts, for each player, the turns in which their active Pokemon is
    asleep. sleep_diff is p2_sleep_turns - p1_sleep_turns. Battles without a
    timeline are skipped; player_won is added when present and missing cells
    are filled with 0.
    """
    from tqdm.notebook import tqdm
    import pandas as pd

    def is_asleep(status):
        # Only strings and lists are recognised status containers
        if isinstance(status, str):
            return status == "slp"
        if isinstance(status, list):
            return "slp" in status
        return False

    records = []

    for battle in tqdm(data, desc="Estrazione sleep features"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        asleep_1 = 0
        asleep_2 = 0

        for turn in timeline:
            status_1 = turn.get("p1_pokemon_state", {}).get("status")
            status_2 = turn.get("p2_pokemon_state", {}).get("status")

            if is_asleep(status_1):
                asleep_1 += 1
            if is_asleep(status_2):
                asleep_2 += 1

        record = {
            "battle_id": battle_id,
            "p1_sleep_turns": asleep_1,
            "p2_sleep_turns": asleep_2,
            "sleep_diff": asleep_2 - asleep_1
        }

        if "player_won" in battle:
            record["player_won"] = int(battle["player_won"])

        records.append(record)

    return pd.DataFrame(records).fillna(0)

In [None]:
# Features that account for top-tier Pokemon; TODO: work out how to weight them

# Pokemon regarded as top tier; names are compared after str.capitalize().
top_tiers = ["Tauros", "Snorlax", "Chansey", "Starmie", "Exeggutor"]

def _capitalized_name(entry) -> str:
    """Return entry["name"] capitalized, or "" when the entry or name is unusable."""
    if not isinstance(entry, dict):
        return ""
    name = entry.get("name", "")
    # A None / non-string name would otherwise crash on .capitalize()
    return name.capitalize() if isinstance(name, str) else ""

def count_top_tiers_p1(row):
    """
    Count the top-tier Pokemon in P1's team.

    Args:
        row: Mapping-like battle record (dict or pandas Series) exposing
            ``.get``; reads "p1_team_details".

    Returns:
        Number of team members whose name is in ``top_tiers`` (one per team
        slot); 0 when the team is not a list.
    """
    team = row.get("p1_team_details", [])
    if not isinstance(team, list):
        return 0
    return sum(1 for member in team if _capitalized_name(member) in top_tiers)

def count_top_tiers_p2(row):
    """
    Count the distinct top-tier Pokemon P2 is known to have.

    The known Pokemon are P2's lead ("p2_lead_details") plus every Pokemon
    that appears as "p2_pokemon_state" in "battle_timeline".

    Args:
        row: Mapping-like battle record (dict or pandas Series) exposing ``.get``.

    Returns:
        Number of distinct known names that are in ``top_tiers``.
    """
    names = set()

    lead_name = _capitalized_name(row.get("p2_lead_details"))
    if lead_name:
        names.add(lead_name)

    timeline = row.get("battle_timeline", [])
    if isinstance(timeline, list):
        for turn in timeline:
            if isinstance(turn, dict):
                seen = _capitalized_name(turn.get("p2_pokemon_state"))
                if seen:
                    names.add(seen)

    return sum(1 for name in names if name in top_tiers)

# Top-tier counts per training battle, plus two derived columns:
#   tier_diff      = p1_top_tier_count - p2_top_tier_count
#   tier_advantage = 1 when P1 has strictly more top tiers than P2, else 0
tier_features_train = pd.DataFrame({
    "battle_id": raw_train_df["battle_id"],
    "p1_top_tier_count": raw_train_df.apply(count_top_tiers_p1, axis=1),
    "p2_top_tier_count": raw_train_df.apply(count_top_tiers_p2, axis=1),
}).assign(
    tier_diff=lambda df: df["p1_top_tier_count"] - df["p2_top_tier_count"],
    # Evaluated after tier_diff, so it can read the column created above
    tier_advantage=lambda df: (df["tier_diff"] > 0).astype(int),
)






## Features for status

In [None]:
# FEATURE THAT MEASURES THE PRACTICAL EFFECT OF PARALYSIS

import pandas as pd

def create_paralysis_block_rate_feature(data: list[dict]) -> pd.DataFrame:
    """Measure how often paralysis left each player without a recorded move.

    A turn counts as a paralysed turn for a player when the status of their
    active Pokémon is ``"par"`` or ``"paralyzed"`` (case-insensitive). It also
    counts as blocked when that player's move details are ``None`` or absent.
    The block rate is blocked / paralysed turns, or 0 if the player was never
    paralysed.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``p1_par_block_rate``,
        ``p2_par_block_rate`` and ``paralysis_block_rate`` (P2 rate minus P1
        rate, so positive values favour P1). Battles whose timeline is not a
        non-empty list produce no row.
    """
    paralysis_labels = {"par", "paralyzed"}
    # (state key, move key) for P1 and P2, in that order.
    sides = (
        ("p1_pokemon_state", "p1_move_details"),
        ("p2_pokemon_state", "p2_move_details"),
    )
    records = []

    for battle in data:
        battle_id = battle.get("battle_id", None)
        turns = battle.get("battle_timeline", [])
        if not (isinstance(turns, list) and len(turns) > 0):
            continue

        paralysed = [0, 0]
        blocked = [0, 0]
        for turn in turns:
            if not isinstance(turn, dict):
                continue
            for idx, (state_key, move_key) in enumerate(sides):
                status = str(turn.get(state_key, {}).get("status", "")).lower()
                if status not in paralysis_labels:
                    continue
                paralysed[idx] += 1
                # No move details on a paralysed turn is treated as a lost turn.
                if turn.get(move_key, None) is None:
                    blocked[idx] += 1

        p1_rate, p2_rate = (
            lost / total if total > 0 else 0
            for lost, total in zip(blocked, paralysed)
        )

        records.append({
            "battle_id": battle_id,
            "p1_par_block_rate": p1_rate,
            "p2_par_block_rate": p2_rate,
            "paralysis_block_rate": p2_rate - p1_rate,
        })

    return pd.DataFrame(records)


In [None]:
# FEATURES CHE PESA LA GRAVITA' DEGLI STATUS

import pandas as pd

def create_status_severity_feature(data: list[dict]) -> pd.DataFrame:
    """Score each battle by the severity of the status conditions on each side.

    Every turn, the status of each active Pokémon is mapped to a weight
    (sleep/freeze 3, paralysis 2, burn/poison 1, anything else 0). The weight
    of P2's status is credited to P1 and vice versa, and the weights are summed
    over the battle.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``status_severity_score`` (P1 score minus
        P2 score), ``p1_status_score`` and ``p2_status_score``. Battles whose
        timeline is not a list produce no row.
    """
    severity = {
        "slp": 3, "sleep": 3,
        "frz": 3, "freeze": 3,
        "par": 2, "paralyzed": 2,
        "brn": 1, "burn": 1,
        "psn": 1, "poison": 1,
        "nostatus": 0, "none": 0
    }

    def _weight(turn, state_key):
        label = str(turn.get(state_key, {}).get("status", "")).lower()
        return severity.get(label, 0)

    records = []
    for battle in data:
        battle_id = battle.get("battle_id", None)
        turns = battle.get("battle_timeline", [])
        if not isinstance(turns, list):
            continue

        usable = [turn for turn in turns if isinstance(turn, dict)]
        # A status sitting on the opponent is credited to the player.
        p1_total = sum(_weight(turn, "p2_pokemon_state") for turn in usable)
        p2_total = sum(_weight(turn, "p1_pokemon_state") for turn in usable)

        records.append({
            "battle_id": battle_id,
            "status_severity_score": p1_total - p2_total,
            "p1_status_score": p1_total,
            "p2_status_score": p2_total,
        })

    return pd.DataFrame(records)


In [None]:
# FEATURES CHE MISURA IL BILANCIO NETTO DEL CONTROLLO

import pandas as pd

def create_status_net_balance_feature(data: list[dict]) -> pd.DataFrame:
    """Compute the net balance of status-afflicted turns from P1's viewpoint.

    Each turn where P2's active Pokémon carries a recognised status adds one
    to ``status_inflicted``; each turn where P1's does adds one to
    ``status_suffered``. Recognised labels (case-insensitive) are sleep,
    freeze, paralysis, burn and poison in their short and long spellings.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``status_inflicted``,
        ``status_suffered`` and ``status_net_balance`` (inflicted minus
        suffered). Battles whose timeline is not a list produce no row.
    """
    afflictions = {"slp", "sleep", "frz", "freeze", "par", "paralyzed", "brn", "burn", "psn", "poison"}

    def _afflicted(turn, state_key):
        label = str(turn.get(state_key, {}).get("status", "")).lower()
        return label in afflictions

    records = []
    for battle in data:
        battle_id = battle.get("battle_id", None)
        turns = battle.get("battle_timeline", [])
        if not isinstance(turns, list):
            continue

        usable = [turn for turn in turns if isinstance(turn, dict)]
        dealt = sum(1 for turn in usable if _afflicted(turn, "p2_pokemon_state"))
        taken = sum(1 for turn in usable if _afflicted(turn, "p1_pokemon_state"))

        records.append({
            "battle_id": battle_id,
            "status_inflicted": dealt,
            "status_suffered": taken,
            "status_net_balance": dealt - taken,
        })

    return pd.DataFrame(records)


## Features based on timeline p.1

In [None]:
# FEATURES CHE MISURA L'ANDAMENTO DELLE BATTAGLIE NEL TEMPO

import pandas as pd

def create_lead_duration_feature(data: list[dict]) -> pd.DataFrame:
    """Count the turns each player spent ahead on active-Pokémon HP.

    For every turn where both ``hp_pct`` values are present, the turn is
    attributed to P1 (higher HP), to P2, or counted as level.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``p1_lead_turns``, ``p2_lead_turns``,
        ``equal_turns`` and ``lead_duration_diff`` (P1 lead turns minus P2
        lead turns). Battles whose timeline is not a list produce no row.
    """
    records = []

    for battle in data:
        battle_id = battle.get("battle_id", None)
        turns = battle.get("battle_timeline", [])
        if not isinstance(turns, list):
            continue

        tally = {"p1": 0, "p2": 0, "tie": 0}
        for turn in turns:
            if not isinstance(turn, dict):
                continue

            hp_one = turn.get("p1_pokemon_state", {}).get("hp_pct", None)
            hp_two = turn.get("p2_pokemon_state", {}).get("hp_pct", None)
            # Turns with an unknown HP on either side say nothing about the lead.
            if hp_one is None or hp_two is None:
                continue

            if hp_one > hp_two:
                leader = "p1"
            elif hp_two > hp_one:
                leader = "p2"
            else:
                leader = "tie"
            tally[leader] += 1

        records.append({
            "battle_id": battle_id,
            "p1_lead_turns": tally["p1"],
            "p2_lead_turns": tally["p2"],
            "equal_turns": tally["tie"],
            "lead_duration_diff": tally["p1"] - tally["p2"],
        })

    return pd.DataFrame(records)


In [None]:
# FEATURES DIFFERENZA COLPI CRITICI DI P1 E P2


import pandas as pd

def create_critical_hit_ratio_feature(data: list[dict]) -> pd.DataFrame:
    """Count critical hits per player and report the net difference.

    A move counts as a critical hit when its details are a dict with a truthy
    ``critical`` or ``crit`` entry.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``p1_critical_hits``,
        ``p2_critical_hits`` and ``critical_hit_ratio_diff`` (P1 count minus
        P2 count; a difference despite the column name). Battles whose
        timeline is not a list produce no row.
    """

    def _landed_crit(move):
        if not isinstance(move, dict):
            return False
        return bool(move.get("critical", False) or move.get("crit", False))

    records = []
    for battle in data:
        battle_id = battle.get("battle_id", None)
        turns = battle.get("battle_timeline", [])
        if not isinstance(turns, list):
            continue

        p1_crits = 0
        p2_crits = 0
        for turn in turns:
            if not isinstance(turn, dict):
                continue
            if _landed_crit(turn.get("p1_move_details", {})):
                p1_crits += 1
            if _landed_crit(turn.get("p2_move_details", {})):
                p2_crits += 1

        records.append({
            "battle_id": battle_id,
            "p1_critical_hits": p1_crits,
            "p2_critical_hits": p2_crits,
            "critical_hit_ratio_diff": p1_crits - p2_crits,
        })

    return pd.DataFrame(records)


In [None]:
# FEATURES CHE VALUTA LA RECOVERY DEL PLAYER


import pandas as pd

def create_hp_recovery_feature(data: list[dict]) -> pd.DataFrame:
    """Count, per battle, the turns in which each player's active Pokémon regained HP.

    A turn is a recovery turn for a player when the active Pokémon's
    ``hp_pct`` is strictly higher than on the previous turn *and* the active
    Pokémon is the same one (same ``name``). Without the name check, switching
    from a damaged Pokémon to a healthier one would be miscounted as recovery.
    When states carry no ``name`` the comparison degrades to HP only.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``p1_hp_recovery_turns``,
        ``p2_hp_recovery_turns`` and ``hp_recovery_turns_diff`` (P1 minus P2).
        Battles whose timeline is not a list of at least two turns produce no
        row.
    """
    sides = (("p1", "p1_pokemon_state"), ("p2", "p2_pokemon_state"))
    rows = []

    for battle in data:
        battle_id = battle.get("battle_id", None)
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or len(timeline) < 2:
            continue

        recover_turns = {"p1": 0, "p2": 0}
        # (name, hp) observed on the previous usable turn; the first turn only seeds it.
        previous = {"p1": (None, None), "p2": (None, None)}

        for turn in timeline:
            if not isinstance(turn, dict):
                continue
            for side, state_key in sides:
                # ``or {}`` also covers a state explicitly set to None.
                state = turn.get(state_key) or {}
                name = state.get("name")
                hp = state.get("hp_pct")
                prev_name, prev_hp = previous[side]

                if (
                    hp is not None
                    and prev_hp is not None
                    and name == prev_name  # a switch-in is not a recovery
                    and hp > prev_hp
                ):
                    recover_turns[side] += 1

                previous[side] = (name, hp)

        rows.append({
            "battle_id": battle_id,
            "p1_hp_recovery_turns": recover_turns["p1"],
            "p2_hp_recovery_turns": recover_turns["p2"],
            "hp_recovery_turns_diff": recover_turns["p1"] - recover_turns["p2"],
        })

    return pd.DataFrame(rows)


In [None]:
# FEATURES CHE COMBINA IN UN UNICO INDICATORE DUE DIMENSIONI COME FORTUNA E CONTROLLO TATTICO


import pandas as pd
import numpy as np

def create_luck_control_index(df: pd.DataFrame) -> pd.DataFrame:
    """Blend the RNG and status-control features into one composite index.

    ``luck_control_index = rng_status_diff + 0.5 * status_net_balance``, with
    missing values in either input column treated as 0. The input frame is
    not modified.

    Args:
        df: Frame containing ``battle_id``, ``rng_status_diff`` and
            ``status_net_balance``.

    Returns:
        A new DataFrame with only ``battle_id`` and ``luck_control_index``.

    Raises:
        KeyError: If ``rng_status_diff`` or ``status_net_balance`` is missing.
    """
    required = ("rng_status_diff", "status_net_balance")
    if any(column not in df.columns for column in required):
        raise KeyError("Colonne necessarie non trovate: servono 'rng_status_diff' e 'status_net_balance'.")

    result = df.copy()
    luck_part = result["rng_status_diff"].fillna(0)
    control_part = result["status_net_balance"].fillna(0)
    result["luck_control_index"] = luck_part + 0.5 * control_part

    return result[["battle_id", "luck_control_index"]]


In [None]:
# Momentum feature: tracks how the advantage evolves over the course of the battle

def create_momentum_score_feature(data: list[dict]) -> pd.DataFrame:
    """Compute an average per-turn momentum score for each battle.

    Every turn contributes, from P1's viewpoint:

    * the HP difference ``p1_hp - p2_hp`` (a missing or ``None`` ``hp_pct``
      counts as 1);
    * +0.5 if P2's active Pokémon has a status, -0.5 if P1's does;
    * +3 if P2's active Pokémon is at 0 HP while P1's is not, -3 in the
      mirrored case.

    A missing or empty status is treated as "no status". Previously the
    ``""`` default used for a missing key was scored as an affliction.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id`` and ``momentum_score`` (total score
        divided by the number of timeline entries). Battles whose timeline is
        not a non-empty list produce no row.
    """
    # A tuple, not a set: membership must also work for unhashable status values.
    no_status = ("nostatus", "noeffect", "", None)
    rows = []

    for battle in data:
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or len(timeline) == 0:
            continue

        score = 0
        total_turns = len(timeline)

        for turn in timeline:
            if not isinstance(turn, dict):
                continue

            # ``or {}`` also covers a state explicitly set to None.
            p1_state = turn.get("p1_pokemon_state") or {}
            p2_state = turn.get("p2_pokemon_state") or {}

            p1_hp = p1_state.get("hp_pct")
            p2_hp = p2_state.get("hp_pct")
            if p1_hp is None:
                p1_hp = 1
            if p2_hp is None:
                p2_hp = 1
            hp_diff = p1_hp - p2_hp

            status_bonus = 0
            if p2_state.get("status") not in no_status:
                status_bonus += 0.5
            if p1_state.get("status") not in no_status:
                status_bonus -= 0.5

            # Knock-out swing: one side at 0 HP while the other still stands.
            if p2_hp == 0 and p1_hp > 0:
                score += 3
            elif p1_hp == 0 and p2_hp > 0:
                score -= 3

            score += hp_diff + status_bonus

        rows.append({
            "battle_id": battle_id,
            "momentum_score": score / total_turns,
        })

    return pd.DataFrame(rows)




In [None]:
# features sulla differenza di switch tra player


import pandas as pd

def create_switch_count_diff_feature(data: list[dict]) -> pd.DataFrame:
    """Estimate how many times each player switched Pokémon and the difference.

    A switch is inferred whenever the active Pokémon's ``name`` differs from
    the name on the immediately preceding turn. Both names must be present: a
    turn without a name resets the comparison for that side.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``p1_switches``, ``p2_switches`` and
        ``switch_count_diff`` (P1 minus P2). Battles whose timeline is not a
        non-empty list produce no row.
    """
    state_keys = ("p1_pokemon_state", "p2_pokemon_state")
    records = []

    for battle in data:
        battle_id = battle.get("battle_id")
        turns = battle.get("battle_timeline", [])
        if not (isinstance(turns, list) and len(turns) > 0):
            continue

        last_seen = dict.fromkeys(state_keys)
        changes = dict.fromkeys(state_keys, 0)

        for turn in turns:
            for key in state_keys:
                current = turn.get(key, {}).get("name")
                before = last_seen[key]
                if before and current and current != before:
                    changes[key] += 1
                last_seen[key] = current

        p1_total = changes["p1_pokemon_state"]
        p2_total = changes["p2_pokemon_state"]
        records.append({
            "battle_id": battle_id,
            "p1_switches": p1_total,
            "p2_switches": p2_total,
            "switch_count_diff": p1_total - p2_total,
        })

    return pd.DataFrame(records)


## Features status advanced

In [None]:
import pandas as pd

def create_psychic_advantage_feature(data: list[dict], base_stats: dict) -> pd.DataFrame:
    """Compare the number of Psychic-type Pokémon available to each player.

    P1's count comes from ``p1_team_details``. P2's count comes from the
    distinct Pokémon that appear as ``p2_pokemon_state`` in the timeline.
    Names are lower-cased before being looked up in ``base_stats``; unknown
    names are ignored.

    Args:
        data: Battle dicts; ``battle_id``, ``p1_team_details`` and
            ``battle_timeline`` are read.
        base_stats: Mapping from lower-case Pokémon name to a dict whose
            optional ``types`` entry lists type names.

    Returns:
        DataFrame with ``battle_id``, ``p1_psychic_count``,
        ``p2_psychic_count`` and ``psychic_advantage`` (P1 minus P2).
    """

    def _is_psychic(species):
        if species not in base_stats:
            return False
        type_names = [t.lower() for t in base_stats[species].get("types", [])]
        return "psychic" in type_names

    records = []
    for battle in data:
        battle_id = battle.get("battle_id")

        roster = battle.get("p1_team_details", [])
        p1_total = 0
        if isinstance(roster, list):
            p1_total = sum(1 for mon in roster if _is_psychic(mon.get("name", "").lower()))

        # P2's full team is not read here: only Pokémon actually seen count.
        turns = battle.get("battle_timeline", [])
        opponents = set()
        if isinstance(turns, list):
            for turn in turns:
                seen = turn.get("p2_pokemon_state", {}).get("name")
                if seen:
                    opponents.add(seen.lower())
        p2_total = sum(1 for species in opponents if _is_psychic(species))

        records.append({
            "battle_id": battle_id,
            "p1_psychic_count": p1_total,
            "p2_psychic_count": p2_total,
            "psychic_advantage": p1_total - p2_total,
        })

    return pd.DataFrame(records)


In [None]:
import pandas as pd

def create_electric_advantage_feature(data: list[dict], base_stats: dict) -> pd.DataFrame:
    """Compare the number of Electric-type Pokémon available to each player.

    P1's count comes from ``p1_team_details``. P2's count comes from the
    distinct Pokémon that appear as ``p2_pokemon_state`` in the timeline.
    Names are lower-cased before being looked up in ``base_stats``; unknown
    names are ignored.

    Args:
        data: Battle dicts; ``battle_id``, ``p1_team_details`` and
            ``battle_timeline`` are read.
        base_stats: Mapping from lower-case Pokémon name to a dict whose
            optional ``types`` entry lists type names.

    Returns:
        DataFrame with ``battle_id``, ``p1_electric_count``,
        ``p2_electric_count`` and ``electric_advantage`` (P1 minus P2).
    """

    def _is_electric(species):
        if species not in base_stats:
            return False
        type_names = [t.lower() for t in base_stats[species].get("types", [])]
        return "electric" in type_names

    records = []
    for battle in data:
        battle_id = battle.get("battle_id")

        roster = battle.get("p1_team_details", [])
        p1_total = 0
        if isinstance(roster, list):
            p1_total = sum(1 for mon in roster if _is_electric(mon.get("name", "").lower()))

        # P2's full team is not read here: only Pokémon actually seen count.
        turns = battle.get("battle_timeline", [])
        opponents = set()
        if isinstance(turns, list):
            for turn in turns:
                seen = turn.get("p2_pokemon_state", {}).get("name")
                if seen:
                    opponents.add(seen.lower())
        p2_total = sum(1 for species in opponents if _is_electric(species))

        records.append({
            "battle_id": battle_id,
            "p1_electric_count": p1_total,
            "p2_electric_count": p2_total,
            "electric_advantage": p1_total - p2_total,
        })

    return pd.DataFrame(records)


## Features refined

In [None]:
# RAFFINIAMO damage_diff
import pandas as pd

def create_damage_ratio_per_turn_feature(data: list[dict]) -> pd.DataFrame:
    """Compute each player's average move damage per turn and their difference.

    The ``damage`` entry of each player's move details is summed over the
    timeline (missing or ``None`` move details and damages count as 0) and
    divided by the number of timeline entries.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``p1_avg_damage_per_turn``,
        ``p2_avg_damage_per_turn`` and ``damage_ratio_per_turn`` (P1 average
        minus P2 average; a difference despite the column name). Battles whose
        timeline is not a non-empty list produce no row.
    """

    def _damage(turn, move_key):
        details = turn.get(move_key) or {}
        return details.get("damage", 0) or 0

    records = []
    for battle in data:
        battle_id = battle.get("battle_id")
        turns = battle.get("battle_timeline", [])
        if not (isinstance(turns, list) and len(turns) > 0):
            continue

        dealt_by_p1 = 0
        dealt_by_p2 = 0
        for turn in turns:
            dealt_by_p1 += _damage(turn, "p1_move_details")
            dealt_by_p2 += _damage(turn, "p2_move_details")

        denominator = max(len(turns), 1)
        p1_mean = dealt_by_p1 / denominator
        p2_mean = dealt_by_p2 / denominator

        records.append({
            "battle_id": battle_id,
            "p1_avg_damage_per_turn": p1_mean,
            "p2_avg_damage_per_turn": p2_mean,
            "damage_ratio_per_turn": p1_mean - p2_mean,
        })

    return pd.DataFrame(records)



In [None]:
# cattura quanto controllo strategico ha avuto il p1 sugli status

import pandas as pd
import numpy as np

def create_status_control_index_feature(data: list[dict]) -> pd.DataFrame:
    """Combine status frequency and severity into one control index for P1.

    Per turn, a recognised status on P2's active Pokémon counts as inflicted
    and adds its weight to the severity score; one on P1's counts as suffered
    and subtracts its weight (sleep/freeze 3, paralysis 2, burn/poison 1).
    The index is ``inflicted / (inflicted + suffered + 1e-6) * severity``.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``status_inflicted``,
        ``status_suffered``, ``status_severity_score_alt`` and
        ``status_control_index``. Battles whose timeline is not a non-empty
        list produce no row.
    """
    # The keys double as the set of recognised status labels.
    weights = {
        "slp": 3, "sleep": 3,
        "frz": 3, "freeze": 3,
        "par": 2, "paralyzed": 2,
        "brn": 1, "burn": 1,
        "psn": 1, "poison": 1
    }
    records = []

    for battle in data:
        battle_id = battle.get("battle_id")
        turns = battle.get("battle_timeline", [])
        if not (isinstance(turns, list) and len(turns) > 0):
            continue

        dealt = 0
        taken = 0
        severity = 0

        for turn in turns:
            own = str(turn.get("p1_pokemon_state", {}).get("status", "")).lower()
            foe = str(turn.get("p2_pokemon_state", {}).get("status", "")).lower()

            if foe in weights:
                dealt += 1
                severity += weights[foe]
            if own in weights:
                taken += 1
                severity -= weights[own]

        # The epsilon keeps the share defined when no status was ever seen.
        share = dealt / (dealt + taken + 1e-6)

        records.append({
            "battle_id": battle_id,
            "status_inflicted": dealt,
            "status_suffered": taken,
            "status_severity_score_alt": severity,
            "status_control_index": share * severity,
        })

    return pd.DataFrame(records)


In [None]:
# Feature capturing early momentum: a player's ability to take control of the match right away

import pandas as pd

def create_first_ko_advantage_feature(data: list[dict]) -> pd.DataFrame:
    """Report which player scored the first knock-out of each battle.

    Turns are scanned in order. The first turn where one active Pokémon is at
    0 ``hp_pct`` while the other is above 0 decides the value. A missing
    ``hp_pct`` counts as 1.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id`` and ``first_ko_advantage``: +1 when P1
        got the first KO, -1 when P2 did, 0 when no such turn exists. Battles
        whose timeline is not a non-empty list produce no row.
    """

    def _ko_signal(turn):
        own_hp = turn.get("p1_pokemon_state", {}).get("hp_pct", 1)
        foe_hp = turn.get("p2_pokemon_state", {}).get("hp_pct", 1)
        if foe_hp == 0 and own_hp > 0:
            return 1  # P1 knocks out P2's Pokémon
        if own_hp == 0 and foe_hp > 0:
            return -1  # P2 knocks out P1's Pokémon
        return 0

    records = []
    for battle in data:
        battle_id = battle.get("battle_id")
        turns = battle.get("battle_timeline", [])
        if not (isinstance(turns, list) and len(turns) > 0):
            continue

        # The lazy scan stops at the first decisive turn.
        outcome = next((sig for sig in map(_ko_signal, turns) if sig != 0), 0)

        records.append({
            "battle_id": battle_id,
            "first_ko_advantage": outcome,
        })

    return pd.DataFrame(records)


In [None]:
# Feature measuring how efficiently a player converts their moves into actual damage

import pandas as pd

def create_move_efficiency_index_feature(data: list[dict]) -> pd.DataFrame:
    """Compare the average damage per recorded move between the two players.

    For each player the ``damage`` of every move is summed (missing or
    ``None`` values count as 0) and divided by the number of turns with
    non-empty move details, or by 1 when there are none.

    Args:
        data: Battle dicts; ``battle_id`` and ``battle_timeline`` are read.

    Returns:
        DataFrame with ``battle_id``, ``p1_move_efficiency``,
        ``p2_move_efficiency`` and ``move_efficiency_index`` (P1 minus P2).
        Battles whose timeline is not a non-empty list produce no row.
    """
    move_keys = ("p1_move_details", "p2_move_details")
    records = []

    for battle in data:
        battle_id = battle.get("battle_id")
        turns = battle.get("battle_timeline", [])
        if not (isinstance(turns, list) and len(turns) > 0):
            continue

        damage = [0, 0]
        used = [0, 0]
        for turn in turns:
            moves = [turn.get(key) or {} for key in move_keys]
            for idx, move in enumerate(moves):
                damage[idx] += move.get("damage", 0) or 0
            for idx, move in enumerate(moves):
                # Empty details mean no move was recorded for that turn.
                if move:
                    used[idx] += 1

        p1_rate = damage[0] / max(used[0], 1)
        p2_rate = damage[1] / max(used[1], 1)

        records.append({
            "battle_id": battle_id,
            "p1_move_efficiency": p1_rate,
            "p2_move_efficiency": p2_rate,
            "move_efficiency_index": p1_rate - p2_rate,
        })

    return pd.DataFrame(records)


## Features Boosted [Potenziamento TOP Features]

In [None]:
# EARLY MOMENTUM, misura la media del momentum nei primi 5 turni

def create_early_momentum_features(data: list[dict]) -> pd.DataFrame:
    """Compute the early momentum of each battle.

    Early momentum is the fraction of the first five timeline turns in which
    P1's active ``hp_pct`` is strictly greater than P2's. Turns that are not
    dicts, or where either HP is missing, are left out of the fraction; if no
    turn is usable the value is 0.0.

    Args:
        data: Battle dicts; ``battle_id``, ``battle_timeline`` and the
            optional ``player_won`` are read.

    Returns:
        DataFrame with ``battle_id``, ``early_momentum`` and, when present in
        the input, ``player_won`` as an int. NaNs are filled with 0. Battles
        whose timeline is not a non-empty list produce no row.
    """
    import numpy as np
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when available and otherwise
        # falls back to the console bar instead of failing.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: iterate silently without tqdm.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc="⚡ Early Momentum"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or not timeline:
            continue

        momentum_values = []
        for turn in timeline[:5]:
            if not isinstance(turn, dict):
                continue
            # ``or {}`` also covers a state explicitly set to None.
            p1 = (turn.get("p1_pokemon_state") or {}).get("hp_pct")
            p2 = (turn.get("p2_pokemon_state") or {}).get("hp_pct")
            if p1 is None or p2 is None:
                continue
            momentum_values.append(1 if p1 > p2 else 0)

        early_momentum = float(np.mean(momentum_values)) if momentum_values else 0.0

        row = {
            "battle_id": battle_id,
            "early_momentum": early_momentum,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    return pd.DataFrame(rows).fillna(0)


In [None]:
# MOMENTUM ROLLING, misura la media del momentum negli ultimi 5 turni

def create_rolling_momentum_features(data: list[dict]) -> pd.DataFrame:
    """Compute the closing ("rolling") momentum of each battle.

    Rolling momentum is the fraction of the last five timeline turns in which
    P1's active ``hp_pct`` is strictly greater than P2's. Turns that are not
    dicts, or where either HP is missing, are left out of the fraction; if no
    turn is usable the value is 0.0.

    Args:
        data: Battle dicts; ``battle_id``, ``battle_timeline`` and the
            optional ``player_won`` are read.

    Returns:
        DataFrame with ``battle_id``, ``rolling_momentum`` and, when present
        in the input, ``player_won`` as an int. NaNs are filled with 0.
        Battles whose timeline is not a non-empty list produce no row.
    """
    import numpy as np
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when available and otherwise
        # falls back to the console bar instead of failing.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: iterate silently without tqdm.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc="🔄 Rolling Momentum"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or not timeline:
            continue

        values = []
        for turn in timeline[-5:]:
            if not isinstance(turn, dict):
                continue
            # ``or {}`` also covers a state explicitly set to None.
            p1 = (turn.get("p1_pokemon_state") or {}).get("hp_pct")
            p2 = (turn.get("p2_pokemon_state") or {}).get("hp_pct")
            if p1 is None or p2 is None:
                continue
            values.append(1 if p1 > p2 else 0)

        rolling_momentum = float(np.mean(values)) if values else 0.0

        row = {
            "battle_id": battle_id,
            "rolling_momentum": rolling_momentum,
        }
        # Carry the target along when the battle is labelled.
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    return pd.DataFrame(rows).fillna(0)


In [None]:
# MOMENTUM SLOPE, analizza il trend

 # MOMENTUM SLOPE FEATURE
def create_momentum_slope_features(data: list[dict]) -> pd.DataFrame:
    """Compute the linear trend of the momentum series of each battle.

    The momentum series has one value per usable turn: 1 when P1's active
    ``hp_pct`` is strictly greater than P2's, else 0. Turns that are not
    dicts, or where either HP is missing, are dropped. The slope comes from a
    degree-1 least-squares fit of that series against its index; it is 0.0
    when fewer than two usable turns exist.

    Args:
        data: Battle dicts; ``battle_id``, ``battle_timeline`` and the
            optional ``player_won`` are read.

    Returns:
        DataFrame with ``battle_id``, ``momentum_slope`` and, when present in
        the input, ``player_won`` as an int. NaNs are filled with 0. Battles
        whose timeline is not a non-empty list produce no row.
    """
    import numpy as np
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when available and otherwise
        # falls back to the console bar instead of failing.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: iterate silently without tqdm.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc="📈 Momentum Slope"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or not timeline:
            continue

        momentum_values = []
        for turn in timeline:
            if not isinstance(turn, dict):
                continue
            # ``or {}`` also covers a state explicitly set to None.
            p1 = (turn.get("p1_pokemon_state") or {}).get("hp_pct")
            p2 = (turn.get("p2_pokemon_state") or {}).get("hp_pct")
            if p1 is None or p2 is None:
                continue
            momentum_values.append(1 if p1 > p2 else 0)

        if len(momentum_values) <= 1:
            # A line cannot be fitted through fewer than two points.
            slope = 0.0
        else:
            x = np.arange(len(momentum_values))
            y = np.array(momentum_values)
            slope = float(np.polyfit(x, y, 1)[0])

        row = {
            "battle_id": battle_id,
            "momentum_slope": slope,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    return pd.DataFrame(rows).fillna(0)


In [None]:
# MOMENTUM VOLATILITY FEATURE
def create_momentum_volatility_features(data: list[dict]) -> pd.DataFrame:
    """Compute the volatility of the momentum series of each battle.

    The momentum series has one value per usable turn: 1 when P1's active
    ``hp_pct`` is strictly greater than P2's, else 0. Turns that are not
    dicts, or where either HP is missing, are dropped. Volatility is the
    standard deviation of that series (``numpy.std`` defaults), or 0.0 when
    fewer than two usable turns exist.

    Args:
        data: Battle dicts; ``battle_id``, ``battle_timeline`` and the
            optional ``player_won`` are read.

    Returns:
        DataFrame with ``battle_id``, ``momentum_volatility`` and, when
        present in the input, ``player_won`` as an int. NaNs are filled with
        0. Battles whose timeline is not a non-empty list produce no row.
    """
    import numpy as np
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when available and otherwise
        # falls back to the console bar instead of failing.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: iterate silently without tqdm.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc="🌪 Momentum Volatility"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or not timeline:
            continue

        momentum_values = []
        for turn in timeline:
            if not isinstance(turn, dict):
                continue
            # ``or {}`` also covers a state explicitly set to None.
            p1 = (turn.get("p1_pokemon_state") or {}).get("hp_pct")
            p2 = (turn.get("p2_pokemon_state") or {}).get("hp_pct")
            if p1 is None or p2 is None:
                continue
            momentum_values.append(1 if p1 > p2 else 0)

        if len(momentum_values) <= 1:
            volatility = 0.0
        else:
            volatility = float(np.std(momentum_values))

        row = {
            "battle_id": battle_id,
            "momentum_volatility": volatility,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    return pd.DataFrame(rows).fillna(0)


In [None]:
# DAMAGE SLOPE FEATURE
def create_damage_slope_features(data: list[dict]) -> pd.DataFrame:
    """Compute the linear trend of the cumulative damage differential.

    For each pair of consecutive turns, the drop in P2's active ``hp_pct`` is
    credited to P1 as damage dealt and the drop in P1's to P2; HP increases
    count as 0. After each pair the running difference (P1 cumulative minus
    P2 cumulative) is recorded. The slope comes from a degree-1 least-squares
    fit of that series against its index, or is 0.0 with fewer than two
    points.

    Args:
        data: Battle dicts; ``battle_id``, ``battle_timeline`` and the
            optional ``player_won`` are read.

    Returns:
        DataFrame with ``battle_id``, ``damage_slope`` and, when present in
        the input, ``player_won`` as an int. NaNs are filled with 0. Battles
        whose timeline is not a non-empty list produce no row.
    """
    import numpy as np
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when available and otherwise
        # falls back to the console bar instead of failing.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: iterate silently without tqdm.
        def tqdm(iterable, **_kwargs):
            return iterable

    def _hp_drop(prev, curr, state_key):
        """Return the non-negative HP drop between two turns, 0 if unknown."""
        try:
            return max(0, prev[state_key]["hp_pct"] - curr[state_key]["hp_pct"])
        except (KeyError, TypeError):
            # Missing state or HP, or a malformed turn: best-effort, no damage.
            return 0

    rows = []

    for battle in tqdm(data, desc="📉 Damage Slope"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or not timeline:
            continue

        p1_cum_dmg = 0.0
        p2_cum_dmg = 0.0
        diff_series = []

        # NOTE(review): HP is compared across turns without checking the active
        # Pokémon's name, so a switch to a lower-HP Pokémon is counted as
        # damage. Confirm whether that is intended.
        for prev, curr in zip(timeline, timeline[1:]):
            p1_cum_dmg += _hp_drop(prev, curr, "p2_pokemon_state")
            p2_cum_dmg += _hp_drop(prev, curr, "p1_pokemon_state")
            diff_series.append(p1_cum_dmg - p2_cum_dmg)

        if len(diff_series) <= 1:
            slope = 0.0
        else:
            x = np.arange(len(diff_series))
            y = np.array(diff_series)
            slope = float(np.polyfit(x, y, 1)[0])

        row = {
            "battle_id": battle_id,
            "damage_slope": slope,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    return pd.DataFrame(rows).fillna(0)


In [None]:
# AUC_HP_DIFF FEATURE
def create_auc_hp_diff_features(data: list[dict]) -> pd.DataFrame:
    """Compute the discrete area under the HP-advantage curve of each battle.

    ``auc_hp_diff`` is the sum over usable turns of P1's active ``hp_pct``
    minus P2's. Turns that are not dicts, or where either HP is missing, are
    skipped; if no turn is usable the value is 0.0.

    Args:
        data: Battle dicts; ``battle_id``, ``battle_timeline`` and the
            optional ``player_won`` are read.

    Returns:
        DataFrame with ``battle_id``, ``auc_hp_diff`` and, when present in
        the input, ``player_won`` as an int. NaNs are filled with 0. Battles
        whose timeline is not a non-empty list produce no row.
    """
    import numpy as np
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when available and otherwise
        # falls back to the console bar instead of failing.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: iterate silently without tqdm.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc="📘 AUC HP Diff"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or not timeline:
            continue

        diff_list = []
        for turn in timeline:
            if not isinstance(turn, dict):
                continue
            # ``or {}`` also covers a state explicitly set to None.
            p1 = (turn.get("p1_pokemon_state") or {}).get("hp_pct")
            p2 = (turn.get("p2_pokemon_state") or {}).get("hp_pct")
            if p1 is None or p2 is None:
                continue
            diff_list.append(p1 - p2)

        # Discrete AUC: plain sum of the per-turn HP differences.
        auc = float(np.sum(diff_list)) if diff_list else 0.0

        row = {
            "battle_id": battle_id,
            "auc_hp_diff": auc,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    return pd.DataFrame(rows).fillna(0)


In [None]:
# SWING COUNT FEATURE
def create_swing_count_features(data: list[dict]) -> pd.DataFrame:
    """Count how many times the HP lead changed hands during each battle.

    Each usable turn has a leader given by the sign of P1's active ``hp_pct``
    minus P2's. A swing is counted whenever the leader differs from the most
    recent earlier leader. Tied turns have no leader and are skipped, so a
    lead change that passes through a tie (P1 ahead, level, P2 ahead) still
    counts once. Previously such a change was missed because only adjacent
    turns were compared. Turns that are not dicts, or where either HP is
    missing, are ignored.

    Args:
        data: Battle dicts; ``battle_id``, ``battle_timeline`` and the
            optional ``player_won`` are read.

    Returns:
        DataFrame with ``battle_id``, ``swing_count`` and, when present in
        the input, ``player_won`` as an int. NaNs are filled with 0. Battles
        whose timeline is not a non-empty list produce no row.
    """
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when available and otherwise
        # falls back to the console bar instead of failing.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: iterate silently without tqdm.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc="🔁 Swing Count"):
        battle_id = battle.get("battle_id")
        timeline = battle.get("battle_timeline", [])
        if not isinstance(timeline, list) or not timeline:
            continue

        swing_count = 0
        last_leader = 0  # +1 = P1 ahead, -1 = P2 ahead, 0 = no leader seen yet

        for turn in timeline:
            if not isinstance(turn, dict):
                continue
            # ``or {}`` also covers a state explicitly set to None.
            p1 = (turn.get("p1_pokemon_state") or {}).get("hp_pct")
            p2 = (turn.get("p2_pokemon_state") or {}).get("hp_pct")
            if p1 is None or p2 is None:
                continue

            diff = p1 - p2
            leader = (diff > 0) - (diff < 0)
            if leader == 0:
                # A tie neither starts nor ends a lead.
                continue
            if last_leader != 0 and leader != last_leader:
                swing_count += 1
            last_leader = leader

        row = {
            "battle_id": battle_id,
            "swing_count": swing_count,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    return pd.DataFrame(rows).fillna(0)


In [None]:
# MAX CONSECUTIVE ADVANTAGE FEATURE
def create_max_consecutive_advantage(data: list[dict]) -> pd.DataFrame:
    """
    Compute the longest run of consecutive turns each player spent ahead on HP.

    The sign of ``hp_pct(P1) - hp_pct(P2)`` is taken for every turn that has
    both values. A run is a maximal stretch of identical signs; a tied turn
    (sign 0) ends the current run of either player. Turns missing an HP value
    are skipped and do not break a run.

    Args:
        data: Battle dicts. Reads ``battle_id``, ``battle_timeline`` and,
            when present, ``player_won``.

    Returns:
        DataFrame with ``battle_id``, ``max_consecutive_adv_p1``,
        ``max_consecutive_adv_p2``, ``max_consecutive_adv_diff`` (P1 minus P2)
        and, for battles that carry it, ``player_won`` cast to int. Battles
        with an empty or missing timeline are omitted; the feature columns
        exist even when no row is produced.
    """
    from itertools import groupby

    import numpy as np
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when it is available and the
        # console bar otherwise.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: run without it when tqdm is missing.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc="📈 Max Consecutive Advantage"):
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        signs = []
        for turn in timeline:
            # `or {}` also covers a state key explicitly set to None.
            p1 = (turn.get("p1_pokemon_state") or {}).get("hp_pct")
            p2 = (turn.get("p2_pokemon_state") or {}).get("hp_pct")
            if p1 is None or p2 is None:
                continue
            signs.append(np.sign(p1 - p2))  # +1, 0, -1

        max_p1 = 0
        max_p2 = 0
        # groupby yields maximal runs of equal signs; runs of 0 belong to nobody.
        for sign, run in groupby(signs):
            length = sum(1 for _ in run)
            if sign == 1:
                max_p1 = max(max_p1, length)
            elif sign == -1:
                max_p2 = max(max_p2, length)

        row = {
            "battle_id": battle.get("battle_id"),
            "max_consecutive_adv_p1": max_p1,
            "max_consecutive_adv_p2": max_p2,
            "max_consecutive_adv_diff": max_p1 - max_p2,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    result = pd.DataFrame(rows)
    if result.empty:
        result = pd.DataFrame(columns=[
            "battle_id",
            "max_consecutive_adv_p1",
            "max_consecutive_adv_p2",
            "max_consecutive_adv_diff",
        ])
    return result.fillna(0)


In [None]:
# MOMENTUM ENERGY FEATURE
def create_momentum_energy_features(data: list[dict]) -> pd.DataFrame:
    """
    Compute the 'momentum energy': the HP advantage weighted towards late turns.

    ME = sum_t [ (hp_p1(t) - hp_p2(t)) * (t / T) ]

    ``t`` is the 1-based position of the turn in the timeline and ``T`` the
    timeline length. Turns missing either HP value contribute nothing but
    still occupy their position ``t``.

    Args:
        data: Battle dicts. Reads ``battle_id``, ``battle_timeline`` and,
            when present, ``player_won``.

    Returns:
        DataFrame with ``battle_id``, ``momentum_energy`` and, for battles
        that carry it, ``player_won`` cast to int. Battles with an empty or
        missing timeline are omitted; the feature columns exist even when no
        row is produced.
    """
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when it is available and the
        # console bar otherwise.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: run without it when tqdm is missing.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc="⚡ Momentum Energy"):
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        total_turns = len(timeline)
        me_sum = 0.0

        for t, turn in enumerate(timeline, start=1):
            # `or {}` also covers a state key explicitly set to None.
            p1 = (turn.get("p1_pokemon_state") or {}).get("hp_pct")
            p2 = (turn.get("p2_pokemon_state") or {}).get("hp_pct")
            if p1 is None or p2 is None:
                continue

            me_sum += (p1 - p2) * (t / total_turns)

        row = {
            "battle_id": battle.get("battle_id"),
            "momentum_energy": me_sum,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    result = pd.DataFrame(rows)
    if result.empty:
        result = pd.DataFrame(columns=["battle_id", "momentum_energy"])
    return result.fillna(0)


In [None]:
# DAMAGE BURST FEATURE
def create_damage_burst_features(data: list[dict]) -> pd.DataFrame:
    """
    Compute the largest single-turn HP drop each player inflicted.

    Damage dealt by P1 is measured as the HP drop of P2's active Pokemon
    between two consecutive timeline entries, and vice versa. A pair of
    entries is ignored for a side when an HP value is missing or when the
    active Pokemon's ``name`` differs between the two entries: after a switch
    the HP gap compares two different Pokemon and is not damage.

    Args:
        data: Battle dicts. Reads ``battle_id``, ``battle_timeline`` and,
            when present, ``player_won``.

    Returns:
        DataFrame with ``battle_id``, ``damage_burst_p1``,
        ``damage_burst_p2``, ``damage_burst_diff`` (P1 minus P2) and, for
        battles that carry it, ``player_won`` cast to int. Battles with an
        empty or missing timeline are omitted; the feature columns exist even
        when no row is produced.
    """
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when it is available and the
        # console bar otherwise.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: run without it when tqdm is missing.
        def tqdm(iterable, **_kwargs):
            return iterable

    rows = []

    for battle in tqdm(data, desc=" Damage Burst"):
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        p1_bursts = []
        p2_bursts = []

        for prev, curr in zip(timeline, timeline[1:]):
            # Damage dealt by P1 shows up as an HP drop on P2's side.
            dmg_p1 = _hp_drop_same_pokemon(prev, curr, "p2_pokemon_state")
            if dmg_p1 is not None:
                p1_bursts.append(dmg_p1)

            # Damage dealt by P2 shows up as an HP drop on P1's side.
            dmg_p2 = _hp_drop_same_pokemon(prev, curr, "p1_pokemon_state")
            if dmg_p2 is not None:
                p2_bursts.append(dmg_p2)

        burst_p1 = max(p1_bursts) if p1_bursts else 0.0
        burst_p2 = max(p2_bursts) if p2_bursts else 0.0

        row = {
            "battle_id": battle.get("battle_id"),
            "damage_burst_p1": burst_p1,
            "damage_burst_p2": burst_p2,
            "damage_burst_diff": burst_p1 - burst_p2,
        }
        if "player_won" in battle:
            row["player_won"] = int(battle["player_won"])

        rows.append(row)

    result = pd.DataFrame(rows)
    if result.empty:
        result = pd.DataFrame(columns=[
            "battle_id",
            "damage_burst_p1",
            "damage_burst_p2",
            "damage_burst_diff",
        ])
    return result.fillna(0)


def _hp_drop_same_pokemon(prev_turn, curr_turn, state_key):
    """Return the HP lost between two turns by the same active Pokemon.

    Returns None (pair not usable) when either turn or state is not a dict,
    when an ``hp_pct`` is missing, or when the ``name`` under ``state_key``
    changed between the turns. HP gains are clamped to 0.
    """
    if not isinstance(prev_turn, dict) or not isinstance(curr_turn, dict):
        return None

    prev_state = prev_turn.get(state_key)
    curr_state = curr_turn.get(state_key)
    if not isinstance(prev_state, dict) or not isinstance(curr_state, dict):
        return None

    prev_hp = prev_state.get("hp_pct")
    curr_hp = curr_state.get("hp_pct")
    if prev_hp is None or curr_hp is None:
        return None

    # States without a name compare equal (None == None) and are kept.
    if prev_state.get("name") != curr_state.get("name"):
        return None

    return max(0, prev_hp - curr_hp)


## Features based on timeline p.2

In [None]:
def create_timeline_strategy_features(data: list[dict]) -> pd.DataFrame:
    """
    Extract strategic features from ``battle_timeline``.

    Only differences between player 1 and player 2 are returned:
      - total_boosts_diff
      - avg_priority_diff
      - avg_power_diff
      - num_status_moves_diff
      - stab_ratio_diff (Same Type Attack Bonus)

    Args:
        data: Battle dicts. Reads ``battle_id``, ``battle_timeline`` and,
            when present, ``player_won``.

    Returns:
        DataFrame with ``battle_id``, the five ``*_diff`` columns and, for
        battles that carry it, ``player_won`` cast to int. Battles with an
        empty or missing timeline are omitted; the feature columns exist even
        when no row is produced.
    """
    import pandas as pd

    try:
        # tqdm.auto uses the notebook widget when it is available and the
        # console bar otherwise.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is cosmetic: run without it when tqdm is missing.
        def tqdm(iterable, **_kwargs):
            return iterable

    feature_list = []

    for battle in tqdm(data, desc="Estrazione timeline diff features (con STAB ratio)"):
        timeline = battle.get("battle_timeline", [])
        if not timeline:
            continue

        p1_vals = _timeline_player_stats(timeline, "p1_move_details", "p1_pokemon_state")
        p2_vals = _timeline_player_stats(timeline, "p2_move_details", "p2_pokemon_state")

        features = {
            "battle_id": battle.get("battle_id"),
            "total_boosts_diff": p1_vals[0] - p2_vals[0],
            "avg_priority_diff": p1_vals[1] - p2_vals[1],
            "avg_power_diff": p1_vals[2] - p2_vals[2],
            "num_status_moves_diff": p1_vals[3] - p2_vals[3],
            "stab_ratio_diff": p1_vals[4] - p2_vals[4],
        }
        if "player_won" in battle:
            features["player_won"] = int(battle["player_won"])

        feature_list.append(features)

    result = pd.DataFrame(feature_list)
    if result.empty:
        result = pd.DataFrame(columns=[
            "battle_id",
            "total_boosts_diff",
            "avg_priority_diff",
            "avg_power_diff",
            "num_status_moves_diff",
            "stab_ratio_diff",
        ])
    return result.fillna(0)


def _timeline_player_stats(timeline, move_key, state_key):
    """Return one player's (total_boosts, avg_priority, avg_power,
    num_status_moves, stab_ratio) computed over ``timeline``.

    ``move_key`` / ``state_key`` select that player's move and state dicts in
    each turn. Turns, moves or states that are not dicts are ignored, and a
    ``priority`` / ``base_power`` of None counts as 0.
    """
    import numpy as np

    turns = [t for t in timeline if isinstance(t, dict)]
    moves = [t[move_key] for t in turns if isinstance(t.get(move_key), dict)]

    # Number of non-zero boost entries, summed over every turn.
    total_boosts = 0
    for turn in turns:
        state = turn.get(state_key)
        boosts = state.get("boosts") if isinstance(state, dict) else None
        if isinstance(boosts, dict):
            total_boosts += sum(1 for value in boosts.values() if value != 0)

    avg_priority = np.mean([m.get("priority") or 0 for m in moves]) if moves else 0

    offensive_moves = [m for m in moves if m.get("category") in ("PHYSICAL", "SPECIAL")]
    avg_power = (
        np.mean([m.get("base_power") or 0 for m in offensive_moves])
        if offensive_moves else 0
    )

    num_status_moves = sum(1 for m in moves if m.get("category") == "STATUS")

    # STAB ratio: share of turns (with both a move and a state) where the
    # move type is one of the user's own types.
    stab_hits = 0
    total_moves = 0
    for turn in turns:
        move = turn.get(move_key)
        state = turn.get(state_key)
        if not (isinstance(move, dict) and isinstance(state, dict) and move and state):
            continue
        total_moves += 1
        poke_types = state.get("types", [])
        if not isinstance(poke_types, list):
            continue
        # Compare case-insensitively so "ICE" matches "ice".
        # NOTE(review): assumes move and Pokemon types may differ in case — confirm against the data.
        move_type = str(move.get("type") or "").lower()
        if move_type in {str(t).lower() for t in poke_types}:
            stab_hits += 1

    stab_ratio = stab_hits / total_moves if total_moves > 0 else 0

    return total_boosts, avg_priority, avg_power, num_status_moves, stab_ratio


# Merging Features


In [None]:
# Build the per-family feature tables for the TRAIN set.

print("Creazione feature per il TRAIN set...")

# One DataFrame per feature family; they are later merged on battle_id.
static_train       = create_static_features(train_data)
dynamic_train      = create_dynamic_features(train_data)
eff_train          = create_type_effectiveness_feature(train_data)
strategic_train    = create_strategic_features(train_data)
timeline_train     = create_timeline_strategy_features(train_data)
rng_train          = create_rng_features(train_data)
critical_hit_train = create_critical_hit_features(train_data)
paralysis_train    = create_paralysis_features(train_data)
freeze_train       = create_freeze_features(train_data)
sleep_train        = create_sleep_features(train_data)
paralysis_block_train = create_paralysis_block_rate_feature(train_data)
status_severity_train = create_status_severity_feature(train_data)
status_net_train = create_status_net_balance_feature(train_data)
lead_duration_train = create_lead_duration_feature(train_data)
crit_ratio_train = create_critical_hit_ratio_feature(train_data)
hp_recovery_train = create_hp_recovery_feature(train_data)
# luck_control_train is not built here: create_luck_control_index is called
# on the merged train_df further down.
damage_ratio_train = create_damage_ratio_per_turn_feature(train_data)
status_control_train = create_status_control_index_feature(train_data)
move_eff_train = create_move_efficiency_index_feature(train_data)
momentum_train = create_momentum_score_feature(train_data)
first_ko_train = create_first_ko_advantage_feature(train_data)
switch_train = create_switch_count_diff_feature(train_data)
special_adv_train = create_special_team_advantage_feature(train_data, pokemon_base_stats)
psychic_adv_train = create_psychic_advantage_feature(train_data, pokemon_base_stats)
electric_adv_train = create_electric_advantage_feature(train_data, pokemon_base_stats)

# Momentum / HP-curve families: the target column is stripped right away so
# it cannot collide with the one already present in the merge base.
early_momentum_train = create_early_momentum_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
rolling_train = create_rolling_momentum_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
slope_train = create_momentum_slope_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
vol_train = create_momentum_volatility_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
damage_slope_train = create_damage_slope_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
auc_train = create_auc_hp_diff_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
swing_train = create_swing_count_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
maxadv_train = create_max_consecutive_advantage(train_data).drop(
    columns=["player_won"], errors="ignore"
)
me_train = create_momentum_energy_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
burst_train = create_damage_burst_features(train_data).drop(
    columns=["player_won"], errors="ignore"
)
# Same extractor and same data as timeline_train: reuse that result instead
# of running a second full pass over train_data.
timeline_strategy_train = timeline_train.drop(columns=["player_won"], errors="ignore")






# Calcolo feature Top-Tier
def count_top_tiers_p1(battle):
    """Count the top-tier Pokemon in player 1's team.

    Reads ``battle["p1_team_details"]``; returns 0 when it is not a list.
    Names are compared after ``str.capitalize()``, non-dict team entries are
    skipped, and duplicate names are counted once per entry.
    """
    top_tiers = ("Tauros", "Snorlax", "Chansey", "Starmie", "Exeggutor")

    team = battle.get("p1_team_details", [])
    if not isinstance(team, list):
        return 0

    count = 0
    for member in team:
        if not isinstance(member, dict):
            continue
        if member.get("name", "").capitalize() in top_tiers:
            count += 1
    return count

def count_top_tiers_p2(battle):
    """Count the distinct top-tier Pokemon seen on player 2's side.

    Names are collected from ``p2_lead_details`` (when it is a dict) and from
    the ``p2_pokemon_state`` of every dict turn in ``battle_timeline`` (when
    it is a list), normalised with ``str.capitalize()``. Each name counts once.
    """
    top_tiers = ["Tauros", "Snorlax", "Chansey", "Starmie", "Exeggutor"]
    seen = set()

    lead = battle.get("p2_lead_details")
    if isinstance(lead, dict):
        seen.add(lead.get("name", "").capitalize())

    timeline = battle.get("battle_timeline", [])
    if isinstance(timeline, list):
        # A missing name becomes "", which can never match a top tier.
        seen.update(
            turn["p2_pokemon_state"].get("name", "").capitalize()
            for turn in timeline
            if isinstance(turn, dict) and "p2_pokemon_state" in turn
        )

    return len(seen.intersection(top_tiers))

# Top-tier counts per battle, built one record at a time.
tier_features_train = pd.DataFrame(
    [
        {
            "battle_id": b["battle_id"],
            "p1_top_tier_count": count_top_tiers_p1(b),
            "p2_top_tier_count": count_top_tiers_p2(b),
        }
        for b in train_data
    ],
    columns=["battle_id", "p1_top_tier_count", "p2_top_tier_count"],
)
# tier_diff > 0 means player 1 fields more top-tier Pokemon than player 2.
tier_features_train["tier_diff"] = tier_features_train["p1_top_tier_count"].sub(
    tier_features_train["p2_top_tier_count"]
)
tier_features_train["tier_advantage"] = tier_features_train["tier_diff"].gt(0).astype(int)

# Normalise the target held by strategic_train to a 0/1 integer.
strategic_train["player_won"] = strategic_train["player_won"].fillna(0).astype(int)

def _drop_target_if_present(df):
    return df.drop(columns=["player_won"], errors="ignore")

# Strip the target from the feature tables. static_train is the merge base
# and is left untouched, so it is presumably where train_df gets player_won.
dynamic_train      = _drop_target_if_present(dynamic_train)
eff_train          = _drop_target_if_present(eff_train)
strategic_train    = _drop_target_if_present(strategic_train)
timeline_train     = _drop_target_if_present(timeline_train)
rng_train          = _drop_target_if_present(rng_train)
critical_hit_train = _drop_target_if_present(critical_hit_train)
paralysis_train    = _drop_target_if_present(paralysis_train)
freeze_train       = _drop_target_if_present(freeze_train)
sleep_train        = _drop_target_if_present(sleep_train)

# --- Full merge ---
train_df = static_train.copy()

train_df = (
    train_df
    .merge(dynamic_train, on="battle_id", how="left")
    .merge(eff_train, on="battle_id", how="left")
    .merge(strategic_train, on="battle_id", how="left")
    .merge(timeline_train, on="battle_id", how="left")
    .merge(rng_train, on="battle_id", how="left")
    .merge(critical_hit_train, on="battle_id", how="left")
    .merge(paralysis_train, on="battle_id", how="left")
    .merge(freeze_train, on="battle_id", how="left")
    .merge(sleep_train, on="battle_id", how="left")
    .merge(tier_features_train, on="battle_id", how="left")
    .merge(paralysis_block_train, on="battle_id", how="left")
    .merge(status_severity_train, on="battle_id", how="left")
    .merge(status_net_train, on="battle_id", how="left")
    .merge(lead_duration_train, on="battle_id", how="left")
    .merge(crit_ratio_train, on="battle_id", how="left")
    .merge(hp_recovery_train, on="battle_id", how="left")
    # luck_control is merged after this chain: it is computed from train_df.
    .merge(damage_ratio_train, on="battle_id", how="left")
    .merge(move_eff_train, on="battle_id", how="left")
    .merge(status_control_train, on="battle_id", how="left")
    .merge(momentum_train, on="battle_id", how="left")
    .merge(first_ko_train, on="battle_id", how="left")
    .merge(switch_train, on="battle_id", how="left")
    .merge(special_adv_train, on="battle_id", how="left")
    .merge(psychic_adv_train, on="battle_id", how="left")
    .merge(electric_adv_train, on="battle_id", how="left")
    .merge(early_momentum_train, on="battle_id", how="left")
    .merge(rolling_train, on="battle_id", how="left")
    .merge(slope_train, on="battle_id", how="left")
    .merge(vol_train, on="battle_id", how="left")
    .merge(damage_slope_train, on="battle_id", how="left")
    .merge(auc_train, on="battle_id", how="left")
    .merge(swing_train, on="battle_id", how="left")
    .merge(maxadv_train, on="battle_id", how="left")
    .merge(me_train, on="battle_id", how="left")
    .merge(burst_train, on="battle_id", how="left")
    # timeline_strategy_train has the same column names as timeline_train, so
    # pandas renames the two copies with the _x / _y suffixes. The TEST cell
    # performs the same two merges, which keeps the column names aligned.
    # It must be merged exactly once: a second merge adds a third, unsuffixed
    # copy that exists only in TRAIN and is zero-filled in TEST by the
    # column reindex.
    .merge(timeline_strategy_train, on="battle_id", how="left")
)

# train_df now holds every column create_luck_control_index may need.
luck_control_train = create_luck_control_index(train_df)
train_df = train_df.merge(luck_control_train, on="battle_id", how="left")


# --- Final clean-up: infinities and missing values become 0 ---
train_df = train_df.replace([np.inf, -np.inf], 0).fillna(0)

print("Train DF pronto:", train_df.shape)


In [None]:
print("Creazione feature per il TEST set...")

# --- Feature extraction for the TEST set (one table per feature family) ---
static_test       = create_static_features(test_data)
dynamic_test      = create_dynamic_features(test_data)
eff_test          = create_type_effectiveness_feature(test_data)
strategic_test    = create_strategic_features(test_data)
timeline_test     = create_timeline_strategy_features(test_data)
rng_test          = create_rng_features(test_data)
critical_hit_test = create_critical_hit_features(test_data)
paralysis_test    = create_paralysis_features(test_data)
freeze_test       = create_freeze_features(test_data)
sleep_test        = create_sleep_features(test_data)
paralysis_block_test = create_paralysis_block_rate_feature(test_data)
status_severity_test = create_status_severity_feature(test_data)
status_net_test = create_status_net_balance_feature(test_data)
lead_duration_test = create_lead_duration_feature(test_data)
crit_ratio_test = create_critical_hit_ratio_feature(test_data)
hp_recovery_test = create_hp_recovery_feature(test_data)
# luck_control_test is not built here: create_luck_control_index is called
# on the merged test_df further down.
damage_ratio_test = create_damage_ratio_per_turn_feature(test_data)
status_control_test = create_status_control_index_feature(test_data)
move_eff_test = create_move_efficiency_index_feature(test_data)
momentum_test = create_momentum_score_feature(test_data)
first_ko_test = create_first_ko_advantage_feature(test_data)
switch_test = create_switch_count_diff_feature(test_data)
special_adv_test = create_special_team_advantage_feature(test_data, pokemon_base_stats)
psychic_adv_test = create_psychic_advantage_feature(test_data, pokemon_base_stats)
electric_adv_test = create_electric_advantage_feature(test_data, pokemon_base_stats)
early_momentum_test = create_early_momentum_features(test_data)
rolling_test = create_rolling_momentum_features(test_data)
slope_test = create_momentum_slope_features(test_data)
vol_test = create_momentum_volatility_features(test_data)
damage_slope_test = create_damage_slope_features(test_data)
auc_test = create_auc_hp_diff_features(test_data)
swing_test = create_swing_count_features(test_data)
maxadv_test = create_max_consecutive_advantage(test_data)
me_test = create_momentum_energy_features(test_data)
burst_test = create_damage_burst_features(test_data)
# Same extractor and same data as timeline_test: copy that result instead of
# running a second full pass over test_data.
timeline_strategy_test = timeline_test.copy()



# --- Calcolo feature Top-Tier per il TEST ---
def count_top_tiers_p1(battle):
    """Count the top-tier Pokemon in player 1's team.

    Reads ``battle["p1_team_details"]``; returns 0 when it is not a list.
    Names are compared after ``str.capitalize()``, non-dict team entries are
    skipped, and duplicate names are counted once per entry.
    """
    top_tiers = ("Tauros", "Snorlax", "Chansey", "Starmie", "Exeggutor")

    team = battle.get("p1_team_details", [])
    if not isinstance(team, list):
        return 0

    count = 0
    for member in team:
        if not isinstance(member, dict):
            continue
        if member.get("name", "").capitalize() in top_tiers:
            count += 1
    return count

def count_top_tiers_p2(battle):
    """Count the distinct top-tier Pokemon seen on player 2's side.

    Names are collected from ``p2_lead_details`` (when it is a dict) and from
    the ``p2_pokemon_state`` of every dict turn in ``battle_timeline`` (when
    it is a list), normalised with ``str.capitalize()``. Each name counts once.
    """
    top_tiers = ["Tauros", "Snorlax", "Chansey", "Starmie", "Exeggutor"]
    seen = set()

    lead = battle.get("p2_lead_details")
    if isinstance(lead, dict):
        seen.add(lead.get("name", "").capitalize())

    timeline = battle.get("battle_timeline", [])
    if isinstance(timeline, list):
        # A missing name becomes "", which can never match a top tier.
        seen.update(
            turn["p2_pokemon_state"].get("name", "").capitalize()
            for turn in timeline
            if isinstance(turn, dict) and "p2_pokemon_state" in turn
        )

    return len(seen.intersection(top_tiers))

# Top-tier counts per battle, built one record at a time.
tier_features_test = pd.DataFrame(
    [
        {
            "battle_id": b["battle_id"],
            "p1_top_tier_count": count_top_tiers_p1(b),
            "p2_top_tier_count": count_top_tiers_p2(b),
        }
        for b in test_data
    ],
    columns=["battle_id", "p1_top_tier_count", "p2_top_tier_count"],
)
# tier_diff > 0 means player 1 fields more top-tier Pokemon than player 2.
tier_features_test["tier_diff"] = tier_features_test["p1_top_tier_count"].sub(
    tier_features_test["p2_top_tier_count"]
)
tier_features_test["tier_advantage"] = tier_features_test["tier_diff"].gt(0).astype(int)

def _drop_target(df):
    return df.drop(columns=["player_won"], errors="ignore") if isinstance(df, pd.DataFrame) else df

# Strip the target (if any) from the TEST feature tables before merging.
static_test       = _drop_target(static_test)
dynamic_test      = _drop_target(dynamic_test)
eff_test          = _drop_target(eff_test)
strategic_test    = _drop_target(strategic_test)
timeline_test     = _drop_target(timeline_test)
rng_test          = _drop_target(rng_test)
critical_hit_test = _drop_target(critical_hit_test)
paralysis_test    = _drop_target(paralysis_test)
freeze_test       = _drop_target(freeze_test)
sleep_test        = _drop_target(sleep_test)

test_df = (
    static_test
    .merge(dynamic_test, on="battle_id", how="left")
    .merge(eff_test, on="battle_id", how="left")
    .merge(strategic_test, on="battle_id", how="left")
    .merge(timeline_test, on="battle_id", how="left")
    .merge(rng_test, on="battle_id", how="left")
    .merge(critical_hit_test, on="battle_id", how="left")
    .merge(paralysis_test, on="battle_id", how="left")
    .merge(freeze_test, on="battle_id", how="left")
    .merge(sleep_test, on="battle_id", how="left")
    .merge(tier_features_test, on="battle_id", how="left")
    .merge(paralysis_block_test, on="battle_id", how="left")
    .merge(status_severity_test, on="battle_id", how="left")
    .merge(status_net_test, on="battle_id", how="left")
    .merge(lead_duration_test, on="battle_id", how="left")
    .merge(crit_ratio_test, on="battle_id", how="left")
    .merge(hp_recovery_test, on="battle_id", how="left")
    # luck_control is merged after this chain: it is computed from test_df.
    .merge(damage_ratio_test, on="battle_id", how="left")
    .merge(status_control_test, on="battle_id", how="left")
    .merge(move_eff_test, on="battle_id", how="left")
    .merge(momentum_test, on="battle_id", how="left")
    .merge(first_ko_test, on="battle_id", how="left")
    .merge(switch_test, on="battle_id", how="left")
    .merge(special_adv_test, on="battle_id", how="left")
    .merge(psychic_adv_test, on="battle_id", how="left")
    .merge(electric_adv_test, on="battle_id", how="left")
    .merge(early_momentum_test, on="battle_id", how="left")
    .merge(rolling_test, on="battle_id", how="left")
    .merge(slope_test, on="battle_id", how="left")
    .merge(vol_test, on="battle_id", how="left")
    .merge(damage_slope_test, on="battle_id", how="left")
    .merge(auc_test, on="battle_id", how="left")
    .merge(swing_test, on="battle_id", how="left")
    .merge(maxadv_test, on="battle_id", how="left")
    .merge(me_test, on="battle_id", how="left")
    .merge(burst_test, on="battle_id", how="left")
    .merge(timeline_strategy_test, on="battle_id", how="left")
)

luck_control_test = create_luck_control_index(test_df)
test_df = test_df.merge(luck_control_test, on="battle_id", how="left")

# Infinities and missing values become 0, as for the TRAIN frame.
test_df = test_df.replace([np.inf, -np.inf], 0).fillna(0)

# --- Align the TEST columns to the TRAIN feature set ---
feature_cols = [c for c in train_df.columns if c not in ["battle_id", "player_won"]]

# reindex silently creates zero-filled columns for any TRAIN feature that the
# TEST frame does not have; report them so train/test skew is not hidden.
_missing_in_test = [c for c in feature_cols if c not in test_df.columns]
if _missing_in_test:
    print(
        "ATTENZIONE: colonne del TRAIN assenti nel TEST (riempite con 0):",
        _missing_in_test,
    )

test_df = test_df.reindex(columns=["battle_id"] + feature_cols, fill_value=0)

# --- Matrices ready for the model ---
X_train = train_df[feature_cols].values
y_train = train_df["player_won"].astype(int).values
X_test  = test_df[feature_cols].values
test_ids = test_df["battle_id"].values

print("Test DF pronto:", test_df.shape)
print("X_train / X_test:", X_train.shape, X_test.shape)
