In [1]:
# IMPORT
# ======

import json
import copy
import pandas as pd
import os
from tqdm.notebook import tqdm
import numpy as np
from collections import Counter
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score
from catboost import CatBoostClassifier

In [2]:
#COSTANTI
#--------

# Lista dei Pokémon OverUsed di Gen 1
POKEDEX_OU = {
    "alakazam": {
        "name" : "alakazam",
        "types": ["psychic"],
        "real_hp": 313, "real_atk": 198, "real_def": 188,
        "real_spa": 368, "real_spe": 338,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "chansey": {
        "name" : "chansey",
        "types": ["normal"],
        "real_hp": 703, "real_atk": 108, "real_def": 108,
        "real_spa": 308, "real_spe": 198,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "cloyster": {
        "name" : "cloyster",
        "types": ["water", "ice"],
        "real_hp": 303, "real_atk": 288, "real_def": 458,
        "real_spa": 268, "real_spe": 238,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "exeggutor": {
        "name" : "exeggutor",
        "types": ["grass", "psychic"],
        "real_hp": 393, "real_atk": 288, "real_def": 268,
        "real_spa": 348, "real_spe": 208,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "gengar": {
        "name" : "gengar",
        "types": ["ghost", "poison"],
        "real_hp": 313, "real_atk": 198, "real_def": 188,
        "real_spa": 368, "real_spe": 338,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "jolteon": {
        "name" : "jolteon",
        "types": ["electric"],
        "real_hp": 323, "real_atk": 228, "real_def": 218,
        "real_spa": 358, "real_spe": 318,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "jynx": {
        "name" : "jynx",
        "types": ["ice", "psychic"],
        "real_hp": 333, "real_atk": 198, "real_def": 168,
        "real_spa": 288, "real_spe": 288,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "rhydon": {
        "name" : "rhydon",
        "types": ["ground", "rock"],
        "real_hp": 413, "real_atk": 358, "real_def": 338,
        "real_spa": 188, "real_spe": 178,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "snorlax": {
        "name" : "snorlax",
        "types": ["normal"],
        "real_hp": 523, "real_atk": 318, "real_def": 228,
        "real_spa": 228, "real_spe": 158,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "starmie": {
        "name" : "starmie",
        "types": ["water", "psychic"],
        "real_hp": 323, "real_atk": 248, "real_def": 268,
        "real_spa": 298, "real_spe": 328,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "tauros": {
        "name" : "tauros",
        "types": ["normal"],
        "real_hp": 353, "real_atk": 298, "real_def": 288,
        "real_spa": 238, "real_spe": 318,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "zapdos": {
        "name" : "zapdos",
        "types": ["electric", "flying"],
        "real_hp": 383, "real_atk": 278, "real_def": 268,
        "real_spa": 348, "real_spe": 298,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "dragonite": {
        "name" : "dragonite",
        "types": ["dragon", "flying"],
        "real_hp": 325, "real_atk": 266, "real_def": 228,
        "real_spa": 238, "real_spe": 238,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "golem": {
        "name" : "golem",
        "types": ["rock", "ground"],
        "real_hp": 313, "real_atk": 198, "real_def": 188,
        "real_spa": 368, "real_spe": 338,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "charizard": {
        "name" : "charizard",
        "types": ["fire", "flying"],
        "real_hp": 363, "real_atk": 318, "real_def": 358,
        "real_spa": 208, "real_spe": 188,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "victreebel": {
        "name" : "victreebel",
        "types": ["grass", "poison"],
        "real_hp": 363, "real_atk": 308, "real_def": 228,
        "real_spa": 298, "real_spe": 238,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "lapras": {
        "name" : "lapras",
        "types": ["water", "ice"],
        "real_hp": 463, "real_atk": 268, "real_def": 258,
        "real_spa": 288, "real_spe": 218,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "slowbro": {
        "name" : "slowbro",
        "types": ["water", "psychic"],
        "real_hp": 393, "real_atk": 248, "real_def": 318,
        "real_spa": 258, "real_spe": 158,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "articuno": {
        "name" : "articuno",
        "types": ["ice", "flying"],
        "real_hp": 383, "real_atk": 268, "real_def": 298,
        "real_spa": 348, "real_spe": 268,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    },
    "persian": {
        "name" : "persian",
        "types": ["normal"],
        "real_hp": 333, "real_atk": 238, "real_def": 218,
        "real_spa": 228, "real_spe": 328,
        "current_hp_pct" : 1.0,
        "current_status" : "nostatus",
        "current_boost" : {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0},
        "current_effect" : ['noeffect'],
        "moves_used": {}
    }
}

ALL_MOVE = {
    "counter": {
        "name": "counter",
        "type": "fighting",
        "category": "physical",
        "base_power": 1,        # speciale: restituisce il doppio del danno subito
        "accuracy": 1.0,
        "priority": -5,
        "pp": 20
    },

    "icebeam": {
        "name": "icebeam",
        "type": "ice",
        "category": "special",
        "base_power": 95,
        "accuracy": 1.0,
        "priority": 0,
        "pp": 10
    },

    "hyperbeam": {
        "name": "hyperbeam",
        "type": "normal",
        "category": "physical",
        "base_power": 150,
        "accuracy": 0.9,
        "priority": 0,
        "pp": 5
    },

    "fireblast": {
        "name": "fireblast",
        "type": "fire",
        "category": "special",
        "base_power": 120,
        "accuracy": 0.85,
        "priority": 0,
        "pp": 5
    },

    "confuseray": {
        "name": "confuseray",
        "type": "ghost",
        "category": "status",
        "base_power": 0,
        "accuracy": 1.0,
        "priority": 0,
        "pp": 10
    },

    "toxic": {
        "name": "toxic",
        "type": "poison",
        "category": "status",
        "base_power": 0,
        "accuracy": 0.85,
        "priority": 0,
        "pp": 10
    },

    "razorleaf": {
        "name": "razorleaf",
        "type": "grass",
        "category": "special",
        "base_power": 55,
        "accuracy": 0.95,
        "priority": 0,
        "pp": 25,
        "high_crit": True
    },

    "sleeppowder": {
        "name": "sleeppowder",
        "type": "grass",
        "category": "status",
        "base_power": 0,
        "accuracy": 0.75,
        "priority": 0,
        "pp": 15
    },

    "amnesia": {
        "name": "amnesia",
        "type": "psychic",
        "category": "status",
        "base_power": 0,
        "accuracy": 1.0,
        "priority": 0,
        "pp": 20
    },

    "recover": {
        "name": "recover",
        "type": "normal",
        "category": "status",
        "base_power": 0,
        "accuracy": 1.0,
        "priority": 0,
        "pp": 20
    },

    "agility": {
        "name": "agility",
        "type": "psychic",
        "category": "status",
        "base_power": 0,
        "accuracy": 1.0,
        "priority": 0,
        "pp": 30
    },

    "bodyslam": {
        "name": "bodyslam",
        "type": "normal",
        "category": "physical",
        "base_power": 85,
        "accuracy": 1.0,
        "priority": 0,
        "pp": 15
    },

    "seismictoss": {
        "name": "seismictoss",
        "type": "fighting",
        "category": "physical",
        "base_power": 0,   # infligge dmg = livello
        "accuracy": 1.0,
        "priority": 0,
        "pp": 20
    },

    "sing": {
        "name": "sing",
        "type": "normal",
        "category": "status",
        "base_power": 0,
        "accuracy": 0.55,
        "priority": 0,
        "pp": 15
    },

    "blizzard": {
        "name": "blizzard",
        "type": "ice",
        "category": "special",
        "base_power": 120,
        "accuracy": 0.9,
        "priority": 0,
        "pp": 5
    },

    "slash": {
        "name": "slash",
        "type": "normal",
        "category": "physical",
        "base_power": 70,
        "accuracy": 1.0,
        "priority": 0,
        "pp": 20,
        "high_crit": True
    },

    "thunderbolt": {
        "name": "thunderbolt",
        "type": "electric",
        "category": "special",
        "base_power": 95,
        "accuracy": 1.0,
        "priority": 0,
        "pp": 15
    }
}

In [3]:
# IMPORT E CARICAMENTO FILE
# =========================

# --- Define the path to our data ---
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
DATA_PATH = os.path.join('../input', COMPETITION_NAME)

train_file_path = os.path.join(DATA_PATH, 'train.jsonl')
test_file_path = os.path.join(DATA_PATH, 'test.jsonl')
train_data = []

# --- Read the file line by line ---
print(f"Loading data from '{train_file_path}'...")
try:
    with open(train_file_path, 'r') as f:
        for line in f:
            # json.loads() parses one line (one JSON object) into a Python dictionary
            train_data.append(json.loads(line))

    print(f"Successfully loaded {len(train_data)} battles.")

except FileNotFoundError:
    print(f"ERROR: Could not find the training file at '{train_file_path}'.")
    print("Please make sure you have added the competition data to this notebook.")

train_data = [battle for battle in train_data if battle.get("battle_id") != 4877]

Loading data from '../input/fds-pokemon-battles-prediction-2025/train.jsonl'...
Successfully loaded 10000 battles.


In [4]:
def extract_team_p1(b):
    team = b.get('p1_team_details')
    return {p["name"]:POKEDEX_OU.get(p["name"], {}) for p in team}

def extend_team_p2(team, turn):
    p2_state = turn.get("p2_pokemon_state")

    if p2_state:
        p_name = p2_state.get("name", "")
        p2_active_pokemon = POKEDEX_OU.get(p_name, {})

        if p_name not in team:
            team[p_name] = p2_active_pokemon
        team = compute_team(team, turn, "p2")
    return team

def compute_team(team, turn, player_key):
    state = turn.get(player_key+"_pokemon_state", {})
    if turn.get("turn", -1) == 30:
        team = compute_effects(team, state)
        team = compute_boost(team, state)
    team = extract_used_move(team, turn, state, player_key)
    team = compute_remaining_hp(team, state)
    team = compute_status(team, state)
    return team

def compute_effects(team, state):
    name_from_state = state["name"]
    poke = team[name_from_state]
    original_effect = state.get("effects", ["noeffect"])
    
    if original_effect != ["noeffect"]:
        poke["current_effect"] = original_effect
    return team

def compute_boost(team, state):
    name_from_state = state["name"]
    poke = team[name_from_state]
    original_boost = state.get("boosts")

    if original_boost != {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0}:
        poke["original_boost"] = original_boost
    return team
    
def compute_status(team, state):
    name_from_state = state["name"]
    poke = team[name_from_state]
    original_status = state.get("status")

    if original_status == "fnt":
        team.pop(name_from_state)
    else:
        poke["current_status"] = original_status
    return team

def counter_fnt(counter, state):
    original_status = state.get("status")

    if original_status == "fnt":
        counter +=1
    return counter

def extract_used_move(team, turn, state, player_key):
    move = turn.get(player_key+"_move_details")

    if move:
        m_name = move["name"]
        team[state["name"]] = copy.deepcopy(team[state["name"]])
        used = team[state["name"]]["moves_used"]
        
        if m_name not in used:
            if m_name in ALL_MOVE:
                used[m_name] = ALL_MOVE.get(m_name)
            else:
                move["count"] = 0
                used.setdefault(m_name, move)

        if m_name in ALL_MOVE:
            used[m_name]["pp"] -= 1
            if used[m_name]["pp"] <= 0:
                used.pop(m_name)
        else:
            used[m_name]["count"] += 1
            
    return team

# Calcola percentuale HP rimanenti
def compute_remaining_hp(team, state):
    poke = team[state["name"]]
    poke["current_hp_pct"] = state.get("hp_pct")
    return team

def insert_stat(features, team, stat, status_fnt_count, player_key):
    avg = 0
    team_len = len(team)

    if team_len != 0:
        vals = [p[stat] for p in team]
        tot = sum(vals)

        if player_key == "p1":
            features[f"{player_key}_{stat}_sum"] = tot
        else:
            if stat == "current_hp_pct":
                missing = 6 - (team_len + status_fnt_count)
                vals.extend([1.0] * missing)
        tot = sum(vals)
        avg = tot / len(vals)
        features[f"{player_key}_{stat}_mean"] = avg
        features[f"{player_key}_{stat}_max"] = max(vals)
        features[f"{player_key}_{stat}_min"] = min(vals)
    else:
        features[f"{player_key}_{stat}_mean"] = avg
        
    return features

def create_features(data: list[dict]) -> pd.DataFrame:
    feature_list = []

    for battle in tqdm(data, desc="Extracting features"):
        features = {}
        p1_status_fnt_count = 0
        p2_status_fnt_count = 0
        
        # BATTLE ID
        features['battle_id'] = battle.get('battle_id', -1)

        # MATCH WINNER
        if 'player_won' in battle: features['player_won'] = int(battle['player_won'])

        # --- Player 1 Team ---
        p1_team = extract_team_p1(battle)
        p2_team = {}

        # --- timeline ---
        timeline = battle.get("battle_timeline", [])

        for turn in timeline:
            p1_state = turn.get("p1_pokemon_state", {})
            p2_state = turn.get("p2_pokemon_state", {})
            
            p1_status_fnt_count = counter_fnt(p1_status_fnt_count, p1_state)
            p2_status_fnt_count = counter_fnt(p2_status_fnt_count, p2_state)
            
            if turn.get("turn", -1) == 30: 
                if p1_state.get("effects") != ["noeffect"]:
                    for e in p1_state.get("effects"):
                        features[f"p1_effect_{e}"] = 1

                if p2_state.get("effects") != ["noeffect"]:
                    for e in p2_state.get("effects"):
                        features[f"p2_effect_{e}"] = 1

                if p1_state.get("boosts") != {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0}:
                    features["p1_boosts"] = int(sum(v for k, v in p1_state.get("boosts", {}).items() if k != "spd"))

                if p2_state.get("boosts") != {'atk': 0, 'def': 0, 'spa': 0, 'spd': 0, 'spe': 0}:
                    features["p2_boosts"] = int(sum(v for k, v in p2_state.get("boosts", {}).items() if k != "spd"))

            p1_team = compute_team(p1_team, turn, "p1")
            p2_team = extend_team_p2(p2_team, turn)
        
        p1_team = list(p1_team.values())
        p2_team = list(p2_team.values())

        for stat in ["real_hp","real_atk","real_def","real_spa","real_spe", "current_hp_pct"]:
            features = insert_stat(features, p1_team, stat, p1_status_fnt_count, "p1")
            features = insert_stat(features, p2_team, stat, p2_status_fnt_count, "p2")
            features[f"avg_{stat}_diff"] = features[f"p1_{stat}_mean"] - features[f"p2_{stat}_mean"]
        
        p1_last_status = [p["current_status"] for p in p1_team if p["current_status"] != "nostatus"]
        p2_last_status = [p["current_status"] for p in p2_team if p["current_status"] != "nostatus"]

        features["p1_count_last_status"] = len(p1_last_status)
        features["p2_count_last_status"] = len(p2_last_status)
        
        features["p1_status_fnt_count"] = p1_status_fnt_count
        features["p2_status_fnt_count"] = p2_status_fnt_count
        
        # --- il player ha quel pokemon ---
        for p in POKEDEX_OU:
            features[f"p1_has_{p}"] = int(any(p == poke["name"] for poke in p1_team))
            features[f"p2_has_{p}"] = int(any(p == poke["name"] for poke in p2_team))

        for m in ALL_MOVE:
            features[f"p1_pp_{m}"] = sum(poke.get("moves_used", {}).get(m, {}).get("pp", 0) for poke in p1_team)
            features[f"p2_pp_{m}"] = sum(poke.get("moves_used", {}).get(m, {}).get("pp", 0) for poke in p2_team)

        feature_list.append(features)  
    return pd.DataFrame(feature_list).fillna(0)

# Create feature DataFrames for both training and test sets
print("Processing training data...")
train_df = create_features(train_data)

print("\nProcessing test data...")
test_data = []
with open(test_file_path, 'r') as f:
    for line in f:
        test_data.append(json.loads(line))
test_df = create_features(test_data)

print("\nTraining features preview:")
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 2000)
display(train_df.head())

Processing training data...


Extracting features:   0%|          | 0/9999 [00:00<?, ?it/s]


Processing test data...


Extracting features:   0%|          | 0/5000 [00:00<?, ?it/s]


Training features preview:


Unnamed: 0,battle_id,player_won,p1_real_hp_sum,p1_real_hp_mean,p1_real_hp_max,p1_real_hp_min,p2_real_hp_mean,p2_real_hp_max,p2_real_hp_min,avg_real_hp_diff,p1_real_atk_sum,p1_real_atk_mean,p1_real_atk_max,p1_real_atk_min,p2_real_atk_mean,p2_real_atk_max,p2_real_atk_min,avg_real_atk_diff,p1_real_def_sum,p1_real_def_mean,p1_real_def_max,p1_real_def_min,p2_real_def_mean,p2_real_def_max,p2_real_def_min,avg_real_def_diff,p1_real_spa_sum,p1_real_spa_mean,p1_real_spa_max,p1_real_spa_min,p2_real_spa_mean,p2_real_spa_max,p2_real_spa_min,avg_real_spa_diff,p1_real_spe_sum,p1_real_spe_mean,p1_real_spe_max,p1_real_spe_min,p2_real_spe_mean,p2_real_spe_max,p2_real_spe_min,avg_real_spe_diff,p1_current_hp_pct_sum,p1_current_hp_pct_mean,p1_current_hp_pct_max,p1_current_hp_pct_min,p2_current_hp_pct_mean,p2_current_hp_pct_max,p2_current_hp_pct_min,avg_current_hp_pct_diff,p1_count_last_status,p2_count_last_status,p1_status_fnt_count,p2_status_fnt_count,p1_has_alakazam,p2_has_alakazam,p1_has_chansey,p2_has_chansey,p1_has_cloyster,p2_has_cloyster,p1_has_exeggutor,p2_has_exeggutor,p1_has_gengar,p2_has_gengar,p1_has_jolteon,p2_has_jolteon,p1_has_jynx,p2_has_jynx,p1_has_rhydon,p2_has_rhydon,p1_has_snorlax,p2_has_snorlax,p1_has_starmie,p2_has_starmie,p1_has_tauros,p2_has_tauros,p1_has_zapdos,p2_has_zapdos,p1_has_dragonite,p2_has_dragonite,p1_has_golem,p2_has_golem,p1_has_charizard,p2_has_charizard,p1_has_victreebel,p2_has_victreebel,p1_has_lapras,p2_has_lapras,p1_has_slowbro,p2_has_slowbro,p1_has_articuno,p2_has_articuno,p1_has_persian,p2_has_persian,p1_pp_counter,p2_pp_counter,p1_pp_icebeam,p2_pp_icebeam,p1_pp_hyperbeam,p2_pp_hyperbeam,p1_pp_fireblast,p2_pp_fireblast,p1_pp_confuseray,p2_pp_confuseray,p1_pp_toxic,p2_pp_toxic,p1_pp_razorleaf,p2_pp_razorleaf,p1_pp_sleeppowder,p2_pp_sleeppowder,p1_pp_amnesia,p2_pp_amnesia,p1_pp_recover,p2_pp_recover,p1_pp_agility,p2_pp_agility,p1_pp_bodyslam,p2_pp_bodyslam,p1_pp_seismictoss,p2_pp_seismictoss,p1_pp_sing,p2_pp_sing,p1_pp_blizzard,p2_pp_blizzard,p1_pp_slash,p2_pp_slash,p1_pp_thunderbolt,p2_pp_thunderbolt,p2_boosts,p1_effect_reflect,p2_effect_reflect,p1_boosts,p1_effect_wrap,p1_effect_confusion,p2_effect_substitute,p2_effect_confusion,p1_effect_substitute,p1_effect_clamp,p1_effect_typechange,p2_effect_typechange,p2_effect_clamp,p2_effect_wrap
0,0,1,2215.0,443.0,703.0,313.0,413.0,523,323,30.0,1170.0,234.0,318.0,108.0,284.666667,318,248,-50.666667,1080.0,216.0,288.0,108.0,254.666667,268,228,-38.666667,1440.0,288.0,368.0,228.0,291.333333,348,228,-3.333333,1340.0,268.0,338.0,158.0,231.333333,328,158,36.666667,3.872814,0.774563,1.0,0.291022,0.5295,1.0,0.011472,0.245062,2,3,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,7,0,0,0,0,0,3,0,0,10,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1,1449.0,483.0,703.0,353.0,434.666667,703,313,48.333333,654.0,218.0,298.0,108.0,243.0,318,108,-25.0,714.0,238.0,318.0,108.0,224.666667,288,108,13.333333,804.0,268.0,308.0,238.0,298.0,368,228,-30.0,674.0,224.666667,318.0,158.0,258.0,338,158,-33.333333,1.58,0.526667,1.0,0.13,0.428333,0.77,0.26,0.098333,0,3,3,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,4,2,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,8,15,0,33,0,0,0,0,0,0,0,0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,1,2305.0,461.0,703.0,313.0,420.5,703,313,40.5,1280.0,256.0,358.0,108.0,200.5,298,108,55.5,1150.0,230.0,338.0,108.0,193.0,288,108,37.0,1330.0,266.0,368.0,188.0,320.5,368,238,-54.5,1190.0,238.0,338.0,158.0,298.0,338,198,-60.0,4.18,0.836,1.0,0.52,0.798333,1.0,0.26,0.037667,1,2,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,12,0,0,14,0,0,0,0,17,0,0,0,0,0,0,0,0,9,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,1,1259.0,419.666667,523.0,353.0,405.5,523,353,14.166667,894.0,298.0,318.0,278.0,300.5,318,278,-2.5,784.0,261.333333,288.0,228.0,253.0,288,228,8.333333,814.0,271.333333,348.0,228.0,278.0,348,228,-6.666667,774.0,258.0,318.0,158.0,253.0,318,158,5.0,2.04,0.68,1.0,0.04,0.476667,1.0,0.01,0.203333,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,11,0,0,0,0,0,0,7,7,0,0,0,0,0,0,0,0,10,8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,1,2065.0,413.0,703.0,303.0,459.0,703,323,-46.0,1180.0,236.0,298.0,108.0,252.0,318,108,-16.0,1310.0,262.0,458.0,108.0,232.0,288,108,30.0,1530.0,306.0,368.0,238.0,284.0,348,228,22.0,1300.0,260.0,338.0,198.0,242.0,328,158,18.0,3.76,0.752,1.0,0.42,0.525,1.0,0.04,0.227,2,4,1,0,1,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,0,0,0,0,0,0,0,0,0,8,9,0,0,15,11,0,0,4,9,8,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# TRAINING LOGISTIC REGRESSION MODEL
# ==================================

features = [col for col in train_df.columns if col not in ['battle_id', 'player_won']]
X_train = train_df[features]
y_train = train_df['player_won']

X_test = test_df[features]

print("Training model...")
model = make_pipeline(StandardScaler(), CatBoostClassifier(loss_function="Logloss",verbose=0,
                                random_seed=42,
                                random_strength = 2, 
                                learning_rate = np.float64(0.08), 
                                l2_leaf_reg = 1, 
                                iterations = 500, 
                                depth = 4, 
                                border_count = 32, 
                                bagging_temperature = 1
                                ))
model.fit(X_train, y_train)
print("Model training complete.")

#CrossValidatio Test
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X_train, y_train, cv=cv, scoring="accuracy", n_jobs=-1)
print("LR 5-fold CV accuracy: %.4f ± %.4f" % (scores.mean(), scores.std()))

Training model...
Model training complete.
LR 5-fold CV accuracy: 0.8415 ± 0.0037


In [6]:
# MAKE PREDICTION ON THE TEST DATA
# ================================

print("Generating predictions on the test set...")
test_predictions = model.predict(X_test)

# Create the submission DataFrame
submission_df = pd.DataFrame({
    'battle_id': test_df['battle_id'],
    'player_won': test_predictions
})

# Save the DataFrame to a .csv file
submission_df.to_csv('submission.csv', index=False)
print("\n'submission.csv' file created successfully!")

Generating predictions on the test set...

'submission.csv' file created successfully!
