In [1]:
import json
import pandas as pd
import os

# --- Define the path to our data ---
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
DATA_PATH = os.path.join('kaggle/input', COMPETITION_NAME)

train_file_path = os.path.join(DATA_PATH, 'train.jsonl')
test_file_path = '/kaggle/input/fds-pokemon-battles-prediction-2025/test.jsonl'
train_data = []

# Read the file line by line
print(f"Loading data from '{train_file_path}'...")
try:
    with open(train_file_path, 'r') as f:
        for line in f:
            # json.loads() parses one line (one JSON object) into a Python dictionary
            train_data.append(json.loads(line))

    print(f"Successfully loaded {len(train_data)} battles.")

    # Let's inspect the first battle to see its structure
    print("\n--- Structure of the first train battle: ---")
    if train_data:
        first_battle = train_data[0]
        
        # To keep the output clean, we can create a copy and truncate the timeline
        battle_for_display = first_battle.copy()
        battle_for_display['battle_timeline'] = battle_for_display.get('battle_timeline', [])[25:30] # Show first 2 turns
        
        # Use json.dumps for pretty-printing the dictionary
        print(json.dumps(battle_for_display, indent=4))
        if len(first_battle.get('battle_timeline', [])) > 3:
            print("    ...")
            print("    (battle_timeline has been truncated for display)")


except FileNotFoundError:
    print(f"ERROR: Could not find the training file at '{train_file_path}'.")
    print("Please make sure you have added the competition data to this notebook.")

Loading data from '../input/fds-pokemon-battles-prediction-2025/train.jsonl'...
Successfully loaded 10000 battles.

--- Structure of the first train battle: ---
{
    "player_won": true,
    "p1_team_details": [
        {
            "name": "starmie",
            "level": 100,
            "types": [
                "psychic",
                "water"
            ],
            "base_hp": 60,
            "base_atk": 75,
            "base_def": 85,
            "base_spa": 100,
            "base_spd": 100,
            "base_spe": 115
        },
        {
            "name": "exeggutor",
            "level": 100,
            "types": [
                "grass",
                "psychic"
            ],
            "base_hp": 95,
            "base_atk": 95,
            "base_def": 85,
            "base_spa": 125,
            "base_spd": 125,
            "base_spe": 55
        },
        {
            "name": "chansey",
            "level": 100,
            "types": [
                "normal",

In [2]:
from tqdm.notebook import tqdm
import numpy as np


effectiveness = {
    'normal':   {'rock': 0.5, 'ghost': 0, 'notype': 1},
    'fire':     {'grass': 2, 'ice': 2, 'bug': 2, 'rock': 0.5, 'fire': 0.5, 'water': 0.5, 'dragon': 0.5},
    'water':    {'fire': 2, 'rock': 2, 'ground': 2, 'water': 0.5, 'grass': 0.5, 'dragon': 0.5},
    'electric': {'water': 2, 'flying': 2, 'ground': 0, 'electric': 0.5, 'grass': 0.5, 'dragon': 0.5},
    'grass':    {'water': 2, 'rock': 2, 'ground': 2, 'fire': 0.5, 'grass': 0.5, 'poison': 0.5, 'flying': 0.5, 'dragon': 0.5},
    'ice':      {'grass': 2, 'ground': 2, 'flying': 2, 'dragon': 2, 'fire': 0.5, 'ice': 0.5, 'water': 0.5},
    'poison':   {'grass': 2, 'poison': 0.5, 'ground': 0.5, 'rock': 0.5, 'ghost': 0.5},
    'ground':   {'fire': 2, 'electric': 2, 'poison': 2, 'rock': 2, 'grass': 0.5, 'flying': 0},
    'flying':   {'grass': 2, 'fighting': 2, 'bug': 2, 'rock': 0.5, 'electric': 0.5},
    'psychic':  {'poison': 2, 'fighting': 2, 'psychic': 0.5},
    'bug':      {'grass': 2, 'psychic': 2, 'poison': 0.5, 'fire': 0.5, 'flying': 0.5},
    'rock':     {'fire': 2, 'ice': 2, 'flying': 2, 'bug': 2, 'ground': 0.5},
    'ghost':    {'ghost': 2, 'psychic': 0},
    'dragon':   {'dragon': 2},
    'notype':   {}
}

def type_match_2(tp1, tp2):
    return effectiveness.get(tp1, {}).get(tp2, 1)

all_pokemon = {
        "alakazam": 0.2938,
        "articuno": 0.40405,
        "chansey": 0.934781,
        "charizard": 0.3778,
        "cloyster": 0.89314,
        "dragonite": 0.90042,
        "exeggutor": 0.84901,
        "gengar": 0.89242,
        "golem": 0.63721,
        "jolteon": 0.67436,
        "jynx": 0.19017,
        "lapras": 0.22678,
        "persian": 0.75462,
        "rhydon": 0.64732,
        "slowbro": 0.21631,
        "snorlax": 0.31173,
        "starmie": 0.10128,
        "tauros": 0.72923,
        "victreebel": 0.45483,
        "zapdos": 0.54324,
}

base_stats = {
        "alakazam": 405,
        "articuno": 485,
        "chansey": 415,
        "charizard": 425,
        "cloyster": 480,
        "dragonite": 500,
        "exeggutor": 455,
        "gengar": 425,
        "golem": 420,
        "jolteon": 430,
        "jynx": 340,
        "lapras": 450,
        "persian": 375,
        "rhydon": 440,
        "slowbro": 390,
        "snorlax": 430,
        "starmie": 435,
        "tauros": 450,
        "victreebel": 420,
        "zapdos": 490,
}

base_hp_atk_def_spe_sp = {
        "alakazam": [55,50,45,120,135],
        "articuno": [90,85,100,85,125],
        "chansey": [250,5,5,50,105],
        "charizard": [78,84,78,100,85],
        "cloyster": [50,95,180,70,85],
        "dragonite": [91,134,95,80,100],
        "exeggutor": [95,95,85,55,125],
        "gengar": [60,65,60,110,130],
        "golem": [80,120,130,45,55],
        "jolteon": [65,65,60,130,110],
        "jynx": [65,50,35,95,95],
        "lapras": [130,85,80,60,95],
        "persian": [65,70,60,115,65],
        "rhydon": [105,130,120,40,45],
        "slowbro": [95,75,110,30,80],
        "snorlax": [160,110,65,30,65],
        "starmie": [60,75,85,115,100],
        "tauros": [75,100,95,110,70],
        "victreebel": [80,105,65,70,100],
        "zapdos": [90,90,85,100,125],
}

types = {
        "alakazam": ["notype", "psychic"],
        "articuno": ["flying", "ice"],
        "chansey": ["normal", "notype"],
        "charizard": ["fire", "flying"],
        "cloyster": ["ice", "water"],
        "dragonite": ["dragon", "flying"],
        "exeggutor": ["grass", "psychic"],
        "gengar": ["ghost", "poison"],
        "golem": ["ground", "rock"],
        "jolteon": ["electric", "notype"],
        "jynx": ["ice", "psychic"],
        "lapras": ["ice", "water"],
        "persian": ["normal", "notype"],
        "rhydon": ["ground", "rock"],
        "slowbro": ["psychic", "water"],
        "snorlax": ["normal", "notype"],
        "starmie": ["psychic", "water"],
        "tauros": ["normal", "notype"],
        "victreebel": ["grass", "poison"],
        "zapdos": ["electric", "flying"]
}

status = {
    'fnt': 0.239,
    'slp': 0.457,
    'frz': 0.670,
    'nostatus':0,
    'par': 0.817,
    'tox': 0.143,
    'psn': 0.330,
    'brn': 0.598
}

effects = {
    'confusion': 0.365,
    'reflect': 0.934,
    'noeffect': 0.605,
    'wrap': 0.779,
    'substitute': 0.552,
    'clamp': 0.426,
    'typechange': 0.481,
    'firespin': 0.112,
    'disable': 0.517
}

good_effects = {'reflect','substitute','typechange'}
            

def create_better_features(data: list[dict]) -> pd.DataFrame:
    feature_list = []
    k=0
    for battle in tqdm(data, desc="Extracting better features"):
        features = {}

        # --- Player 1 Team Features ---
        p1_team = battle.get('p1_team_details', [])
        if p1_team:
            #Determino i membri della prima squadra 
            p1_team_members = set()
            for element in p1_team:
                p1_team_members.add(element['name'])
        

        # --- Player 2 Lead Features ---
        p2_lead = battle.get('p2_lead_details')
  

        # --- Battle timeline features ---
        timeline = battle.get('battle_timeline', [])

        if timeline:
            
            #Vita di ogni pokemon al 30 turno e squadra pokemon avversaria
            #stato di ogni pokemon al 30 turno
            
            remain_hp_p1={}
            remain_hp_p2={}

            remain_status_1 = {}
            remain_status_2 = {}

            p1_negative_status = 0
            p2_negative_status = 0

            remain_effects_1 = {}
            remain_effects_2 = {}

            p1_negative_effect = 0
            p2_negative_effect = 0
            p1_positive_effect = 0
            p2_positive_effect = 0

            p1_team_dimension = 6
            p2_team_dimension = 6

            p1_boosted_pokemon = {}
            p2_boosted_pokemon = {}

            p2_team_members = set()
            
            bst_1 = 0
            bst_2 = 0

            for t in timeline:   
                
                pok1=t.get('p1_pokemon_state')['name']
                lif1=t.get('p1_pokemon_state')['hp_pct']
                status_1 = t.get('p1_pokemon_state')['status']
                effect_1 = t.get('p1_pokemon_state')['effects']
                boost_1 = t.get('p1_pokemon_state')['boosts'].values()
                
                pok2=t.get('p2_pokemon_state')['name']
                lif2=t.get('p2_pokemon_state')['hp_pct']
                status_2 = t.get('p2_pokemon_state')['status']
                effect_2 = t.get('p2_pokemon_state')['effects']
                boost_2 = t.get('p2_pokemon_state')['boosts'].values()
                
                remain_hp_p1[pok1]=lif1
                remain_hp_p2[pok2]=lif2
                
                remain_status_1[pok1] = status_1
                remain_status_2[pok2] = status_2

                remain_effects_1[pok1] = effect_1
                remain_effects_2[pok2] = effect_2

                p1_boosted_pokemon[pok1] = sum(boost_1)  
                p2_boosted_pokemon[pok2] = sum(boost_2)

                #conto quante volte durante i trenta turni i pokemon erano sotto effect negativi/positivi

                for element in effect_1:   
                    if element in good_effects:
                        p1_positive_effect += 1
                    elif element != 'noeffect':
                        p1_negative_effect += 1

                for element in effect_2:   
                    if element in good_effects:
                        p2_positive_effect += 1
                    elif element != 'noeffect':
                        p2_negative_effect += 1
                                  
                        
                #stimo i pokemon della squadra avversaria
                p2_team_members.add(pok2)

                #ogni volta che un pokemon assume lo stato 'fnt' lo rimuoviamo dal team
                
                if status_1 == 'fnt':
                    p1_team_dimension-=1
                    p1_team_members.remove(pok1)
                
                if status_2 == 'fnt':
                    p2_team_dimension-=1
                    p2_team_members.remove(pok2)

            #insegno al modello che se la dimensione di un team è zero il team ha perso al 100%
            
            if p2_team_dimension == 0:
                features['certain_victory'] = 1
            elif p1_team_dimension == 0:
                features['certain_victory'] = 0
            else: features['certain_victory'] = 0.5
                
                
            # Stimo il numero di tipi presenti nei due team
            
            unique_types_2 = {t for p in p2_team_members for t in types[p]}
            unique_types_1 = {t for p in p1_team_members for t in types[p]}
            #features['p2_unique_types'] = len(unique_types_2)
            #features['p1_unique_types'] = len(unique_types_1)
            features['unique_types_diff'] = (len(unique_types_1) - len(unique_types_2))

            #stimo il vantaggio dei tipi di pokemon rimasti ai due giocatori
            tot_efficacy_1 = 0
            tot_efficacy_2 = 0
            
            super_efficacy_1 = 0
            super_efficacy_2 = 0
            
            low_efficacy_1 = 0
            low_efficacy_2 = 0
            
            for pok1 in p1_team_members:
                for pok2 in p2_team_members:
                    efficacy_1 = 1
                    efficacy_2 = 1
                    for tp1 in types[pok1]:
                        for tp2 in types[pok2]:
                            efficacy_1 *= type_match_2(tp1,tp2)
                            efficacy_2 *= type_match_2(tp2,tp1)
                    if efficacy_1 >= 2:
                        super_efficacy_1+= 1
                    if efficacy_1 <= 0.5:
                        low_efficacy_1 += 1
                    if efficacy_2 >= 2:
                        super_efficacy_2 += 1
                    if efficacy_2 <= 0.5:
                        low_efficacy_2 += 1
                    tot_efficacy_1 += efficacy_1
                    tot_efficacy_2 += efficacy_2
                    
            features['types_efficacy_diff'] = (tot_efficacy_1 - tot_efficacy_2)
            features['super_efficacy_diff'] = (super_efficacy_1 - super_efficacy_2)
            features['low_efficacy_diff'] = (low_efficacy_1 - low_efficacy_2)

           
            #stimo quanti pokemon sono in uno status negativo all'ultimo turno
        
            for value in list(remain_status_1.values()):
                if value != 'nostatus' and value != 'fnt':
                    p1_negative_status+=1
                    
            if p1_team_dimension != 0:
                features['p1_negative_status'] = p1_negative_status/p1_team_dimension
            else:
                features['p1_negative_status'] = 1
        
            for value in list(remain_status_2.values()):
                if value != 'nostatus' and value != 'fnt':
                    p2_negative_status+=1  
                    
            if p2_team_dimension != 0:
                features['p2_negative_status'] = p2_negative_status/p2_team_dimension
            else:
                features['p2_negative_status'] = 1
                
            features['negative_statuses_diff'] = (p1_negative_status - p2_negative_status)

            #stimo i pokemon rimanenti alle due squadre all'ultimo turno

            for pokemon in all_pokemon.keys():
                features['p1'+pokemon] = 1 if pokemon in p1_team_members else 0
               
            for pokemon in all_pokemon.keys():
                features['p2'+pokemon] = 1 if pokemon in p2_team_members else 0
                

            #stimo gli effect dei due pokemon all'ultimo turno

            k = 0
            for value in list(remain_effects_1.values()):
                string = 'p1_pok_effect'+str(k+1)
                features[string] = effects[value[0]]
                k+=1
            k = 0
            for value in list(remain_effects_2.values()):
                string = 'p2_pok_effect'+ str(k+1)
                features[string] = effects[value[0]]
                k+=1

            #Stimo i boost ricevuti dai pokemon ancora in piedi al trentesimo turno
            tot_boost_1 = 0
            tot_boost_2 = 0

            for pokemon in p1_team_members:
                if pokemon in p1_boosted_pokemon.keys():
                    tot_boost_1+= p1_boosted_pokemon[pokemon]
            
            for pokemon in p2_team_members:
                if pokemon in p2_boosted_pokemon.keys():
                    tot_boost_2+= p2_boosted_pokemon[pokemon]
            
            #features['boost_1'] = tot_boost_1
            #features['boost_2'] = tot_boost_2
            features['boost_diff'] = tot_boost_1 - tot_boost_2
            
 
            #stimo quanti pokemon nel corso dei trenta turni sono stati sottoposti a effect negativi/positivi

            #features['positive_effect_diff'] = p1_positive_effect - p2_positive_effect
            #features['negative_effect_diff'] = p1_negative_effect - p2_negative_effect

            
            #stimo i pokemon restanti alle due squadre al trentesimo turno
            
            #features['p1_remaining_pokemon'] = p1_team_dimension/6
            #features['p2_remaining_pokemon'] = p2_team_dimension/6
            features['remaining_pokemon_diff'] = (p1_team_dimension - p2_team_dimension)
    
          
            #Stimo le base_stat della squadra pokemon avversaria
            
            bst_media = sum(base_stats.values())/20
            
            for pokemon in p2_team_members:
                bst_2 += base_stats[pokemon]
            for pokemon in p1_team_members:
                bst_1 += base_stats[pokemon]
            
            features['bst_diff'] = (bst_1 - bst_2)
            #features['bst_1'] = bst_1
            #features['bst_2'] = bst_2

            #stimo la vita media rimanente alle due squadre all'ultimo turno
       
            #features['p1_mean_remaining_hp'] = sum(list(remain_hp_p1.values()))/6
            #features['p2_mean_remaining_hp'] = sum(list(remain_hp_p2.values()))/6
            features['remaining_hp_diff'] = (sum(list(remain_hp_p1.values())) - sum(list(remain_hp_p2.values())))
            


            #valuto le mosse dei due pokemon e la loro efficacia sull'altro pokemon

            tot_move1_efficacy = 0
            tot_move2_efficacy = 0

            p1_team_members_moves = set()
            p2_team_members_moves = set()

            turn_miss_1 = 0
            turn_miss_2 = 0

            damage_only_1 = 0
            damage_only_2 = 0

            tot_base_power_move_1 = 0
            tot_base_power_move_2 = 0

            tot_accuracy_move_1 = 0
            tot_accuracy_move_2 = 0

            tot_priority_move_1 = 0
            tot_priority_move_2 = 0
          
 
            for t in timeline:
                
                pok1=t.get('p1_pokemon_state')['name']
                pok2=t.get('p2_pokemon_state')['name']

                moves_1 = t.get('p1_move_details',[])
                moves_2 = t.get('p2_move_details',[])


                #Determini i tipi delle mosse dei pokemon rimasti alle squadre
                #Conto quante volte i pokemon non agiscono

                if not moves_1:
                    turn_miss_1 += 1

                if moves_1 and pok1 in p1_team_members: 
                    type_move_1 = t.get('p1_move_details')['type']
                    base_power_move_1 = t.get('p1_move_details')['base_power']
                    accuracy_move_1 = t.get('p1_move_details')['accuracy']
                    priority_move_1 = t.get('p1_move_details')['priority']
                    category_move_1 = t.get('p1_move_details')['category']
                    
                    if category_move_1 != 'STATUS':
                       p1_team_members_moves.add(type_move_1.lower())
                       if not moves_2:
                          damage_only_1 += 1

                    tot_base_power_move_1 += base_power_move_1
                    tot_accuracy_move_1 += accuracy_move_1
                    tot_priority_move_1 += priority_move_1
                        

                if not moves_2:
                    turn_miss_2 += 1

                if moves_2 and pok2 in p2_team_members:
                    type_move_2 = t.get('p2_move_details')['type']
                    base_power_move_2 = t.get('p2_move_details')['base_power']
                    accuracy_move_2 = t.get('p2_move_details')['accuracy']
                    priority_move_2 = t.get('p2_move_details')['priority']
                    category_move_2 = t.get('p2_move_details')['category']
                    
                    if category_move_2 != 'STATUS':
                       p2_team_members_moves.add(type_move_2.lower())
                       if not moves_1:
                          damage_only_2 += 1

                    tot_base_power_move_2 += base_power_move_2
                    tot_accuracy_move_2 += accuracy_move_2
                    tot_priority_move_2 += priority_move_2

            #Stimo differenza fra base_power,accuracy e priority delle mosse delle squadre nei trenta turni

            features['base_power_diff'] = tot_base_power_move_1 - tot_base_power_move_2
            features['accuracy_diff'] = tot_accuracy_move_1 - tot_accuracy_move_2
            features['priority_diff'] = tot_priority_move_1 - tot_priority_move_2
            

            #Stimo quante volte un pokemon ha usato una mossa offensiva e l'altro non ha mosso
            
            #features['damage_only_1'] = damage_only_1
            #features['damage_only_2'] = damage_only_2
            features['damage_only_diff'] = damage_only_1 - damage_only_2

            #Stimo quale squadra ha perso più turni di gioco
            
            #features['turn_miss_1'] = turn_miss_1
            #features['turn_miss_2'] = turn_miss_2
            #features['turn_miss_diff'] = (turn_miss_1 - turn_miss_2)
            
            #Ora confronto i tipi delle mosse di una squadra con i tipi dei pokemon dell'altra
            #Conto quale squadra ha più mosse di attacco super efficaci o a bassa efficacia sull'altra
            
            tot_efficacy_move_1 = 0
            tot_efficacy_move_2 = 0

            super_efficacy_move_1 = 0
            super_efficacy_move_2 = 0

            low_efficacy_move_1 = 0
            low_efficacy_move_2 = 0
    
            for move in p1_team_members_moves:
                for pok2 in p2_team_members:
                    efficacy_move_1 = 1
                    for tp2 in types[pok2]:
                        efficacy_move_1*= type_match_2(move,tp2)
                    if efficacy_move_1 >= 2:
                        super_efficacy_move_1 += 1
                    if efficacy_move_1 <= 0.5:
                        low_efficacy_move_1 += 1
                    tot_efficacy_move_1 += efficacy_move_1
                
            for move in p2_team_members_moves:
                for pok1 in p1_team_members:
                    efficacy_move_2 = 1
                    for tp1 in types[pok1]:
                        efficacy_move_2 *= type_match_2(move,tp1)
                    if efficacy_move_2 >= 2:
                        super_efficacy_move_2 += 1
                    if efficacy_move_2  <= 0.5:
                        low_efficacy_move_2 += 1
                    tot_efficacy_move_2 += efficacy_move_2

            features['moves_efficacy_diff'] = (tot_efficacy_move_1 - tot_efficacy_move_2)
            features['super_efficacy_move_diff'] = (super_efficacy_move_1 - super_efficacy_move_2)
            features['low_efficacy_move_diff'] = (low_efficacy_move_1 - low_efficacy_move_2)
         
        # --- Target + ID ---
        features['battle_id'] = battle.get('battle_id')
        if 'player_won' in battle:
            features['player_won'] = int(battle['player_won'])

        feature_list.append(features)
        k=1
    return pd.DataFrame(feature_list).fillna(0)


# Create feature DataFrames for both training and test sets
print("Processing training data...")
train_df = create_better_features(train_data)

print("\nProcessing test data...")
test_data = []
with open(test_file_path, 'r') as f:
    for line in f:
        test_data.append(json.loads(line))
test_df = create_better_features(test_data)

print("\nTraining features preview:")
display(train_df.head())


Processing training data...


Extracting better features:   0%|          | 0/10000 [00:00<?, ?it/s]


Processing test data...


Extracting better features:   0%|          | 0/5000 [00:00<?, ?it/s]


Training features preview:


Unnamed: 0,certain_victory,unique_types_diff,types_efficacy_diff,super_efficacy_diff,low_efficacy_diff,p1_negative_status,p2_negative_status,negative_statuses_diff,p1alakazam,p1articuno,...,damage_only_diff,moves_efficacy_diff,super_efficacy_move_diff,low_efficacy_move_diff,battle_id,player_won,p1_pok_effect5,p1_pok_effect6,p2_pok_effect5,p2_pok_effect6
0,0.5,-1,-0.75,0,1,0.4,0.6,-1,1,0,...,2,-4.5,2,1,0,1,0.0,0.0,0.0,0.0
1,0.5,-1,-0.75,0,1,0.0,0.5,-3,0,0,...,-3,2.0,0,2,1,1,0.605,0.605,0.605,0.605
2,0.5,1,-0.25,0,-1,0.2,0.5,-2,0,0,...,-3,-10.5,-1,-2,2,1,0.0,0.0,0.0,0.0
3,0.5,-2,0.5,0,-1,0.0,0.166667,-1,0,0,...,-3,-3.0,0,-1,3,1,0.605,0.0,0.0,0.0
4,0.5,1,-0.5,-1,-1,0.4,0.666667,-2,1,0,...,2,1.25,0,-2,4,1,0.605,0.0,0.934,0.0


In [3]:
train_df.boost_diff.min()

-26

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define our features (X) and target (y)
features = [col for col in train_df.columns if col not in ['battle_id', 'player_won']]
X_train_start = train_df[features]
y_train_start = train_df['player_won']
seeds = np.random.randint(1, 42, size=20)
accs=[]
print("Training a simple Logistic Regression model...")
k=1
for seed in seeds:
    X_train, X_val, y_train, y_val = train_test_split(X_train_start,y_train_start, test_size=0.4, random_state=seed)
    X_test = test_df.reindex(columns=features, fill_value=0)
#X_test = test_df[features]
    # Standardizza le feature
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)  # Scaliamo anche il set di validazione
    X_test_scaled = scaler.transform(X_test)  # Scaliamo il set di test

    # Initialize and train the model
    #print("Training a simple Logistic Regression model...")
    model = LogisticRegression(random_state=78, max_iter=1000)
    model.fit(X_train_scaled, y_train)
    #print("Model training complete.")
    
    
    from sklearn.metrics import accuracy_score
    val_predictions = model.predict(X_val_scaled)
    accuracy=accuracy_score(y_val, val_predictions)
    print("Iterazione",str(k)+":",accuracy)
    accs.append(accuracy)
    k+=1
print("Mean accuracy:",np.mean(accs))

#Adesso alleniamo X_train su tutti i dati
X_full = X_train_start
y_full = y_train_start

# Standardizza le feature
scaler = StandardScaler()
X_full_scaled = scaler.fit_transform(X_full)
X_test_scaled = scaler.transform(test_df.reindex(columns=features, fill_value=0))

# Allena il modello su tutto il dataset
model = LogisticRegression(random_state=78, max_iter=1000)
model.fit(X_full_scaled, y_full)
#print("Model training complete.")

Training a simple Logistic Regression model...
Iterazione 1: 0.845
Iterazione 2: 0.8475
Iterazione 3: 0.85125
Iterazione 4: 0.849
Iterazione 5: 0.85175
Iterazione 6: 0.85225
Iterazione 7: 0.85125
Iterazione 8: 0.8595
Iterazione 9: 0.8475
Iterazione 10: 0.84575
Iterazione 11: 0.85675
Iterazione 12: 0.848
Iterazione 13: 0.85225
Iterazione 14: 0.8545
Iterazione 15: 0.84475
Iterazione 16: 0.853
Iterazione 17: 0.849
Iterazione 18: 0.84575
Iterazione 19: 0.85675
Iterazione 20: 0.848
Mean accuracy: 0.850475


In [5]:
# Make predictions on the test data
print("Generating predictions on the test set...")
test_predictions = model.predict(X_test_scaled)

# Create the submission DataFrame
submission_df = pd.DataFrame({
    'battle_id': test_df['battle_id'],
    'player_won': test_predictions
})

# Save the DataFrame to a .csv file
submission_df.to_csv('submission.csv', index=False)

print("\n'submission.csv' file created successfully!")
display(submission_df.head())

Generating predictions on the test set...

'submission.csv' file created successfully!


Unnamed: 0,battle_id,player_won
0,0,0
1,1,1
2,2,1
3,3,1
4,4,1
