Submission with stacking with logistic regression, knn, xgboost and random forest

In [None]:
import json
import pandas as pd
import os
import numpy as np

# --- Where the competition data lives ---
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
DATA_PATH = os.path.join('../input', COMPETITION_NAME)

# Both splits sit side by side inside DATA_PATH.
train_file_path, test_file_path = (
    os.path.join(DATA_PATH, file_name) for file_name in ('train.jsonl', 'test.jsonl')
)

In [None]:
# Read the training battles (one JSON document per line) and preview the first one.
train_data = []

print(f"Loading data from '{train_file_path}'...")
try:
    with open(train_file_path, 'r') as f:
        train_data.extend(json.loads(line) for line in f)
        print(f"Successfully loaded {len(train_data)} battles.")

    # Preview: dump the first battle, keeping only its first two timeline turns.
    print("\n--- Structure of the first train battle: ---")
    if train_data:
        first_battle = train_data[0]

        # Build a shallow copy with a shortened timeline; the loaded battle is not modified.
        battle_for_display = {
            **first_battle,
            'battle_timeline': first_battle.get('battle_timeline', [])[:2],
        }

        print(json.dumps(battle_for_display, indent=4))
        if len(first_battle.get('battle_timeline', [])) > 2:
            print("    ...")
            print("    (battle_timeline has been truncated for display)")

except FileNotFoundError:
    # Missing dataset: report it and leave train_data empty.
    print(f"ERROR: Could not find the training file at '{train_file_path}'.")
    print("Please make sure you have added the competition data to this notebook.")

In [None]:
# Load the test battles: one JSON object per line of the JSONL file.
test_data = []

print(f"Loading data from '{test_file_path}'...")
try:
    with open(test_file_path, 'r') as f:
        for line in f:
            test_data.append(json.loads(line))
    print(f"Successfully loaded {len(test_data)} battles.")

    print("\n--- Structure of the first test battle: ---")
    if test_data:
        first_test_battle = test_data[0]

        # Shallow copy so that shortening the timeline for display leaves the loaded battle untouched.
        test_battle_for_display = first_test_battle.copy()
        test_battle_for_display['battle_timeline'] = test_battle_for_display.get('battle_timeline', [])[:2]  # Show first 2 turns

        print(json.dumps(test_battle_for_display, indent=4))
        # Only 2 turns are displayed, so any timeline longer than 2 has been truncated.
        if len(first_test_battle.get('battle_timeline', [])) > 2:
            print("    ...")
            print("    (battle_timeline has been truncated for display)")

except FileNotFoundError:
    # Missing dataset: report it and leave test_data empty.
    print(f"ERROR: Could not find the test file at '{test_file_path}'.")
    print("Please make sure you have added the competition data to this notebook.")

In [None]:
# How many player-1 team members (over all training battles) are not at level 100?
count_level = sum(
    pokemon.get("level") != 100
    for battle in train_data
    for pokemon in battle.get("p1_team_details")
)
print(count_level)

In [None]:
# Collect every distinct Pokémon name that appears as an active Pokémon (either side)
# in any turn of the training battles.
used_pokemon = set()
for battle in train_data:
    for turn in battle.get('battle_timeline', []):
        used_pokemon.update(
            turn.get(state_key).get("name")
            for state_key in ("p1_pokemon_state", "p2_pokemon_state")
        )

# Each name is counted once, the first time it is seen.
count = len(used_pokemon)
print(count)

In [None]:
# Build a name -> details lookup from the training battles.
# The first record seen for a given name wins; later duplicates are ignored.
pokedex = {}

for battle in train_data:
    # Every member of player 1's team...
    for pokemon in battle.get("p1_team_details"):
        pokedex.setdefault(pokemon.get("name"), pokemon)
    # ...plus player 2's lead, the only player-2 Pokémon whose details are read here.
    p2_lead = battle.get("p2_lead_details")
    pokedex.setdefault(p2_lead.get("name"), p2_lead)

print(len(pokedex))

# **Feature engineering** 
We create a function **create_simple_features** that takes as input a list of dictionaries (one per battle, as parsed from the JSONL files) and returns a dataframe containing all the features we created.

In [None]:
from tqdm.notebook import tqdm
import numpy as np

def _fraction(count: int, total: int) -> float:
    """Return count / total, or 0.0 when total is zero (a battle with no recorded turns)."""
    return count / total if total else 0.0


def _hp_loss_stats(battle_timeline: list, state_key: str) -> tuple:
    """Track the HP of one side's active Pokémon across the timeline.

    Returns a pair (n_drops, net_loss):
    - n_drops: number of turns in which a Pokémon shows less HP than the last
      time it was seen (or is first seen with int(hp_pct) != 1);
    - net_loss: sum of the per-turn HP differences (previous - current), so
      HP recovered between two sightings reduces the total.
    Turns without an HP value for that side are skipped.
    """
    last_hp = {}
    n_drops = 0
    net_loss = 0
    for turn in battle_timeline:
        state = turn.get(state_key) or {}
        hp = state.get('hp_pct')
        if hp is None:
            continue
        name = state.get('name')
        if name not in last_hp:
            # First sighting: the baseline is full health (hp_pct == 1).
            dropped = int(hp) != 1
            delta = 1 - hp if dropped else 0
        else:
            dropped = last_hp[name] > hp
            delta = last_hp[name] - hp
        last_hp[name] = hp
        n_drops += dropped
        net_loss += delta
    return n_drops, net_loss


def create_simple_features(data: list[dict]) -> pd.DataFrame:
    """Turn a list of battle dictionaries into a dataframe with one row per battle.

    For every battle the function computes: mean base stats of player 1's team
    and of the player-2 Pokémon seen in the timeline (looked up in the
    module-level `pokedex`), status/effect ratios, boosts, turns without a move,
    move accuracy, HP-drop counts and damage difference, priority, KOs,
    special-move counts, usage of blizzard/icebeam and thunderwave, and two
    interaction terms.

    Args:
        data: battle dictionaries; `battle_timeline`, `p1_team_details`,
            `battle_id` and (training only) `player_won` are read from each.

    Returns:
        A DataFrame with the feature columns plus `battle_id` and, when present
        in the input, `player_won` (cast to int). Missing values are filled with 0.
    """
    stat_names = ('hp', 'spe', 'atk', 'def', 'spa', 'spd')
    freeze_moves = ("blizzard", "icebeam")

    feature_list = []
    for battle in tqdm(data, desc="Extracting features"):
        features = {}

        # Per-turn views reused by every feature group below.
        battle_timeline = battle.get('battle_timeline') or []
        n_turns = len(battle_timeline)
        p1_states = [turn.get("p1_pokemon_state") or {} for turn in battle_timeline]
        p2_states = [turn.get("p2_pokemon_state") or {} for turn in battle_timeline]
        # Only turns in which the player actually used a move.
        p1_moves = [turn["p1_move_details"] for turn in battle_timeline if turn.get("p1_move_details")]
        p2_moves = [turn["p2_move_details"] for turn in battle_timeline if turn.get("p2_move_details")]

        # --- Player 1 Team Features ---
        p1_team = battle.get('p1_team_details') or []
        if p1_team:
            for stat in stat_names:
                features[f'p1_mean_{stat}'] = np.mean([p.get(f'base_{stat}', 0) for p in p1_team])

        # --- Player 2 Team Features ---
        # Player 2's team is reconstructed from the Pokémon seen in the timeline;
        # names missing from the pokedex contribute 0 to every stat.
        p2_team = {}
        for state in p2_states:
            name_p2 = state.get("name")
            if name_p2:
                p2_team[name_p2] = pokedex.get(name_p2, {})
        for stat in stat_names:
            values = [entry.get(f'base_{stat}', 0) for entry in p2_team.values()]
            # No player-2 Pokémon seen: use 0 instead of the NaN np.mean([]) would give.
            features[f'p2_mean_{stat}'] = np.mean(values) if values else 0.0

        # --- Players' Status pokemon ---
        # Share of turns in which the active Pokémon's status is not "nostatus".
        status_p1 = _fraction(sum(s.get("status") != "nostatus" for s in p1_states), n_turns)
        status_p2 = _fraction(sum(s.get("status") != "nostatus" for s in p2_states), n_turns)

        features['p1_status'] = round(status_p1, 3)
        features['p2_status'] = round(status_p2, 3)
        features["diff_status"] = features['p1_status'] - features['p2_status']

        # --- Players' Boosts ---
        boosts_p1 = sum(sum((s.get("boosts") or {}).values()) for s in p1_states)
        boosts_p2 = sum(sum((s.get("boosts") or {}).values()) for s in p2_states)

        features["boosts_p1"] = boosts_p1
        features["boosts_p2"] = boosts_p2
        features["diff_boost"] = boosts_p1 - boosts_p2

        # --- Players' move "null" ---
        # Turns in which the player has no move details.
        null_1 = n_turns - len(p1_moves)
        null_2 = n_turns - len(p2_moves)

        features['null_p1'] = null_1
        features['null_p2'] = null_2

        # --- Players' accuracy ---
        # Mean accuracy of the moves used; 0 when the player never used a move.
        features['avg_acc_p1'] = (
            sum(float(m.get('accuracy')) for m in p1_moves) / len(p1_moves) if p1_moves else 0
        )
        features['avg_acc_p2'] = (
            sum(float(m.get('accuracy')) for m in p2_moves) / len(p2_moves) if p2_moves else 0
        )
        features["diff_avg_acc"] = features['avg_acc_p1'] - features['avg_acc_p2']

        # --- Numbers of time in which each player attacks ---
        # An HP drop on one side is counted as an attack by the other side.
        drops_p1, def_p1 = _hp_loss_stats(battle_timeline, 'p1_pokemon_state')
        drops_p2, atk_p1 = _hp_loss_stats(battle_timeline, 'p2_pokemon_state')

        features['n_atk_p1'] = drops_p2
        features['n_atk_p2'] = drops_p1

        # --- Difference of damage inflicted ---
        # atk_p1: HP lost by player 2's Pokémon; def_p1: HP lost by player 1's Pokémon.
        features["diff_damage"] = atk_p1 - def_p1

        # --- Count priority ---
        priority_1 = sum(m["priority"] for m in p1_moves)
        priority_2 = sum(m["priority"] for m in p2_moves)

        features["priority_1"] = priority_1
        features["priority_2"] = priority_2
        features["diff_priority"] = priority_1 - priority_2

        # --- Players' KO ---
        # Number of turns in which the active Pokémon's status is 'fnt'.
        ko_p1 = sum(s.get('status') == 'fnt' for s in p1_states)
        ko_p2 = sum(s.get('status') == 'fnt' for s in p2_states)

        features['ko_p1'] = ko_p1
        features['ko_p2'] = ko_p2
        features["diff_ko"] = ko_p1 - ko_p2

        # --- Number of special attacks ---
        # base_spa lookup for player 1's team; the first entry wins on duplicate names.
        spa_by_name = {}
        for p in p1_team:
            spa_by_name.setdefault(p.get("name"), p.get('base_spa', 0))

        n_sp_1 = 0
        sp_atk_1 = 0
        for turn in battle_timeline:
            move = turn.get("p1_move_details")
            if move and move.get('category') == 'SPECIAL':
                n_sp_1 += 1
                active_name = (turn.get("p1_pokemon_state") or {}).get("name")
                # An active Pokémon that is not listed in the team contributes 0
                # instead of aborting the whole extraction.
                sp_atk_1 += spa_by_name.get(active_name, 0)
        n_sp_2 = sum(m.get('category') == 'SPECIAL' for m in p2_moves)

        features['n_sp_atk_1'] = n_sp_1
        features['n_sp_atk_2'] = n_sp_2
        features["diff_sp_atk"] = n_sp_1 - n_sp_2
        features["avg_sp_atk_1"] = sp_atk_1 / n_sp_1 if n_sp_1 else 0

        # --- Number of times that each player uses blizzard / ice beam ---
        features["move_p1_blz-icb"] = sum(m.get("name") in freeze_moves for m in p1_moves)
        features["move_p2_blz-icb"] = sum(m.get("name") in freeze_moves for m in p2_moves)

        # --- Number of times that each player uses thunderwave ---
        features["move_p1_tw"] = sum(m.get("name") == "thunderwave" for m in p1_moves)
        # Player 2's own count (this column previously duplicated player 1's).
        features["move_p2_tw"] = sum(m.get("name") == "thunderwave" for m in p2_moves)

        # --- effects count ---
        # Share of turns whose first listed effect is not 'noeffect';
        # a missing or empty effects list is treated as 'noeffect'.
        effects_p1 = sum((s.get("effects") or ['noeffect'])[0] != 'noeffect' for s in p1_states)
        effects_p2 = sum((s.get("effects") or ['noeffect'])[0] != 'noeffect' for s in p2_states)

        features['p1_effects'] = round(_fraction(effects_p1, n_turns), 3)
        features['p2_effects'] = round(_fraction(effects_p2, n_turns), 3)
        # The misspelled key is kept on purpose: later cells select columns by this exact name.
        features["diff_effetcs"] = features['p1_effects'] - features['p2_effects']

        # --- some interactions ---
        features["p2_status x null_p2"] = features["p2_status"] * null_2
        features["p1_status x null_p1"] = features["p1_status"] * null_1

        # ID and the target variable
        features['battle_id'] = battle.get('battle_id')
        if 'player_won' in battle:
            features['player_won'] = int(battle['player_won'])

        feature_list.append(features)

    # Features that were not produced for a battle (e.g. no player-1 team) become 0.
    return pd.DataFrame(feature_list).fillna(0)

We call the function twice, first on the training data and then on the test data, so that we obtain the two dataframes for training and test. Then we create the final datasets for training (removing the ID and target columns) and for the test.

In [None]:
# Feature dataframes: first the training set, then the test set
train_df, test_df = (create_simple_features(battles) for battles in (train_data, test_data))

# Peek at both ends of the training frame
display(train_df.head(), train_df.tail())

train_df.describe()

In [None]:
# Defining our features (X) and target (y)

features = [col for col in train_df.columns if col not in ['battle_id', 'player_won']]
X_train = train_df[features]
y_train = train_df['player_won']
print (features)
X_test = test_df[features]

In the following cell we create a different feature subset for each base model we want to put in the stacking.  
The idea is to create as much diversity as possible between the models, letting each one be trained on the feature set that suits it best (i.e. the features that cause it the least confusion). Then, using the stacking ensemble method, we hope to gain as much information as possible from all the models.

In [None]:
def _drop_features(excluded):
    """Return `features` in its original order, without the names listed in `excluded`."""
    return [feature for feature in features if feature not in excluded]


# Columns left out of the logistic-regression input.
_EXCLUDED_LR = ["avg_sp_atk_1", "diff_ko",
                "avg_acc_p1", "avg_acc_p2", "diff_sp_atk", "diff_status"]

# Columns left out of both tree-based models (random forest and xgboost).
_EXCLUDED_TREES = ["avg_sp_atk_1", "diff_ko",
                   "diff_priority", "boosts_p1", "boosts_p2",
                   "avg_acc_p1", "avg_acc_p2", "diff_sp_atk", "diff_status",
                   "p1_effects", "p2_effects", "diff_effetcs"]

# knn drops everything the tree models drop, plus the columns below.
_EXCLUDED_KNN = _EXCLUDED_TREES + ["null_p1", "ko_p2", "move_p1_blz-icb", "move_p2_tw",
                                   "p1_mean_spd", "move_p2_blz-icb", "priority_2",
                                   "diff_boost", "p2_status x null_p2", "p2_mean_spa",
                                   "p1_mean_def", "p1_mean_spa", "ko_p1",
                                   "p2_mean_spd", "p2_mean_def", "p1_mean_atk",
                                   "n_sp_atk_1", "n_sp_atk_2", "move_p1_tw"]

features_lr = _drop_features(_EXCLUDED_LR)
features_knn = _drop_features(_EXCLUDED_KNN)
features_rf = _drop_features(_EXCLUDED_TREES)
features_xgb = _drop_features(_EXCLUDED_TREES)

print(len(features_lr), len(features_knn), len(features_rf), len(features_xgb))

**Scaling**  
Due to the fact that having features on different scales can create problems and confusion in some models (e.g. knn) we rescale all the variables to mean = 0 and standard deviation = 1.

In [None]:
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Learn mean / standard deviation on the training set only, then apply that same
# transformation to the test set. Re-fitting on the test set would put the two
# sets on different scales, so the models would see shifted test features.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Back to dataframes: the stacking pipelines select their columns by name.
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns= features)
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns= features )
display(X_train_scaled_df.head())
X_train_scaled_df.describe()

We do the same standardization to every specific model's training and test sets. 

In [None]:
def _scale_subset(columns):
    """Select `columns` from train_df / test_df and standardize them.

    The scaler is fitted on the training subset only and then applied to both
    subsets, so train and test share the same scale.

    Returns:
        (X_train_subset, X_test_subset, X_train_subset_scaled, X_test_subset_scaled)
    """
    X_train_subset = train_df[columns]
    X_test_subset = test_df[columns]
    subset_scaler = StandardScaler().fit(X_train_subset)
    return (
        X_train_subset,
        X_test_subset,
        subset_scaler.transform(X_train_subset),
        subset_scaler.transform(X_test_subset),
    )


X_train_lr, X_test_lr, X_train_lr_scaled, X_test_lr_scaled = _scale_subset(features_lr)

X_train_knn, X_test_knn, X_train_knn_scaled, X_test_knn_scaled = _scale_subset(features_knn)

X_train_xgb, X_test_xgb, X_train_xgb_scaled, X_test_xgb_scaled = _scale_subset(features_xgb)

X_train_rf, X_test_rf, X_train_rf_scaled, X_test_rf_scaled = _scale_subset(features_rf)

# **Building the model**

In this section, before building the final model, we fit each base model on its own and tune its hyperparameters in order to get the best possible performance.  
We implemented:
- Grid search for logistic regression
- elbow plot for knn

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, roc_auc_score

model = LogisticRegression(random_state=42, max_iter=1000)

# The grid is split per solver: 'lbfgs' does not support the l1 penalty, so a
# single cartesian grid would contain combinations whose fits always fail.
logreg_c_values = [0.1, 0.5, 1, 1.5, 2]
param_grid = [
    {'solver': ['liblinear'], 'penalty': ['l1', 'l2'], 'C': logreg_c_values},
    {'solver': ['lbfgs'], 'penalty': ['l2'], 'C': logreg_c_values},
]

grid_logreg = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring=['roc_auc', 'accuracy'],
    n_jobs=4,
    cv=5,
    refit='accuracy',      #  the metric by which we want to take the best estimator
    return_train_score=True
)

# Logistic regression is trained on its own (scaled) feature subset.
grid_logreg.fit(X_train_lr_scaled, y_train)

cv_results_df = pd.DataFrame(grid_logreg.cv_results_)

predictions_lr = grid_logreg.best_estimator_.predict(X_test_lr_scaled)

print("Predicted labels:", predictions_lr[:10])

Displaying the results obtained:
- cross-validated accuracy of the best model


In [None]:
# Keep the refitted winner of the grid search for the stacking step
best_log_reg = grid_logreg.best_estimator_

# Mean cross-validated accuracy of that winning parameter combination
best_idx = grid_logreg.best_index_
cv_df = pd.DataFrame(grid_logreg.cv_results_)
mean_acc = cv_df.at[best_idx, 'mean_test_accuracy']

print("\naccuracy",mean_acc)

**KNN**

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Candidate neighbourhood sizes
ks = range(1, 30)

# Mean 5-fold cross-validated accuracy for each candidate k (knn feature subset)
scores = []
for k in ks:
    fold_accuracies = cross_val_score(
        KNeighborsClassifier(n_neighbors=k), X_train_knn_scaled, y_train, cv=5
    )
    scores.append(fold_accuracies.mean())

fig, ax = plt.subplots()
ax.plot(ks, scores, marker='o')
ax.set_xlabel('k (number of neighbors)')
ax.set_ylabel('Cross-Validation Accuracy')
ax.set_title('Choosing the Optimal k in KNN')
ax.grid(True)
plt.show()

# elbow at 24

Computing accuracy of KNN on training set with cross-validation

In [None]:
# Evaluate knn on its own feature subset: the same matrix the elbow plot used to
# pick k = 24 and the same columns the knn pipeline receives in the stacking.
acc_knn = cross_val_score(KNeighborsClassifier(n_neighbors = 24), X_train_knn_scaled, y_train, cv = 5).mean()
print(acc_knn)

**XGBoost**

In [None]:
pip install xgboost -q

In [None]:
from xgboost import XGBClassifier


# `use_label_encoder` is deprecated in XGBoost and only triggers a warning on
# every fit, so it is no longer passed. The seed is fixed like for the other models.
xgb_clf = XGBClassifier(eval_metric='logloss', random_state=42)

# Mean 5-fold cross-validated accuracy on the xgboost feature subset
acc_xgb = cross_val_score(xgb_clf, X_train_xgb_scaled, y_train, cv =5).mean()

print(acc_xgb)

**Random Forest**

In [None]:
from sklearn.ensemble import  RandomForestClassifier
# Random forest with a fixed seed, scored by 5-fold cross-validation
# on the random-forest feature subset
rf_clf = RandomForestClassifier(random_state=42, n_estimators=100)
acc_rf = cross_val_score(estimator=rf_clf, X=X_train_rf_scaled, y=y_train, cv=5).mean()

print(acc_rf)

We create pipelines (one for each model) to put into the stacking code.  
Every pipeline first gives each model its own set of features (we used the **ColumnTransformer** class to do this) and then fits the specified model.  
Thanks to ColumnTransformer and to "passthrough" we are able to pass each feature set directly to its model, unchanged.

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


def _select_columns(columns):
    """Column selector: forwards only `columns`, untouched, to the next pipeline step."""
    return ColumnTransformer([('sel', 'passthrough', columns)])


# Base classifiers that were not already instantiated in the previous cells
log_clf = best_log_reg
knn_clf = KNeighborsClassifier(n_neighbors = 24)

# One selector per model, each bound to that model's feature list
ct_logreg = _select_columns(features_lr)
ct_knn    = _select_columns(features_knn)
ct_rf     = _select_columns(features_rf)
ct_xgb    = _select_columns(features_xgb)

# selector -> classifier
pipe_logreg = Pipeline(steps=[('cols', ct_logreg), ('clf', log_clf)])
pipe_knn = Pipeline(steps=[('cols', ct_knn), ('clf', knn_clf)])
pipe_rf = Pipeline(steps=[('cols', ct_rf), ('clf', rf_clf)])
pipe_xgb = Pipeline(steps=[('cols', ct_xgb), ('clf', xgb_clf)])

**Ensemble (Stacking)**  
Our choice for this submission is to use the stacking ensemble technique, using as base models all the ones computed previously:
- logistic regression
- knn
- xgboost
- random forest

The idea is to use a logistic regression whose inputs are the class probabilities predicted by the base models. The particularity is that now every base model is trained with its own set of features in order to try to reach the best possible performance.
To obtain the best performance we also implemented a grid search on the meta-learner. The grid search is itself cross-validated, which is required to control overfitting.

In [None]:
from sklearn.ensemble import StackingClassifier

# Meta-learner: combines the outputs of the base models
meta_learner = LogisticRegression(max_iter=1000, random_state=42)

# (name, pipeline) pairs; each pipeline selects its own columns before fitting
base_pipelines = [
    ('log', pipe_logreg),
    ('knn', pipe_knn),
    ('rf', pipe_rf),
    ('xgb', pipe_xgb),
]

stack_tot = StackingClassifier(
    estimators=base_pipelines,
    final_estimator=meta_learner,
    cv=5,                           # folds used to produce the meta-learner's training inputs
    stack_method='predict_proba',   # base models pass probabilities to the meta-learner
    passthrough=False,              # the meta-learner does not see the original features
)

In [None]:
# The grid is split per solver: 'lbfgs' does not support the l1 penalty. With a
# single cartesian grid, every l1 + lbfgs candidate would refit the whole stack
# (all base models, for every fold) only to fail when fitting the meta-learner.
meta_c_values = [0.1, 1, 1.5, 2]
param_grid = [
    {
        'final_estimator__solver': ['liblinear'],
        'final_estimator__penalty': ['l1', 'l2'],
        'final_estimator__C': meta_c_values,
    },
    {
        'final_estimator__solver': ['lbfgs'],
        'final_estimator__penalty': ['l2'],
        'final_estimator__C': meta_c_values,
    },
]

grid_tot = GridSearchCV(stack_tot, param_grid, scoring = ["roc_auc", "accuracy"], refit = "accuracy",n_jobs = 4 , cv=5)
# Dataframes (not arrays) are required here: the pipelines select their columns by name.
grid_tot.fit(X_train_scaled_df, y_train)

predictions_stack_tot = grid_tot.best_estimator_.predict(X_test_scaled_df)


tot_results_df = pd.DataFrame(grid_tot.cv_results_)

best_idx = grid_tot.best_index_

# Mean cross-validated accuracy of the selected meta-learner configuration
acc_stack_tot = tot_results_df.loc[best_idx, 'mean_test_accuracy']
print(acc_stack_tot)

# **Creating the Submission File**  
The competition requires a `.csv` file with two columns: `battle_id` and `player_won`. Let's use our trained model to make predictions on the test set and format them correctly.

In [None]:
# Make predictions on the test data
print("Generating predictions on the test set...")


# Create the submission DataFrame
submission_df = pd.DataFrame({
    'battle_id': test_df['battle_id'],
    'player_won': predictions_stack_tot
})

# Save the DataFrame to a .csv file
submission_df.to_csv('submission.csv', index=False)

print("\n'submission.csv' file created successfully!")
display(submission_df.head())