In [1]:
import os, sys
import pandas as pd
import lightgbm as lgb
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Treat the parent of the current working directory as the project root and
# put it at the front of the import path, so the `src.*` imports that follow
# can be resolved when the notebook is launched from a subdirectory.
notebooks_dir = os.getcwd()
parent_of_cwd = os.path.join(notebooks_dir, os.pardir)
project_root = os.path.abspath(parent_of_cwd)

# Guard against inserting the same entry again when the cell is re-run.
already_importable = project_root in sys.path
if not already_importable:
    sys.path.insert(0, project_root)

from src.data      import load_data, save_data
from src.features  import build_features
from src.model     import train_model, predict
from src.utils     import set_seed, compute_roc_auc
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc

# Fix the seed (42) through the project helper before anything else runs.
# NOTE(review): which RNGs set_seed actually covers is defined in src.utils — confirm.
set_seed(42)
# Use seaborn's "whitegrid" style for any figures drawn in this notebook.
sns.set(style="whitegrid")

In [4]:
# Load the processed test split.
# NOTE(review): the file name suggests the features are already scaled — confirm upstream.
test = load_data('../data/processed/test_scaled.csv')
print("Columnas en test:", test.columns.tolist())

# Keep the row identifiers aside; they are written to the submission later
# and are removed from the model inputs below.
ids = test['id']

# Always drop 'id'; additionally drop 'Personality' / 'target' when the file
# happens to contain them, so only the remaining columns reach the model.
optional_non_features = ('Personality', 'target')
drop_cols = ['id'] + [name for name in optional_non_features if name in test.columns]

X_test = test.drop(columns=drop_cols, errors='ignore')
print("X_test shape:", X_test.shape)

Columnas en test: ['id', 'Time_spent_Alone', 'Stage_fear', 'Social_event_attendance', 'Going_outside', 'Drained_after_socializing', 'Friends_circle_size', 'Post_frequency']
X_test shape: (6175, 7)


In [7]:
# One saved LightGBM booster per fold: ../models/lgbm_fold0.txt … lgbm_fold4.txt.
model_paths = [f'../models/lgbm_fold{i}.txt' for i in range(5)]
models = [lgb.Booster(model_file=path) for path in model_paths]

# Score the test features with all loaded boosters.
# NOTE(review): how predict() combines the per-fold outputs is defined in src.model — confirm.
preds = predict(models, X_test)

In [8]:
# Assemble the submission table: one row per test id with its prediction.
# NOTE(review): confirm that the consumer of this file expects a column named
# 'target' holding raw scores rather than a differently named label column.
submission_columns = {
    'id': ids,
    'target': preds,
}
submission = pd.DataFrame(submission_columns)

# Preview the first rows as a sanity check.
display(submission.head())

Unnamed: 0,id,target
0,18524,0.039436
1,18525,0.886247
2,18526,0.073345
3,18527,0.04023
4,18528,0.870643


In [9]:
# Save the submission table through the project helper.
submission_path = '../submissions/submission.csv'
save_data(submission, submission_path)
# Confirmation message; its text is fixed and does not depend on submission_path.
print("submissions/submission.csv creado.")


submissions/submission.csv creado.
