In [6]:
import pandas as pd
df_train = pd.read_csv('../Data/train.csv')
df_misnoneTrain = df_train.fillna("no value")
df_misnoneTrain
X = df_misnoneTrain.drop(columns=['Personality'])
y = df_misnoneTrain['Personality']


In [9]:
import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder

# Prepare data
X = df_misnoneTrain.drop(columns=['Personality']).copy()
y = df_misnoneTrain['Personality']

# Convert all object columns to string type for OneHotEncoder compatibility
for col in X.select_dtypes(include=['object']).columns:
    X[col] = X[col].astype(str)

# Encode target variable
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Preprocessing for numerical and categorical data
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

def objective(trial):
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 3),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 3),
        'eval_metric': 'logloss',
        'random_state': 42
    }

    model = XGBClassifier(**param)
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', model)
    ])

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_encoded, cv=cv, scoring='accuracy')
    trial.set_user_attr('params', param)
    print(f"[Trial {trial.number}] value: {scores.mean():.6f}, params: {param}")
    return scores.mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

print("Best Params:", study.best_params)
print("Best CV Score:", study.best_value)

[I 2025-07-04 14:04:26,988] A new study created in memory with name: no-name-e6fcf547-3940-4285-92c5-c6124a4b9acd
[I 2025-07-04 14:04:29,310] Trial 0 finished with value: 0.9685814511357311 and parameters: {'n_estimators': 113, 'max_depth': 8, 'learning_rate': 0.05840816050844458, 'subsample': 0.707160657268785, 'colsample_bytree': 0.7574330025003639, 'gamma': 1.0757972851358337, 'reg_alpha': 2.594267817996071, 'reg_lambda': 2.4974716519912166}. Best is trial 0 with value: 0.9685814511357311.
[I 2025-07-04 14:04:29,310] Trial 0 finished with value: 0.9685814511357311 and parameters: {'n_estimators': 113, 'max_depth': 8, 'learning_rate': 0.05840816050844458, 'subsample': 0.707160657268785, 'colsample_bytree': 0.7574330025003639, 'gamma': 1.0757972851358337, 'reg_alpha': 2.594267817996071, 'reg_lambda': 2.4974716519912166}. Best is trial 0 with value: 0.9685814511357311.


[Trial 0] value: 0.968581, params: {'n_estimators': 113, 'max_depth': 8, 'learning_rate': 0.05840816050844458, 'subsample': 0.707160657268785, 'colsample_bytree': 0.7574330025003639, 'gamma': 1.0757972851358337, 'reg_alpha': 2.594267817996071, 'reg_lambda': 2.4974716519912166, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:04:31,337] Trial 1 finished with value: 0.9683115456026676 and parameters: {'n_estimators': 270, 'max_depth': 3, 'learning_rate': 0.10371201604274918, 'subsample': 0.7379356782947581, 'colsample_bytree': 0.8212803837238556, 'gamma': 3.1897843245164017, 'reg_alpha': 0.7744856970319051, 'reg_lambda': 1.4280727962341722}. Best is trial 0 with value: 0.9685814511357311.


[Trial 1] value: 0.968312, params: {'n_estimators': 270, 'max_depth': 3, 'learning_rate': 0.10371201604274918, 'subsample': 0.7379356782947581, 'colsample_bytree': 0.8212803837238556, 'gamma': 3.1897843245164017, 'reg_alpha': 0.7744856970319051, 'reg_lambda': 1.4280727962341722, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:04:36,923] Trial 2 finished with value: 0.9686354176686109 and parameters: {'n_estimators': 682, 'max_depth': 5, 'learning_rate': 0.022721008733383664, 'subsample': 0.7299243221454034, 'colsample_bytree': 0.741573423812497, 'gamma': 1.2252051644659683, 'reg_alpha': 2.422999534712565, 'reg_lambda': 2.162293216766288}. Best is trial 2 with value: 0.9686354176686109.


[Trial 2] value: 0.968635, params: {'n_estimators': 682, 'max_depth': 5, 'learning_rate': 0.022721008733383664, 'subsample': 0.7299243221454034, 'colsample_bytree': 0.741573423812497, 'gamma': 1.2252051644659683, 'reg_alpha': 2.422999534712565, 'reg_lambda': 2.162293216766288, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:04:38,344] Trial 3 finished with value: 0.9685274846028511 and parameters: {'n_estimators': 179, 'max_depth': 7, 'learning_rate': 0.2331815760568527, 'subsample': 0.7912734582350447, 'colsample_bytree': 0.758179136700497, 'gamma': 1.9261355772725386, 'reg_alpha': 2.485990537877089, 'reg_lambda': 1.5753478643326524}. Best is trial 2 with value: 0.9686354176686109.


[Trial 3] value: 0.968527, params: {'n_estimators': 179, 'max_depth': 7, 'learning_rate': 0.2331815760568527, 'subsample': 0.7912734582350447, 'colsample_bytree': 0.758179136700497, 'gamma': 1.9261355772725386, 'reg_alpha': 2.485990537877089, 'reg_lambda': 1.5753478643326524, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:04:41,537] Trial 4 finished with value: 0.9684194932421601 and parameters: {'n_estimators': 830, 'max_depth': 4, 'learning_rate': 0.2162056708491376, 'subsample': 0.9709917522373372, 'colsample_bytree': 0.6607944433782993, 'gamma': 4.428367090882682, 'reg_alpha': 1.1738568201381179, 'reg_lambda': 0.9564347656898852}. Best is trial 2 with value: 0.9686354176686109.


[Trial 4] value: 0.968419, params: {'n_estimators': 830, 'max_depth': 4, 'learning_rate': 0.2162056708491376, 'subsample': 0.9709917522373372, 'colsample_bytree': 0.6607944433782993, 'gamma': 4.428367090882682, 'reg_alpha': 1.1738568201381179, 'reg_lambda': 0.9564347656898852, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:04:45,332] Trial 5 finished with value: 0.9685814657094639 and parameters: {'n_estimators': 766, 'max_depth': 9, 'learning_rate': 0.12541365382290925, 'subsample': 0.9014125415057612, 'colsample_bytree': 0.7364733514870574, 'gamma': 4.006758639900257, 'reg_alpha': 0.39332196196211555, 'reg_lambda': 0.5359520563343534}. Best is trial 2 with value: 0.9686354176686109.


[Trial 5] value: 0.968581, params: {'n_estimators': 766, 'max_depth': 9, 'learning_rate': 0.12541365382290925, 'subsample': 0.9014125415057612, 'colsample_bytree': 0.7364733514870574, 'gamma': 4.006758639900257, 'reg_alpha': 0.39332196196211555, 'reg_lambda': 0.5359520563343534, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:04:50,219] Trial 6 finished with value: 0.9685814657094639 and parameters: {'n_estimators': 823, 'max_depth': 6, 'learning_rate': 0.12693278486306417, 'subsample': 0.6438284516253344, 'colsample_bytree': 0.7421153221044458, 'gamma': 2.7548469963136046, 'reg_alpha': 0.916255882642716, 'reg_lambda': 2.1646255893045896}. Best is trial 2 with value: 0.9686354176686109.


[Trial 6] value: 0.968581, params: {'n_estimators': 823, 'max_depth': 6, 'learning_rate': 0.12693278486306417, 'subsample': 0.6438284516253344, 'colsample_bytree': 0.7421153221044458, 'gamma': 2.7548469963136046, 'reg_alpha': 0.916255882642716, 'reg_lambda': 2.1646255893045896, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:04:55,638] Trial 7 finished with value: 0.9683655267092803 and parameters: {'n_estimators': 999, 'max_depth': 6, 'learning_rate': 0.12943679341605896, 'subsample': 0.6987379934461144, 'colsample_bytree': 0.931945090288967, 'gamma': 2.3173319455749795, 'reg_alpha': 1.984732004972355, 'reg_lambda': 1.9270140610610305}. Best is trial 2 with value: 0.9686354176686109.


[Trial 7] value: 0.968366, params: {'n_estimators': 999, 'max_depth': 6, 'learning_rate': 0.12943679341605896, 'subsample': 0.6987379934461144, 'colsample_bytree': 0.931945090288967, 'gamma': 2.3173319455749795, 'reg_alpha': 1.984732004972355, 'reg_lambda': 1.9270140610610305, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:04:58,790] Trial 8 finished with value: 0.9686893987752235 and parameters: {'n_estimators': 293, 'max_depth': 4, 'learning_rate': 0.07886317352437897, 'subsample': 0.7185540670452826, 'colsample_bytree': 0.6664155089023451, 'gamma': 0.7689389017880349, 'reg_alpha': 1.5434804785366216, 'reg_lambda': 0.270140880915982}. Best is trial 8 with value: 0.9686893987752235.


[Trial 8] value: 0.968689, params: {'n_estimators': 293, 'max_depth': 4, 'learning_rate': 0.07886317352437897, 'subsample': 0.7185540670452826, 'colsample_bytree': 0.6664155089023451, 'gamma': 0.7689389017880349, 'reg_alpha': 1.5434804785366216, 'reg_lambda': 0.270140880915982, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:04,056] Trial 9 finished with value: 0.9687973755621819 and parameters: {'n_estimators': 824, 'max_depth': 10, 'learning_rate': 0.050394986160848615, 'subsample': 0.7560070961181827, 'colsample_bytree': 0.6009942272759066, 'gamma': 2.5574577751683947, 'reg_alpha': 0.0019229660796438175, 'reg_lambda': 1.820080966754477}. Best is trial 9 with value: 0.9687973755621819.


[Trial 9] value: 0.968797, params: {'n_estimators': 824, 'max_depth': 10, 'learning_rate': 0.050394986160848615, 'subsample': 0.7560070961181827, 'colsample_bytree': 0.6009942272759066, 'gamma': 2.5574577751683947, 'reg_alpha': 0.0019229660796438175, 'reg_lambda': 1.820080966754477, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:11,313] Trial 10 finished with value: 0.9658282106662236 and parameters: {'n_estimators': 526, 'max_depth': 10, 'learning_rate': 0.29510656660354573, 'subsample': 0.8621501372116864, 'colsample_bytree': 0.6064643760884312, 'gamma': 0.08231794320493258, 'reg_alpha': 0.0906450297618423, 'reg_lambda': 2.6264385395871757}. Best is trial 9 with value: 0.9687973755621819.


[Trial 10] value: 0.965828, params: {'n_estimators': 526, 'max_depth': 10, 'learning_rate': 0.29510656660354573, 'subsample': 0.8621501372116864, 'colsample_bytree': 0.6064643760884312, 'gamma': 0.08231794320493258, 'reg_alpha': 0.0906450297618423, 'reg_lambda': 2.6264385395871757, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:14,862] Trial 11 finished with value: 0.9685814365619981 and parameters: {'n_estimators': 458, 'max_depth': 3, 'learning_rate': 0.064256139713592, 'subsample': 0.632081578766733, 'colsample_bytree': 0.6051183133028362, 'gamma': 0.2481279586905586, 'reg_alpha': 1.6028246767726058, 'reg_lambda': 0.1342003524820603}. Best is trial 9 with value: 0.9687973755621819.


[Trial 11] value: 0.968581, params: {'n_estimators': 458, 'max_depth': 3, 'learning_rate': 0.064256139713592, 'subsample': 0.632081578766733, 'colsample_bytree': 0.6051183133028362, 'gamma': 0.2481279586905586, 'reg_alpha': 1.6028246767726058, 'reg_lambda': 0.1342003524820603, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:18,384] Trial 12 finished with value: 0.9685814365619979 and parameters: {'n_estimators': 372, 'max_depth': 10, 'learning_rate': 0.011604292774909902, 'subsample': 0.8045526942608521, 'colsample_bytree': 0.667109423030327, 'gamma': 3.45020273469812, 'reg_alpha': 1.690150037993498, 'reg_lambda': 1.0729835698820351}. Best is trial 9 with value: 0.9687973755621819.


[Trial 12] value: 0.968581, params: {'n_estimators': 372, 'max_depth': 10, 'learning_rate': 0.011604292774909902, 'subsample': 0.8045526942608521, 'colsample_bytree': 0.667109423030327, 'gamma': 3.45020273469812, 'reg_alpha': 1.690150037993498, 'reg_lambda': 1.0729835698820351, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:22,439] Trial 13 finished with value: 0.9686354176686109 and parameters: {'n_estimators': 645, 'max_depth': 8, 'learning_rate': 0.07669491683455613, 'subsample': 0.8049100919386012, 'colsample_bytree': 0.664857693429463, 'gamma': 1.5266135260416513, 'reg_alpha': 1.1931863872955513, 'reg_lambda': 0.17245830758716219}. Best is trial 9 with value: 0.9687973755621819.


[Trial 13] value: 0.968635, params: {'n_estimators': 645, 'max_depth': 8, 'learning_rate': 0.07669491683455613, 'subsample': 0.8049100919386012, 'colsample_bytree': 0.664857693429463, 'gamma': 1.5266135260416513, 'reg_alpha': 1.1931863872955513, 'reg_lambda': 0.17245830758716219, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:28,756] Trial 14 finished with value: 0.9683114581602703 and parameters: {'n_estimators': 989, 'max_depth': 5, 'learning_rate': 0.1869737513128162, 'subsample': 0.6059598960975792, 'colsample_bytree': 0.8926981158517628, 'gamma': 1.024006436823608, 'reg_alpha': 2.943074227103056, 'reg_lambda': 0.6931391925525434}. Best is trial 9 with value: 0.9687973755621819.


[Trial 14] value: 0.968311, params: {'n_estimators': 989, 'max_depth': 5, 'learning_rate': 0.1869737513128162, 'subsample': 0.6059598960975792, 'colsample_bytree': 0.8926981158517628, 'gamma': 1.024006436823608, 'reg_alpha': 2.943074227103056, 'reg_lambda': 0.6931391925525434, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:31,987] Trial 15 finished with value: 0.968095606602484 and parameters: {'n_estimators': 344, 'max_depth': 4, 'learning_rate': 0.05044218686402645, 'subsample': 0.7685524675768942, 'colsample_bytree': 0.9890019374468941, 'gamma': 2.422196237737104, 'reg_alpha': 2.0137055372458668, 'reg_lambda': 2.9616561017836975}. Best is trial 9 with value: 0.9687973755621819.


[Trial 15] value: 0.968096, params: {'n_estimators': 344, 'max_depth': 4, 'learning_rate': 0.05044218686402645, 'subsample': 0.7685524675768942, 'colsample_bytree': 0.9890019374468941, 'gamma': 2.422196237737104, 'reg_alpha': 2.0137055372458668, 'reg_lambda': 2.9616561017836975, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:39,319] Trial 16 finished with value: 0.9675557226676925 and parameters: {'n_estimators': 597, 'max_depth': 7, 'learning_rate': 0.10046137498194381, 'subsample': 0.6733777969181969, 'colsample_bytree': 0.6800987072051734, 'gamma': 0.6197345408189553, 'reg_alpha': 0.4849828541730767, 'reg_lambda': 1.4708972086293497}. Best is trial 9 with value: 0.9687973755621819.


[Trial 16] value: 0.967556, params: {'n_estimators': 597, 'max_depth': 7, 'learning_rate': 0.10046137498194381, 'subsample': 0.6733777969181969, 'colsample_bytree': 0.6800987072051734, 'gamma': 0.6197345408189553, 'reg_alpha': 0.4849828541730767, 'reg_lambda': 1.4708972086293497, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:42,881] Trial 17 finished with value: 0.9684735034962385 and parameters: {'n_estimators': 478, 'max_depth': 9, 'learning_rate': 0.16046475744377747, 'subsample': 0.8599544933097777, 'colsample_bytree': 0.8157606870938318, 'gamma': 1.9477533979815282, 'reg_alpha': 1.298716560561668, 'reg_lambda': 1.810062944473441}. Best is trial 9 with value: 0.9687973755621819.


[Trial 17] value: 0.968474, params: {'n_estimators': 478, 'max_depth': 9, 'learning_rate': 0.16046475744377747, 'subsample': 0.8599544933097777, 'colsample_bytree': 0.8157606870938318, 'gamma': 1.9477533979815282, 'reg_alpha': 1.298716560561668, 'reg_lambda': 1.810062944473441, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:45,401] Trial 18 finished with value: 0.9686893987752235 and parameters: {'n_estimators': 253, 'max_depth': 5, 'learning_rate': 0.03460302764780437, 'subsample': 0.7616942485567815, 'colsample_bytree': 0.6312489699537098, 'gamma': 4.866837550597131, 'reg_alpha': 0.03556192485808495, 'reg_lambda': 1.1257609586163548}. Best is trial 9 with value: 0.9687973755621819.


[Trial 18] value: 0.968689, params: {'n_estimators': 253, 'max_depth': 5, 'learning_rate': 0.03460302764780437, 'subsample': 0.7616942485567815, 'colsample_bytree': 0.6312489699537098, 'gamma': 4.866837550597131, 'reg_alpha': 0.03556192485808495, 'reg_lambda': 1.1257609586163548, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:49,806] Trial 19 finished with value: 0.9685814511357311 and parameters: {'n_estimators': 912, 'max_depth': 4, 'learning_rate': 0.09426968295642146, 'subsample': 0.8322113480983249, 'colsample_bytree': 0.6984555364239424, 'gamma': 3.021780292237288, 'reg_alpha': 1.98493132796518, 'reg_lambda': 0.5952892403458588}. Best is trial 9 with value: 0.9687973755621819.


[Trial 19] value: 0.968581, params: {'n_estimators': 912, 'max_depth': 4, 'learning_rate': 0.09426968295642146, 'subsample': 0.8322113480983249, 'colsample_bytree': 0.6984555364239424, 'gamma': 3.021780292237288, 'reg_alpha': 1.98493132796518, 'reg_lambda': 0.5952892403458588, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:52,301] Trial 20 finished with value: 0.9683655267092803 and parameters: {'n_estimators': 385, 'max_depth': 8, 'learning_rate': 0.1636798881883365, 'subsample': 0.928214158141888, 'colsample_bytree': 0.852625164897748, 'gamma': 3.8497063106222607, 'reg_alpha': 0.8104874686686235, 'reg_lambda': 0.03140039855368332}. Best is trial 9 with value: 0.9687973755621819.


[Trial 20] value: 0.968366, params: {'n_estimators': 385, 'max_depth': 8, 'learning_rate': 0.1636798881883365, 'subsample': 0.928214158141888, 'colsample_bytree': 0.852625164897748, 'gamma': 3.8497063106222607, 'reg_alpha': 0.8104874686686235, 'reg_lambda': 0.03140039855368332, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:54,735] Trial 21 finished with value: 0.9687433798818361 and parameters: {'n_estimators': 221, 'max_depth': 5, 'learning_rate': 0.03494522969597784, 'subsample': 0.75172317226636, 'colsample_bytree': 0.6268077801120551, 'gamma': 4.493771520479482, 'reg_alpha': 0.08181001222525261, 'reg_lambda': 1.1711103245550862}. Best is trial 9 with value: 0.9687973755621819.


[Trial 21] value: 0.968743, params: {'n_estimators': 221, 'max_depth': 5, 'learning_rate': 0.03494522969597784, 'subsample': 0.75172317226636, 'colsample_bytree': 0.6268077801120551, 'gamma': 4.493771520479482, 'reg_alpha': 0.08181001222525261, 'reg_lambda': 1.1711103245550862, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:56,541] Trial 22 finished with value: 0.9684734743487727 and parameters: {'n_estimators': 105, 'max_depth': 5, 'learning_rate': 0.04547848314840447, 'subsample': 0.6894649629667609, 'colsample_bytree': 0.635058959889013, 'gamma': 4.734614342753246, 'reg_alpha': 0.44751359419217185, 'reg_lambda': 1.2077831912893782}. Best is trial 9 with value: 0.9687973755621819.


[Trial 22] value: 0.968473, params: {'n_estimators': 105, 'max_depth': 5, 'learning_rate': 0.04547848314840447, 'subsample': 0.6894649629667609, 'colsample_bytree': 0.635058959889013, 'gamma': 4.734614342753246, 'reg_alpha': 0.44751359419217185, 'reg_lambda': 1.2077831912893782, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:05:58,863] Trial 23 finished with value: 0.9686354468160765 and parameters: {'n_estimators': 240, 'max_depth': 6, 'learning_rate': 0.07962603038772503, 'subsample': 0.7452484726272139, 'colsample_bytree': 0.697287693450998, 'gamma': 3.7531748330503074, 'reg_alpha': 0.27147176034378395, 'reg_lambda': 0.36291649374250035}. Best is trial 9 with value: 0.9687973755621819.


[Trial 23] value: 0.968635, params: {'n_estimators': 240, 'max_depth': 6, 'learning_rate': 0.07962603038772503, 'subsample': 0.7452484726272139, 'colsample_bytree': 0.697287693450998, 'gamma': 3.7531748330503074, 'reg_alpha': 0.27147176034378395, 'reg_lambda': 0.36291649374250035, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:02,388] Trial 24 finished with value: 0.9686354176686109 and parameters: {'n_estimators': 336, 'max_depth': 4, 'learning_rate': 0.011681595266204815, 'subsample': 0.6587032092835061, 'colsample_bytree': 0.6325385673284634, 'gamma': 4.236805969527233, 'reg_alpha': 0.6665384580595077, 'reg_lambda': 0.9025081583966774}. Best is trial 9 with value: 0.9687973755621819.


[Trial 24] value: 0.968635, params: {'n_estimators': 336, 'max_depth': 4, 'learning_rate': 0.011681595266204815, 'subsample': 0.6587032092835061, 'colsample_bytree': 0.6325385673284634, 'gamma': 4.236805969527233, 'reg_alpha': 0.6665384580595077, 'reg_lambda': 0.9025081583966774, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:04,798] Trial 25 finished with value: 0.9685814365619981 and parameters: {'n_estimators': 194, 'max_depth': 3, 'learning_rate': 0.04493915941294335, 'subsample': 0.7148187272011374, 'colsample_bytree': 0.6050355280712142, 'gamma': 1.8860072293956955, 'reg_alpha': 0.19188925624673453, 'reg_lambda': 1.7869552847480468}. Best is trial 9 with value: 0.9687973755621819.


[Trial 25] value: 0.968581, params: {'n_estimators': 194, 'max_depth': 3, 'learning_rate': 0.04493915941294335, 'subsample': 0.7148187272011374, 'colsample_bytree': 0.6050355280712142, 'gamma': 1.8860072293956955, 'reg_alpha': 0.19188925624673453, 'reg_lambda': 1.7869552847480468, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:11,146] Trial 26 finished with value: 0.9683654829880817 and parameters: {'n_estimators': 730, 'max_depth': 7, 'learning_rate': 0.08184670511361028, 'subsample': 0.7758302636828822, 'colsample_bytree': 0.7081235925526592, 'gamma': 0.7638151546481899, 'reg_alpha': 1.3980915355256442, 'reg_lambda': 0.7974809075650287}. Best is trial 9 with value: 0.9687973755621819.


[Trial 26] value: 0.968365, params: {'n_estimators': 730, 'max_depth': 7, 'learning_rate': 0.08184670511361028, 'subsample': 0.7758302636828822, 'colsample_bytree': 0.7081235925526592, 'gamma': 0.7638151546481899, 'reg_alpha': 1.3980915355256442, 'reg_lambda': 0.7974809075650287, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:14,484] Trial 27 finished with value: 0.9686354176686109 and parameters: {'n_estimators': 459, 'max_depth': 5, 'learning_rate': 0.0349151638935887, 'subsample': 0.8290505839272401, 'colsample_bytree': 0.6443781587234255, 'gamma': 3.494399979132866, 'reg_alpha': 1.0274898763675875, 'reg_lambda': 0.371866706562653}. Best is trial 9 with value: 0.9687973755621819.


[Trial 27] value: 0.968635, params: {'n_estimators': 459, 'max_depth': 5, 'learning_rate': 0.0349151638935887, 'subsample': 0.8290505839272401, 'colsample_bytree': 0.6443781587234255, 'gamma': 3.494399979132866, 'reg_alpha': 1.0274898763675875, 'reg_lambda': 0.371866706562653, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:16,349] Trial 28 finished with value: 0.9684194932421601 and parameters: {'n_estimators': 182, 'max_depth': 4, 'learning_rate': 0.11374324427143764, 'subsample': 0.7444514826378037, 'colsample_bytree': 0.7863009343170981, 'gamma': 2.779915125016408, 'reg_alpha': 1.7518057800097377, 'reg_lambda': 2.197997262148065}. Best is trial 9 with value: 0.9687973755621819.


[Trial 28] value: 0.968419, params: {'n_estimators': 182, 'max_depth': 4, 'learning_rate': 0.11374324427143764, 'subsample': 0.7444514826378037, 'colsample_bytree': 0.7863009343170981, 'gamma': 2.779915125016408, 'reg_alpha': 1.7518057800097377, 'reg_lambda': 2.197997262148065, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:20,210] Trial 29 finished with value: 0.9687973755621815 and parameters: {'n_estimators': 552, 'max_depth': 9, 'learning_rate': 0.06267562554629254, 'subsample': 0.7050098813772965, 'colsample_bytree': 0.7125039026662394, 'gamma': 1.4932940170536928, 'reg_alpha': 2.201387597936767, 'reg_lambda': 1.285975627453819}. Best is trial 9 with value: 0.9687973755621819.


[Trial 29] value: 0.968797, params: {'n_estimators': 552, 'max_depth': 9, 'learning_rate': 0.06267562554629254, 'subsample': 0.7050098813772965, 'colsample_bytree': 0.7125039026662394, 'gamma': 1.4932940170536928, 'reg_alpha': 2.201387597936767, 'reg_lambda': 1.285975627453819, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:25,070] Trial 30 finished with value: 0.9687973755621819 and parameters: {'n_estimators': 547, 'max_depth': 9, 'learning_rate': 0.06275965860321345, 'subsample': 0.6110324269650991, 'colsample_bytree': 0.7168012819280905, 'gamma': 1.4874525731077504, 'reg_alpha': 2.267877049945755, 'reg_lambda': 1.2633957710561088}. Best is trial 9 with value: 0.9687973755621819.


[Trial 30] value: 0.968797, params: {'n_estimators': 547, 'max_depth': 9, 'learning_rate': 0.06275965860321345, 'subsample': 0.6110324269650991, 'colsample_bytree': 0.7168012819280905, 'gamma': 1.4874525731077504, 'reg_alpha': 2.267877049945755, 'reg_lambda': 1.2633957710561088, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:29,829] Trial 31 finished with value: 0.9687433944555691 and parameters: {'n_estimators': 595, 'max_depth': 9, 'learning_rate': 0.05798522429607596, 'subsample': 0.6146465147940149, 'colsample_bytree': 0.7147215454853523, 'gamma': 1.476029673572633, 'reg_alpha': 2.2723592654298006, 'reg_lambda': 1.3363360549099688}. Best is trial 9 with value: 0.9687973755621819.


[Trial 31] value: 0.968743, params: {'n_estimators': 595, 'max_depth': 9, 'learning_rate': 0.05798522429607596, 'subsample': 0.6146465147940149, 'colsample_bytree': 0.7147215454853523, 'gamma': 1.476029673572633, 'reg_alpha': 2.2723592654298006, 'reg_lambda': 1.3363360549099688, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:34,886] Trial 32 finished with value: 0.9686354322423437 and parameters: {'n_estimators': 568, 'max_depth': 9, 'learning_rate': 0.0620711128675689, 'subsample': 0.6131224484682051, 'colsample_bytree': 0.7909484378690512, 'gamma': 1.509143912429482, 'reg_alpha': 2.186342388032136, 'reg_lambda': 1.3975245843106767}. Best is trial 9 with value: 0.9687973755621819.


[Trial 32] value: 0.968635, params: {'n_estimators': 568, 'max_depth': 9, 'learning_rate': 0.0620711128675689, 'subsample': 0.6131224484682051, 'colsample_bytree': 0.7909484378690512, 'gamma': 1.509143912429482, 'reg_alpha': 2.186342388032136, 'reg_lambda': 1.3975245843106767, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:40,745] Trial 33 finished with value: 0.9685814365619982 and parameters: {'n_estimators': 637, 'max_depth': 10, 'learning_rate': 0.06798146936734858, 'subsample': 0.6355025855898911, 'colsample_bytree': 0.7159357963713614, 'gamma': 1.382813425641787, 'reg_alpha': 2.6633051898520943, 'reg_lambda': 1.281109814402742}. Best is trial 9 with value: 0.9687973755621819.


[Trial 33] value: 0.968581, params: {'n_estimators': 637, 'max_depth': 10, 'learning_rate': 0.06798146936734858, 'subsample': 0.6355025855898911, 'colsample_bytree': 0.7159357963713614, 'gamma': 1.382813425641787, 'reg_alpha': 2.6633051898520943, 'reg_lambda': 1.281109814402742, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:44,334] Trial 34 finished with value: 0.9686354176686109 and parameters: {'n_estimators': 511, 'max_depth': 9, 'learning_rate': 0.09407446455844692, 'subsample': 0.6797365567990266, 'colsample_bytree': 0.7678856355258051, 'gamma': 2.1651439277518127, 'reg_alpha': 2.2511616569867843, 'reg_lambda': 1.6255329264497216}. Best is trial 9 with value: 0.9687973755621819.


[Trial 34] value: 0.968635, params: {'n_estimators': 511, 'max_depth': 9, 'learning_rate': 0.09407446455844692, 'subsample': 0.6797365567990266, 'colsample_bytree': 0.7678856355258051, 'gamma': 2.1651439277518127, 'reg_alpha': 2.2511616569867843, 'reg_lambda': 1.6255329264497216, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:49,817] Trial 35 finished with value: 0.9687433944555689 and parameters: {'n_estimators': 690, 'max_depth': 10, 'learning_rate': 0.05463508150137303, 'subsample': 0.6012922907877452, 'colsample_bytree': 0.8396656470376357, 'gamma': 1.156017451502545, 'reg_alpha': 2.7527627373823647, 'reg_lambda': 1.659091179048355}. Best is trial 9 with value: 0.9687973755621819.


[Trial 35] value: 0.968743, params: {'n_estimators': 690, 'max_depth': 10, 'learning_rate': 0.05463508150137303, 'subsample': 0.6012922907877452, 'colsample_bytree': 0.8396656470376357, 'gamma': 1.156017451502545, 'reg_alpha': 2.7527627373823647, 'reg_lambda': 1.659091179048355, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:55,023] Trial 36 finished with value: 0.9687433944555689 and parameters: {'n_estimators': 781, 'max_depth': 8, 'learning_rate': 0.02569010335065309, 'subsample': 0.6445380884703658, 'colsample_bytree': 0.7254291347633058, 'gamma': 1.7933773750769082, 'reg_alpha': 2.397958495274229, 'reg_lambda': 1.8923525272687305}. Best is trial 9 with value: 0.9687973755621819.


[Trial 36] value: 0.968743, params: {'n_estimators': 781, 'max_depth': 8, 'learning_rate': 0.02569010335065309, 'subsample': 0.6445380884703658, 'colsample_bytree': 0.7254291347633058, 'gamma': 1.7933773750769082, 'reg_alpha': 2.397958495274229, 'reg_lambda': 1.8923525272687305, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:06:59,689] Trial 37 finished with value: 0.9686894279226891 and parameters: {'n_estimators': 891, 'max_depth': 9, 'learning_rate': 0.11132723036948897, 'subsample': 0.6672262125995121, 'colsample_bytree': 0.7608377388844056, 'gamma': 1.7001528860606838, 'reg_alpha': 1.8342384235330471, 'reg_lambda': 1.3522765694191548}. Best is trial 9 with value: 0.9687973755621819.


[Trial 37] value: 0.968689, params: {'n_estimators': 891, 'max_depth': 9, 'learning_rate': 0.11132723036948897, 'subsample': 0.6672262125995121, 'colsample_bytree': 0.7608377388844056, 'gamma': 1.7001528860606838, 'reg_alpha': 1.8342384235330471, 'reg_lambda': 1.3522765694191548, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:02,958] Trial 38 finished with value: 0.9685814365619982 and parameters: {'n_estimators': 602, 'max_depth': 9, 'learning_rate': 0.1368545007338666, 'subsample': 0.6224795657382276, 'colsample_bytree': 0.6948102941877284, 'gamma': 2.6553919961432704, 'reg_alpha': 2.265011586959644, 'reg_lambda': 2.012060531904938}. Best is trial 9 with value: 0.9687973755621819.


[Trial 38] value: 0.968581, params: {'n_estimators': 602, 'max_depth': 9, 'learning_rate': 0.1368545007338666, 'subsample': 0.6224795657382276, 'colsample_bytree': 0.6948102941877284, 'gamma': 2.6553919961432704, 'reg_alpha': 2.265011586959644, 'reg_lambda': 2.012060531904938, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:06,270] Trial 39 finished with value: 0.9686354176686109 and parameters: {'n_estimators': 690, 'max_depth': 10, 'learning_rate': 0.28854954511627207, 'subsample': 0.7221558322283721, 'colsample_bytree': 0.7474652213333824, 'gamma': 2.107591003010602, 'reg_alpha': 2.4438306377110166, 'reg_lambda': 2.3895942469610514}. Best is trial 9 with value: 0.9687973755621819.


[Trial 39] value: 0.968635, params: {'n_estimators': 690, 'max_depth': 10, 'learning_rate': 0.28854954511627207, 'subsample': 0.7221558322283721, 'colsample_bytree': 0.7474652213333824, 'gamma': 2.107591003010602, 'reg_alpha': 2.4438306377110166, 'reg_lambda': 2.3895942469610514, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:10,849] Trial 40 finished with value: 0.9682575062011234 and parameters: {'n_estimators': 413, 'max_depth': 8, 'learning_rate': 0.1420534663264285, 'subsample': 0.6969153691104798, 'colsample_bytree': 0.7750215226275027, 'gamma': 0.4884399406848954, 'reg_alpha': 2.9445216391793267, 'reg_lambda': 0.9629846615301594}. Best is trial 9 with value: 0.9687973755621819.


[Trial 40] value: 0.968258, params: {'n_estimators': 413, 'max_depth': 8, 'learning_rate': 0.1420534663264285, 'subsample': 0.6969153691104798, 'colsample_bytree': 0.7750215226275027, 'gamma': 0.4884399406848954, 'reg_alpha': 2.9445216391793267, 'reg_lambda': 0.9629846615301594, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:16,271] Trial 41 finished with value: 0.9684194786684273 and parameters: {'n_estimators': 694, 'max_depth': 10, 'learning_rate': 0.05614048258016043, 'subsample': 0.6069683194029835, 'colsample_bytree': 0.8504733876861202, 'gamma': 1.0251893115446462, 'reg_alpha': 2.7059174033072306, 'reg_lambda': 1.6379166264305023}. Best is trial 9 with value: 0.9687973755621819.


[Trial 41] value: 0.968419, params: {'n_estimators': 694, 'max_depth': 10, 'learning_rate': 0.05614048258016043, 'subsample': 0.6069683194029835, 'colsample_bytree': 0.8504733876861202, 'gamma': 1.0251893115446462, 'reg_alpha': 2.7059174033072306, 'reg_lambda': 1.6379166264305023, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:20,352] Trial 42 finished with value: 0.9683115310289347 and parameters: {'n_estimators': 553, 'max_depth': 10, 'learning_rate': 0.05490685059480259, 'subsample': 0.6559754173916164, 'colsample_bytree': 0.8576488068736978, 'gamma': 1.3025112550715476, 'reg_alpha': 2.790989816603819, 'reg_lambda': 1.5443014348672501}. Best is trial 9 with value: 0.9687973755621819.


[Trial 42] value: 0.968312, params: {'n_estimators': 553, 'max_depth': 10, 'learning_rate': 0.05490685059480259, 'subsample': 0.6559754173916164, 'colsample_bytree': 0.8576488068736978, 'gamma': 1.3025112550715476, 'reg_alpha': 2.790989816603819, 'reg_lambda': 1.5443014348672501, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:25,712] Trial 43 finished with value: 0.96847345977504 and parameters: {'n_estimators': 792, 'max_depth': 9, 'learning_rate': 0.06970394310618355, 'subsample': 0.6295576687008154, 'colsample_bytree': 0.8335772707545395, 'gamma': 1.129057812455817, 'reg_alpha': 2.581903809927726, 'reg_lambda': 1.7021806868705776}. Best is trial 9 with value: 0.9687973755621819.


[Trial 43] value: 0.968473, params: {'n_estimators': 792, 'max_depth': 9, 'learning_rate': 0.06970394310618355, 'subsample': 0.6295576687008154, 'colsample_bytree': 0.8335772707545395, 'gamma': 1.129057812455817, 'reg_alpha': 2.581903809927726, 'reg_lambda': 1.7021806868705776, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:30,154] Trial 44 finished with value: 0.9683654975618146 and parameters: {'n_estimators': 735, 'max_depth': 10, 'learning_rate': 0.08774312478094996, 'subsample': 0.6030820564374447, 'colsample_bytree': 0.9120250221561801, 'gamma': 1.6744561337378001, 'reg_alpha': 2.0782498472890767, 'reg_lambda': 2.010797719184403}. Best is trial 9 with value: 0.9687973755621819.


[Trial 44] value: 0.968365, params: {'n_estimators': 735, 'max_depth': 10, 'learning_rate': 0.08774312478094996, 'subsample': 0.6030820564374447, 'colsample_bytree': 0.9120250221561801, 'gamma': 1.6744561337378001, 'reg_alpha': 2.0782498472890767, 'reg_lambda': 2.010797719184403, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:35,996] Trial 45 finished with value: 0.9685814365619982 and parameters: {'n_estimators': 633, 'max_depth': 8, 'learning_rate': 0.025225411424639688, 'subsample': 0.6467188518721247, 'colsample_bytree': 0.8055660961923088, 'gamma': 0.9719182618997017, 'reg_alpha': 2.548683522166061, 'reg_lambda': 1.461362938599529}. Best is trial 9 with value: 0.9687973755621819.


[Trial 45] value: 0.968581, params: {'n_estimators': 633, 'max_depth': 8, 'learning_rate': 0.025225411424639688, 'subsample': 0.6467188518721247, 'colsample_bytree': 0.8055660961923088, 'gamma': 0.9719182618997017, 'reg_alpha': 2.548683522166061, 'reg_lambda': 1.461362938599529, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:40,044] Trial 46 finished with value: 0.9682035688157093 and parameters: {'n_estimators': 881, 'max_depth': 10, 'learning_rate': 0.2186443572960969, 'subsample': 0.7064926200102957, 'colsample_bytree': 0.8754467172896611, 'gamma': 2.4963685324129155, 'reg_alpha': 2.3720786978468977, 'reg_lambda': 1.0201944837369137}. Best is trial 9 with value: 0.9687973755621819.


[Trial 46] value: 0.968204, params: {'n_estimators': 881, 'max_depth': 10, 'learning_rate': 0.2186443572960969, 'subsample': 0.7064926200102957, 'colsample_bytree': 0.8754467172896611, 'gamma': 2.4963685324129155, 'reg_alpha': 2.3720786978468977, 'reg_lambda': 1.0201944837369137, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:45,598] Trial 47 finished with value: 0.9684194640946944 and parameters: {'n_estimators': 517, 'max_depth': 9, 'learning_rate': 0.11313596495219697, 'subsample': 0.6208495729969231, 'colsample_bytree': 0.733180876734072, 'gamma': 0.4509937942513198, 'reg_alpha': 2.7901679842928178, 'reg_lambda': 1.3001098305818148}. Best is trial 9 with value: 0.9687973755621819.


[Trial 47] value: 0.968419, params: {'n_estimators': 517, 'max_depth': 9, 'learning_rate': 0.11313596495219697, 'subsample': 0.6208495729969231, 'colsample_bytree': 0.733180876734072, 'gamma': 0.4509937942513198, 'reg_alpha': 2.7901679842928178, 'reg_lambda': 1.3001098305818148, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:52,324] Trial 48 finished with value: 0.9688513566687945 and parameters: {'n_estimators': 845, 'max_depth': 9, 'learning_rate': 0.010466391516671406, 'subsample': 0.6828234878032978, 'colsample_bytree': 0.6747834486086363, 'gamma': 2.226990760463745, 'reg_alpha': 2.7876067571648924, 'reg_lambda': 2.3046720773646947}. Best is trial 48 with value: 0.9688513566687945.


[Trial 48] value: 0.968851, params: {'n_estimators': 845, 'max_depth': 9, 'learning_rate': 0.010466391516671406, 'subsample': 0.6828234878032978, 'colsample_bytree': 0.6747834486086363, 'gamma': 2.226990760463745, 'reg_alpha': 2.7876067571648924, 'reg_lambda': 2.3046720773646947, 'eval_metric': 'logloss', 'random_state': 42}


[I 2025-07-04 14:07:59,072] Trial 49 finished with value: 0.9687973609884487 and parameters: {'n_estimators': 831, 'max_depth': 7, 'learning_rate': 0.012666791560954654, 'subsample': 0.681699728976839, 'colsample_bytree': 0.6531121686893251, 'gamma': 2.236766337544932, 'reg_alpha': 2.1547472409434425, 'reg_lambda': 2.5302382291846524}. Best is trial 48 with value: 0.9688513566687945.


[Trial 49] value: 0.968797, params: {'n_estimators': 831, 'max_depth': 7, 'learning_rate': 0.012666791560954654, 'subsample': 0.681699728976839, 'colsample_bytree': 0.6531121686893251, 'gamma': 2.236766337544932, 'reg_alpha': 2.1547472409434425, 'reg_lambda': 2.5302382291846524, 'eval_metric': 'logloss', 'random_state': 42}
Best Params: {'n_estimators': 845, 'max_depth': 9, 'learning_rate': 0.010466391516671406, 'subsample': 0.6828234878032978, 'colsample_bytree': 0.6747834486086363, 'gamma': 2.226990760463745, 'reg_alpha': 2.7876067571648924, 'reg_lambda': 2.3046720773646947}
Best CV Score: 0.9688513566687945


In [11]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from xgboost import XGBClassifier
import joblib

# Use df_misnoneTrain for modeling
df = df_misnoneTrain.copy()

# Convert all object columns to string type for OneHotEncoder compatibility
for col in df.select_dtypes(include=['object']).columns:
    df[col] = df[col].astype(str)

categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
if 'Personality' in categorical_cols:
    categorical_cols.remove('Personality')
numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Encode target variable
le = LabelEncoder()
y_encoded = le.fit_transform(df['Personality'])

# Preprocessing for numerical and categorical data
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

# Create pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', XGBClassifier(
        n_estimators=845,
        max_depth=9,
        learning_rate=0.010466391516671406,
        subsample=0.6828234878032978,
        colsample_bytree=0.6747834486086363,
        gamma=2.226990760463745,
        reg_alpha=2.7876067571648924,
        reg_lambda=2.3046720773646947,
        eval_metric='logloss',
        random_state=42
    ))
])

# Fit pipeline
pipeline.fit(df.drop('Personality', axis=1), y_encoded)

# Save pipeline and label encoder
joblib.dump({'pipeline': pipeline, 'label_encoder': le}, 'final_model_pipeline_misnone.pkl')

['final_model_pipeline_misnone.pkl']

In [None]:
import numpy as np
df_submission1 = pd.read_csv('../Data/test.csv')

for col in df_submission1.select_dtypes(include=['object']).columns:
    df_submission1[col] = df_submission1[col].astype(str)

df_submission1 = df_submission1.replace([np.nan, np.inf, -np.inf], 'no value')


model_bundle = joblib.load('final_model_pipeline_misnone.pkl')
pipeline = model_bundle['pipeline']
le = model_bundle['label_encoder']

preds_numeric = pipeline.predict(df_submission1)
preds_label = le.inverse_transform(preds_numeric)

submission = pd.DataFrame({
    'id': df_submission1['id'],
    'Personality': preds_label
})

submission.to_csv('submission1.csv', index=False)
submission.head()

Unnamed: 0,id,Personality
0,18524,Extrovert
1,18525,Introvert
2,18526,Extrovert
3,18527,Extrovert
4,18528,Introvert
