In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, recall_score,precision_score, f1_score
import optuna

In [29]:
train=pd.read_csv('train.csv')

In [30]:
train.NObeyesdad=train.NObeyesdad.map({'Insufficient_Weight':0, 'Normal_Weight':1,'Overweight_Level_I':2,'Overweight_Level_II':3,'Obesity_Type_I':4,'Obesity_Type_II':5,'Obesity_Type_III':6})

In [31]:
categorical_train=train.select_dtypes('O')
numeric_train=train.drop(categorical_train.columns.tolist()+['id']+['NObeyesdad'],axis=1)

In [32]:
categorical_train.SCC=categorical_train.SCC.map({'yes':1,'no':0})
categorical_train.SMOKE=categorical_train.SMOKE.map({'yes':1,'no':0})
categorical_train.FAVC=categorical_train.FAVC.map({'yes':1,'no':0})
categorical_train.family_history_with_overweight=categorical_train.family_history_with_overweight.map({'yes':1,'no':0})
categorical_train.Gender=categorical_train.Gender.map({'Male':0,'Female':1})
categorical_train.CAEC=categorical_train.CAEC.map({'no':0,'Sometimes':1,'Frequently':2,'Always':3})
categorical_train.CALC=categorical_train.CALC.map({'no':0,'Sometimes':1,'Frequently':2})

In [33]:
cat_MRTANS=pd.get_dummies(categorical_train.MTRANS,drop_first=True,dtype='float')
categorical_train=categorical_train.drop('MTRANS',axis=1)
categorical_train=pd.concat([categorical_train,cat_MRTANS],axis=1)

In [34]:
# try input all the data
df=pd.concat([numeric_train,categorical_train,train.NObeyesdad],axis=1)
X=df.drop('NObeyesdad',axis=1).values
y=df.NObeyesdad.values

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [45]:
def objective(trial):
    """Define the objective function"""

    params = {
        'max_depth': trial.suggest_int('max_depth', 1, 9),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0,log=True),
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0,log=True),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0,log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0,log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0,log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0,log=True),
        'eval_metric': 'mlogloss',
        'use_label_encoder': False
    }

    # Fit the model
    optuna_model = xgb.XGBClassifier(**params)
    optuna_model.fit(X_train, y_train)

    # Make predictions
    y_pred = optuna_model.predict(X_test)

    # Evaluate predictions
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

In [46]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

# Showing optimization results
print('Number of finished trials:', len(study.trials))
print('Best trial parameters:', study.best_trial.params)
print('Best score:', study.best_value)

[I 2024-02-05 22:16:27,119] A new study created in memory with name: no-name-cdaaaaba-8ea4-4866-bc91-b56c34f5eda0
[I 2024-02-05 22:16:30,263] Trial 0 finished with value: 0.8701027617212588 and parameters: {'max_depth': 9, 'learning_rate': 0.3037548928493046, 'n_estimators': 473, 'min_child_weight': 7, 'gamma': 3.0920467361931185e-07, 'subsample': 0.08803818809740105, 'colsample_bytree': 0.019093184801518528, 'reg_alpha': 1.6305254777554773e-08, 'reg_lambda': 5.827539547920008e-06}. Best is trial 0 with value: 0.8701027617212588.
[I 2024-02-05 22:16:33,173] Trial 1 finished with value: 0.8537251123956326 and parameters: {'max_depth': 4, 'learning_rate': 0.021421162821674927, 'n_estimators': 325, 'min_child_weight': 2, 'gamma': 9.461580682226961e-05, 'subsample': 0.023822191998824533, 'colsample_bytree': 0.20979891814839438, 'reg_alpha': 8.988297418454066e-06, 'reg_lambda': 0.000566525061723685}. Best is trial 0 with value: 0.8701027617212588.
[I 2024-02-05 22:16:35,388] Trial 2 finishe

Number of finished trials: 10
Best trial parameters: {'max_depth': 4, 'learning_rate': 0.045303065734425794, 'n_estimators': 373, 'min_child_weight': 8, 'gamma': 1.1787858874108236e-05, 'subsample': 0.640045819489975, 'colsample_bytree': 0.3713021692513898, 'reg_alpha': 2.503370660603248e-07, 'reg_lambda': 9.324954343411933e-05}
Best score: 0.9047848426461144


In [52]:
# run n_trial=100 in colab, and obtain the following paramters: 
xgbc_model=xgb.XGBClassifier(max_depth=5, learning_rate= 0.1, n_estimators=150, min_child_weight=8, gamma= 0.056, subsample= 0.376, colsample_bytree=0.885, reg_alpha=0.0037,reg_lambda= 0.013)

In [66]:
xgbc_model.fit(X_train,y_train)

In [54]:
xgbc_model.score(X_train,y_train)

0.9313833448038541

In [55]:
xgbc_model.score(X_test,y_test)

0.9070327552986512

In [56]:
test=pd.read_csv('test.csv')
id_series=test.id
test.SCC=test.SCC.map({'yes':1,'no':0})
test.SMOKE=test.SMOKE.map({'yes':1,'no':0})
test.FAVC=test.FAVC.map({'yes':1,'no':0})
test.family_history_with_overweight=test.family_history_with_overweight.map({'yes':1,'no':0})
test.Gender=test.Gender.map({'Male':0,'Female':1})
test.CAEC=test.CAEC.map({'no':0,'Sometimes':1,'Frequently':2,'Always':3})
test.CALC=test.CALC.map({'no':0,'Sometimes':1,'Frequently':2})
cat_test_MRTANS=pd.get_dummies(test.MTRANS,drop_first=True,dtype=float)
test=test.drop('MTRANS',axis=1)
test=pd.concat([test,cat_test_MRTANS],axis=1)
test=test.drop('id',axis=1)

In [57]:
input_order=df.columns.tolist()[:-1]

In [58]:
test=test[input_order]

In [59]:
y_pred_test=xgbc_model.predict(test.values)

In [60]:
result=pd.concat([id_series,pd.Series(y_pred_test)],axis=1)

In [61]:
ori={'Insufficient_Weight':0, 'Normal_Weight':1,'Overweight_Level_I':2,'Overweight_Level_II':3,'Obesity_Type_I':4,'Obesity_Type_II':5,'Obesity_Type_III':6}
rev={value:key for key, value in ori.items()}

In [62]:
result.columns=['id','NObeyesdad']

In [63]:
result.NObeyesdad=result.NObeyesdad.map(rev)

In [64]:
result.to_csv('result.csv',index=False)