In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, precision_score, recall_score
import pandas as pd
from sklearn.model_selection import train_test_split




# Baseline model: fit XGBoost on every column of x_train so that its
# importance scores can be used to rank the features.
modelx = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
    n_estimators=300,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=0,
)
modelx.fit(x_train, y_train)

# Importance scores reported by the fitted model; they are paired
# positionally with x_train.columns below.
importance = modelx.feature_importances_

# Table of (feature name, importance), ordered from most to least important.
features = pd.DataFrame(
    {'Feature': x_train.columns, 'Importance': importance}
).sort_values(by='Importance', ascending=False)

def evaluate_top_features(top_n):
    """Train and score an XGBoost classifier on a leading subset of features.

    The first ``top_n`` rows of the module-level ``features`` table are
    taken as the column selection (the table is used in whatever order it
    is currently in; this function does not sort it). A fresh
    ``XGBClassifier`` is fitted on those columns of ``x_train`` against
    ``y_train`` and evaluated on the same columns of ``x_test`` against
    ``y_test``.

    Args:
        top_n: Number of leading rows of ``features`` to select.

    Returns:
        A dict with the keys ``'Top N Features'`` (the ``top_n`` argument as
        passed in), ``'Accuracy'``, ``'Balanced Accuracy'``, ``'AUC Score'``
        (macro-averaged, one-vs-rest, computed from predicted
        probabilities), ``'Precision'`` and ``'Recall'`` (both
        macro-averaged).
    """
    selected_columns = features['Feature'].head(top_n)

    # Restrict both splits to the selected columns before fitting.
    train_subset = x_train[selected_columns]
    test_subset = x_test[selected_columns]

    hyperparameters = {
        'use_label_encoder': False,
        'eval_metric': 'mlogloss',
        'n_estimators': 300,
        'max_depth': 5,
        'learning_rate': 0.05,
        'subsample': 0.9,
        'colsample_bytree': 0.9,
        'random_state': 0,
    }
    classifier = XGBClassifier(**hyperparameters)
    classifier.fit(train_subset, y_train)

    # Hard labels feed the threshold-based metrics; probabilities feed the AUC.
    predicted_labels = classifier.predict(test_subset)
    predicted_probabilities = classifier.predict_proba(test_subset)

    return {
        'Top N Features': top_n,
        'Accuracy': accuracy_score(y_test, predicted_labels),
        'Balanced Accuracy': balanced_accuracy_score(y_test, predicted_labels),
        'AUC Score': roc_auc_score(
            y_test, predicted_probabilities, average='macro', multi_class='ovr'
        ),
        'Precision': precision_score(y_test, predicted_labels, average='macro'),
        'Recall': recall_score(y_test, predicted_labels, average='macro'),
    }
# Feature-subset sizes to compare.
subset_sizes = (50, 100, 200, 400)

# Collect one metrics dict per subset size.
results = []
for top_n in subset_sizes:
    result = evaluate_top_features(top_n)
    results.append(result)

# One row per subset size, one column per metric.
results_df = pd.DataFrame(results)
print(results_df)
