# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from xgboost import XGBClassifier
from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load the data
data = pd.read_csv('ac.csv')

# Step 2: Separate features and labels
X = data.iloc[:, :-1]  # Features (all columns except the last one)
y = data.iloc[:, -1]   # Labels (the last column)

# XGBoost expects class labels numbered from 0. Shift the labels only when the
# smallest one is not already 0: 1-based labels become 0-based exactly as
# before, while labels that already start at 0 are left untouched (an
# unconditional `y - 1` would turn them into -1, 0, ...).
label_offset = y.min()
if label_offset != 0:
    y = y - label_offset

# Step 3: Split the data into training and testing sets
# 80% train / 20% test; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Define the base classifiers
# The XGBoost base learner and the XGBoost meta-learner are configured
# identically, so the shared keyword arguments are written down once.
xgb_settings = dict(
    objective='binary:logistic',
    use_label_encoder=False,
    eval_metric='logloss',
)
xgb_clf = XGBClassifier(**xgb_settings)
gbm_clf = GradientBoostingClassifier(random_state=42)

# Step 5: Define the parameter grids for each classifier
# Keys follow the "<estimator name>__<parameter>" pattern, where the estimator
# name is the label ('xgb' / 'gbm') given to the model in the stacking ensemble.
xgb_param_grid = {
    'xgb__max_depth': [3, 4, 5],
    'xgb__alpha': [1, 10],
    'xgb__learning_rate': [0.01, 0.1, 1.0],
    'xgb__n_estimators': [100, 200],
}

gbm_param_grid = {
    'gbm__n_estimators': [100, 200],
    'gbm__max_depth': [3, 4, 5],
    'gbm__learning_rate': [0.01, 0.1, 1.0],
}

# Step 6: Define the stacking classifier
# Two boosted-tree base learners feed a separate XGBoost meta-learner.
base_learners = [('xgb', xgb_clf), ('gbm', gbm_clf)]
meta_learner = XGBClassifier(**xgb_settings)
stacking_clf = StackingClassifier(
    estimators=base_learners,
    final_estimator=meta_learner,
    n_jobs=-1,
)

# Step 7: Perform GridSearchCV to find the best hyperparameters for the stacking classifier
# Combine both grids into a single search space. The 'xgb__' and 'gbm__' key
# sets do not overlap, so the merge never overwrites an entry.
param_grid = dict(xgb_param_grid)
param_grid.update(gbm_param_grid)

# Exhaustive search scored by accuracy with 5-fold cross-validation,
# using all available cores.
grid_search = GridSearchCV(
    estimator=stacking_clf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Step 8: Evaluate the best estimator found by GridSearchCV
best_clf = grid_search.best_estimator_

# Score the selected model with 5-fold cross-validation on the training data
cv_scores = cross_val_score(
    estimator=best_clf,
    X=X_train,
    y=y_train,
    scoring='accuracy',
    cv=5,
)

# Report the per-fold accuracies followed by their mean
print(f'Cross-validation scores: {cv_scores}')
print(f'Mean cross-validation score: {cv_scores.mean():.2f}')

# Step 9: Evaluate the classifier on the test set
y_pred = best_clf.predict(X_test)

# Overall accuracy on the held-out test data
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy on test set: {accuracy:.2f}')

# Detailed per-class classification report
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

# Hyperparameter combination selected by the grid search
print(f'Best parameters: {grid_search.best_params_}')


# Output: Fitting 5 folds for each of 648 candidates, totalling 3240 fits
