In [None]:
import numpy as np 
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier,GradientBoostingClassifier, AdaBoostClassifier
from sklearn.model_selection import cross_val_score,StratifiedKFold

## Import Data

In [None]:
# Directory holding the preprocessed hit-prediction inputs; both files live here.
DATA_DIR = "/kaggle/input/hit-prediction-processed-data/Hit Prediction"

# Feature table (CSV) and target array (.npy) consumed by the cross-validation cells.
X = pd.read_csv(f"{DATA_DIR}/X_selected.csv")
y = np.load(f"{DATA_DIR}/y_selected.npy")

# Fail fast on misaligned features/labels rather than deep inside cross-validation.
assert len(X) == len(y), f"X has {len(X)} rows but y has {len(y)} entries"

## Instantiate Reliable Base Models (From ROC)

In [None]:
# Base learners fed into the stacking ensembles.
# Hyperparameters are fixed here (presumably tuned in an earlier step — TODO confirm).
# The stochastic learners are seeded with 42, the same seed the StratifiedKFold
# splitter uses, so that re-running the notebook reproduces the same scores.

# k-nearest neighbours: 19 neighbours, Minkowski distance with p=1,
# votes weighted by inverse distance.
knn = KNeighborsClassifier(
    algorithm='ball_tree',
    n_neighbors=19,
    p=1,
    weights='distance',
)

# Random forest with cost-complexity pruning.
# NOTE(review): oob_score=True computes an out-of-bag estimate on every fit, but no
# visible cell reads it — confirm whether it is still needed.
rf = RandomForestClassifier(
    max_depth=20,
    max_features='log2',
    n_estimators=300,
    oob_score=True,
    ccp_alpha=0.0004,
    random_state=42,
)

# RBF-kernel SVM with probability estimates enabled. The probability estimates
# involve internal data shuffling, so the seed matters for this model as well.
svm = SVC(
    C=1.5,
    class_weight='balanced',
    gamma='scale',
    kernel='rbf',
    probability=True,
    random_state=42,
)

## Instantiate AdaBoost, Stacking Classifier and Stratified CV

In [None]:
# Meta-learner for the first ensemble: AdaBoost trained on the base models' outputs.
# Seeded so the boosting procedure gives the same result on every run.
estimator = AdaBoostClassifier(n_estimators=100, learning_rate=0.01, random_state=42)

# Stack the three base learners under the AdaBoost meta-learner.
model = StackingClassifier(
    estimators=[('svm', svm), ('rf', rf), ('knn', knn)],
    final_estimator=estimator,
)

# Shuffled, stratified 5-fold splitter shared by every cross-validation run below,
# so both ensembles are scored on identical folds.
n_folds = 5
cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

## Stacking with AdaBoost

In [None]:
# Cross-validate the current stacking model on the folds defined by `cv`;
# n_jobs=-1 lets scikit-learn run the folds in parallel.
score = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    cv=cv,
    n_jobs=-1,
)

In [None]:
# Display the per-fold scores returned by cross_val_score.
score

In [None]:
# Summarise the fold scores as mean and standard deviation, rounded to two decimals.
adaboost_summary = f"Accuracy: {score.mean():.2f} (+/- {score.std():.2f})"
print(adaboost_summary)

## Instantiate Gradient Boosting, Stacking Classifier

In [None]:
# Meta-learner for the second ensemble: gradient boosting on the base models' outputs.
# Seeded so repeated runs produce the same trees.
# NOTE(review): max_depth=100 allows extremely deep trees for a meta-learner that only
# sees the base models' predictions; this may overfit and bias the comparison against
# gradient boosting — confirm the value is intentional before relying on the result.
estimator = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.01,
    max_depth=100,
    random_state=42,
)

# Same three base learners as before, now stacked under the gradient-boosting meta-learner.
model = StackingClassifier(
    estimators=[('svm', svm), ('rf', rf), ('knn', knn)],
    final_estimator=estimator,
)

## Stacking with Gradient Boosting

In [None]:
# Cross-validate the current stacking model on the folds defined by `cv`;
# n_jobs=-1 lets scikit-learn run the folds in parallel.
# Note: this rebinds `score`, replacing any earlier result held under that name.
score = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    cv=cv,
    n_jobs=-1,
)

In [None]:
# Display the per-fold scores returned by cross_val_score.
score

In [None]:
# Summarise the fold scores as mean and standard deviation, rounded to two decimals.
gradient_boosting_summary = f"Accuracy: {score.mean():.2f} (+/- {score.std():.2f})"
print(gradient_boosting_summary)

## Inference
> ### Stacking with an AdaBoost meta-learner performed significantly better than stacking with a Gradient Boosting meta-learner. Hence, of the two ensembles compared, stacking with AdaBoost is the better way to combine the base models.