In [17]:
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingRegressor,BaggingClassifier,RandomForestRegressor,RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, precision_recall_curve, auc, make_scorer, recall_score, \
accuracy_score, precision_score, confusion_matrix, mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler 
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

# Ensemble Model

### Importing Data

In [18]:
# Read the cleaned training set and discard the index column written by an
# earlier to_csv call; `train` keeps the label so it stays available downstream.
train = pd.read_csv('train_clean.csv').drop(columns=['Unnamed: 0'])
# Split into feature matrix and label vector.
X_train = train.drop(columns='target')
y_train = train['target']

In [19]:
# Read the cleaned hold-out set the same way as the training set: drop the
# stray saved-index column, then separate features from the label.
test = pd.read_csv('test_clean.csv').drop(columns=['Unnamed: 0'])
X_test = test.drop(columns='target')
y_test = test['target']

In [20]:
# Standardise the features. The scaler learns its statistics from the training
# split only and the same transform is then applied to the test split, so no
# test-set information leaks into the scaling.
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns,
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_train.columns,
)

### Using Voting Classifier

In [None]:
# Fit each tuned base learner on the (unscaled) training features and record its
# accuracy on the held-out test set, to compare against the voting ensembles.

# Tuned AdaBoost model.
# The depth-3 tree is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in 1.4),
# but the first positional argument is the base learner in both old and new
# releases, so this form runs on either.
model_ada = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=3),
    n_estimators=600,
    learning_rate=0.2,
    random_state=1,
).fit(X_train, y_train)
test_accuracy_ada = model_ada.score(X_test, y_test)

# Tuned Random forest model
model_rf = RandomForestClassifier(
    random_state=1,
    max_depth=17,
    max_leaf_nodes=37,
    max_features=20,
).fit(X_train, y_train)
test_accuracy_rf = model_rf.score(X_test, y_test)

# Tuned gradient boosting model
model_gb = GradientBoostingClassifier(
    random_state=1,
    max_depth=7,
    n_estimators=600,
    learning_rate=0.1,
    subsample=1.0,
).fit(X_train, y_train)
test_accuracy_gb = model_gb.score(X_test, y_test)

# Tuned XGBoost model
model_xgb = xgb.XGBClassifier(
    gamma=0,
    learning_rate=0.1,
    max_depth=7,
    n_estimators=650,
    reg_lambda=0,
    subsample=1,
).fit(X_train, y_train)
test_accuracy_xgb = model_xgb.score(X_test, y_test)

# Baseline model (LogisticRegression is imported from sklearn.linear_model
# in the imports cell).
logistic_regression = LogisticRegression(random_state=1).fit(X_train, y_train)
log_reg_accuracy = logistic_regression.score(X_test, y_test)


print("Adaboost accuracy = ",test_accuracy_ada)
print("Random forest accuracy = ",test_accuracy_rf)
print("Gradient boost accuracy = ",test_accuracy_gb)
print("XGBoost model accuracy = ",test_accuracy_xgb)
print("Logistic Regression model accuracy = ",log_reg_accuracy)

### Hard Voting

In [None]:
# Majority-vote ensemble over the five base learners. VotingClassifier.fit()
# trains clones of the supplied estimators, so the pre-fitted models here only
# contribute their hyperparameters.
ensemble_model_hard = VotingClassifier(
    estimators=[
        ('ada', model_ada),
        ('rf', model_rf),
        ('gb', model_gb),
        ('xgb', model_xgb),
        ('logreg', logistic_regression),
    ],
    voting='hard',
)
ensemble_model_hard.fit(X_train,y_train)
y_pred = ensemble_model_hard.predict(X_test)
# Both metrics reuse y_pred: .score() would push X_test through all five
# estimators a second time only to compute the same accuracy value.
print("Test accuracy : ", accuracy_score(y_test, y_pred)*100, '%')
print("Recall on Test Data: ", recall_score(y_test, y_pred)*100 , '%')

### Soft Voting

In [None]:
# Soft-voting ensemble: the predicted class is the argmax of the summed class
# probabilities of the base learners. VotingClassifier.fit() trains clones of
# the supplied estimators, so the pre-fitted models here only contribute their
# hyperparameters.
ensemble_model_soft = VotingClassifier(
    estimators=[
        ('ada', model_ada),
        ('rf', model_rf),
        ('gb', model_gb),
        ('xgb', model_xgb),
        ('logreg', logistic_regression),
    ],
    voting='soft',
)
ensemble_model_soft.fit(X_train,y_train)
y_pred = ensemble_model_soft.predict(X_test)
# Both metrics reuse y_pred: .score() would push X_test through all five
# estimators a second time only to compute the same accuracy value.
print("Test accuracy = ", accuracy_score(y_test, y_pred)*100, '%')
print("Recall on Test Data: ", recall_score(y_test, y_pred)*100 , '%')

## Stacking Classifier

## Tuning all Models Simultaneously