**Ensemble learning using Bagging**

In [None]:
#importing necessary packages and the data set
import pandas as pd
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
# Load the dataset from the Excel workbook.
data = pd.read_excel("data.xlsx")

# Separate the independent variables (the first eight columns, selected by
# position) from the dependent variable (the "Outcome" column).
X = data.iloc[:, :8]
Y = data.loc[:, "Outcome"]

In [None]:
# Hold out 20% of the rows as the testing set and keep the rest for training.
# The rows are shuffled before splitting, with a fixed seed for repeatability.
from sklearn.model_selection import train_test_split as tts

X_train, X_test, y_train, y_test = tts(
    X,
    Y,
    test_size=0.2,
    random_state=50,
    shuffle=True,
)

In [None]:
# Feature extraction with Linear Discriminant Analysis (LDA).
# The import must be live: with it commented out, `LDA` is undefined and this
# cell raises NameError on a fresh kernel.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Project the features onto a single discriminant component. The projection is
# fitted on the training split only and then applied to the test split.
lda = LDA(n_components=1)
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)

In [None]:
# Define base model: a decision tree with default settings, which is handed to
# the bagging ensemble as the estimator to replicate.
base_model = DecisionTreeClassifier()

In [None]:
# Define bagging model.
# n_estimators is the number of base models, each trained on a different
# subset of the training data; random_state fixes the seed.
# The base model is passed positionally because the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4. The first positional argument works on old and new releases.
bagging_model = BaggingClassifier(base_model, n_estimators=10, random_state=42)

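**We try ensemble learning with and without feature extraction (LDA): the cells below use the original features, and the commented-out lines are the LDA-transformed alternative.**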

In [None]:
# Train bagging model on the original (untransformed) training features.
bagging_model.fit(X_train, y_train)
# Alternative: train on the LDA-transformed training features instead.
#bagging_model.fit(X_train_lda, y_train)



In [None]:
# Make predictions on the test data with the trained bagging model.
y_pred_bagging = bagging_model.predict(X_test)
# Alternative: predict from the LDA-transformed test features; use this only
# when the model was trained on the LDA-transformed training features.
#y_pred_bagging = bagging_model.predict(X_test_lda)

In [None]:
from sklearn.metrics import classification_report

# classification_report expects the ground-truth labels first and the
# predictions second. Swapping them exchanges precision with recall and
# reports support for the predicted classes instead of the true ones.
print(classification_report(y_test, y_pred_bagging))

              precision    recall  f1-score   support

           0       0.79      0.79      0.79        98
           1       0.63      0.64      0.64        56

    accuracy                           0.73       154
   macro avg       0.71      0.71      0.71       154
weighted avg       0.73      0.73      0.73       154



**Ensemble learning using boosting**

**XGBoost modelling**

In [None]:
# Import the XGBoost package.
import xgboost as xgb

# Configure the gradient-boosted tree classifier.
boosting_model = xgb.XGBClassifier(
    n_estimators=100,    # number of boosting rounds (trees)
    max_depth=5,         # maximum depth of each tree
    learning_rate=0.1,   # controls the contribution of each tree in the ensemble
    subsample=0.8,       # fraction of training instances sampled per boosting round
    random_state=42,     # random seed used by the model for reproducibility
)

In [None]:
# Train the boosting model on the original (untransformed) training features.
boosting_model.fit(X_train, y_train)
# Alternative: train on the LDA-transformed training features instead.
#boosting_model.fit(X_train_lda, y_train)

In [None]:
# Make predictions on the test data with the trained boosting model.
y_pred_boost = boosting_model.predict(X_test)
# Alternative: predict from the LDA-transformed test features; use this only
# when the model was trained on the LDA-transformed training features.
#y_pred_boost = boosting_model.predict(X_test_lda)

In [None]:
# Ground-truth labels go first and predictions second. With the arguments
# swapped, precision and recall are exchanged and support counts the predicted
# classes instead of the true ones.
print(classification_report(y_test, y_pred_boost))

              precision    recall  f1-score   support

           0       0.81      0.79      0.80       100
           1       0.63      0.67      0.65        54

    accuracy                           0.75       154
   macro avg       0.72      0.73      0.73       154
weighted avg       0.75      0.75      0.75       154

