# **Experiment 10 Ensemble Learning**
The steps to do this experiment:
1. Import the necessary libraries (here we import all the libraries used in this lab experiment).
2. For both Random Forest and AdaBoost: preprocess the data.
3. For both Random Forest and AdaBoost: choose the independent (X) and dependent (Y) variables.
4. For Random Forest: create models with different parameters, find the best parameters, and evaluate.
5. For AdaBoost: create models with different parameters, find the best parameters, and evaluate.

The code for each step is below:

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report,confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the dataset; Iris.csv is read relative to the working directory.
df = pd.read_csv("Iris.csv")

# Replace missing values in every numeric column with that column's mean.
numeric_cols = df.select_dtypes(include=[np.number]).columns
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

# Convert the class labels to integer codes, but only when the column has
# object dtype (e.g. string labels); an already-numeric target is left as is.
if df['Species'].dtype == 'object':
    encoder = LabelEncoder()
    df['Species'] = encoder.fit_transform(df['Species'])

In [None]:
# Dependent variable: the class label.
Y = df['Species']

# Independent variables: everything except the label.  A row-identifier
# column named 'Id' is also removed when present, because an index that is
# ordered by class would leak the label into the features.
# NOTE(review): assumes 'Id' (as in the Kaggle Iris.csv) is a row index and
# not a real measurement -- confirm against the actual file.  With
# errors='ignore' this is a no-op when the column does not exist.
X = df.drop(columns=['Species']).drop(columns=['Id'], errors='ignore')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [None]:
# Random Forest configurations to compare.  Each dict is unpacked straight
# into RandomForestClassifier, so every key must be a valid constructor
# argument.  'sqrt' replaces the former 'auto' value: for classifiers 'auto'
# was an alias of 'sqrt', and recent scikit-learn releases no longer accept it.
rf_params = [
    {'n_estimators': 100, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt'},
    {'n_estimators': 150, 'criterion': 'entropy', 'max_depth': 10, 'max_features': 'sqrt'},
    {'n_estimators': 200, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'log2'},
    {'n_estimators': 100, 'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt'},
]
for params in rf_params:
    # Build the model from the current configuration (previously a fixed
    # n_estimators=10 model was trained regardless of `params`).  The fixed
    # random_state keeps results reproducible across runs.
    clf_rf = RandomForestClassifier(**params, random_state=0)
    clf_rf.fit(X_train, Y_train)
    Y_pred = clf_rf.predict(X_test)

    # Report the configuration followed by its test-set metrics.  Macro
    # averaging weights every class equally in the multi-class scores.
    print("Random Forest Parameters: ", params)
    print("Confusion Matrix: \n", confusion_matrix(Y_test, Y_pred))
    print("Accuracy: \n", accuracy_score(Y_test, Y_pred))
    print('Precision: \n', precision_score(Y_test, Y_pred, average='macro'))
    print("F1 Score: \n", f1_score(Y_test, Y_pred, average='macro'))
    print("Recall: \n", recall_score(Y_test, Y_pred, average='macro'))
    print("classification_report: \n", classification_report(Y_test, Y_pred))

In [None]:
# AdaBoost configurations to compare; each dict is unpacked into the
# AdaBoostClassifier constructor.
# NOTE(review): the 'SAMME.R' option and the `algorithm` argument have been
# deprecated/removed in recent scikit-learn releases -- confirm these values
# against the installed version.
ab_params = [
    dict(n_estimators=50, learning_rate=1.0, algorithm='SAMME.R'),
    dict(n_estimators=100, learning_rate=0.5, algorithm='SAMME'),
    dict(n_estimators=150, learning_rate=0.75, algorithm='SAMME.R'),
    dict(n_estimators=200, learning_rate=1.0, algorithm='SAMME'),
]

# Reports printed for every configuration, in output order.  Each entry pairs
# the label to print with a callable taking (true labels, predicted labels).
# Multi-class precision/recall/F1 use macro averaging.
ab_reports = (
    ("Confusion Matrix: \n", confusion_matrix),
    ("Accuracy Score: \n", accuracy_score),
    ("Precision Score: \n", lambda y_true, y_hat: precision_score(y_true, y_hat, average='macro')),
    ("Recall Score: \n", lambda y_true, y_hat: recall_score(y_true, y_hat, average='macro')),
    ("F1 Score: \n", lambda y_true, y_hat: f1_score(y_true, y_hat, average='macro')),
    ("Classification Report: \n", classification_report),
)

for params in ab_params:
    # Train on the training split with a fixed seed, then predict the test split.
    clf_ab = AdaBoostClassifier(**params, random_state=0)
    clf_ab.fit(X_train, Y_train)
    Y_pred = clf_ab.predict(X_test)

    print("AdaBooster Paramter: ", params)
    # Each metric is computed right before it is printed.
    for label, report in ab_reports:
        print(label, report(Y_test, Y_pred))