### **[3] Random Forest**

In [None]:
# Bring the Random Forest classifier into the notebook environment
from sklearn.ensemble import RandomForestClassifier

# Baseline Random Forest: entropy split criterion, leaf nodes capped at 10,
# and a fixed random_state so reruns build the same forest.
classifier_rf = RandomForestClassifier(
    criterion='entropy',
    max_leaf_nodes=10,
    random_state=0,
)

# Fit on the training set through the notebook's classification_model helper
# (defined outside this section). Later cells read entries such as
# 'y_test_pred' and 'accuracy_test' from the returned result.
rf = classification_model(X_train, X_test, y_train, y_test, classifier_rf)

In [None]:
# ROC curve for the baseline Random Forest, drawn from its stored test-set predictions.
# NOTE(review): 'y_test_pred' presumably holds hard class labels; ROC curves are
# normally built from scores/probabilities -- confirm what plot_roc_curve expects.
y_pred = rf['y_test_pred']
plot_roc_curve(y_test, y_pred)

#### **<u>Cross-Validation & Hyperparameter Tuning</u>**

In [None]:
# Bring the Random Forest classifier into the notebook environment
from sklearn.ensemble import RandomForestClassifier

## Random Forest fitted to the training set with a cross-validated hyperparameter search

# Candidate values per hyperparameter: 3 values for each of 5 parameters,
# i.e. 3**5 = 243 combinations in the full grid.
param_grid = {
    'n_estimators': [50, 80, 100],
    'max_depth': [4, 6, 8],
    'min_samples_split': [30, 50, 70],
    'min_samples_leaf': [20, 30, 40],
    'max_leaf_nodes': [35, 40, 45],
}

# Base estimator for the search; the tuned parameters above are left at their
# defaults here, only the split criterion and the seed are pinned.
classifier_rf = RandomForestClassifier(criterion='entropy', random_state=0)

# Run the search and evaluation through the notebook's classification_CV_model
# helper (defined outside this section).
rf_cv = classification_CV_model(X_train, X_test, y_train, y_test, classifier_rf, param_grid)

In [None]:
# ROC curve for the cross-validated Random Forest, drawn from its stored test-set predictions.
# NOTE(review): 'y_test_pred' presumably holds hard class labels; ROC curves are
# normally built from scores/probabilities -- confirm what plot_roc_curve expects.
y_pred = rf_cv['y_test_pred']
plot_roc_curve(y_test, y_pred)

#### **<u>Comparing Evaluation Metrics of Train/Test Split and Cross-Validation</u>**

In [None]:
# Compare test-set evaluation metrics of the plain train/test-split fit (`rf`)
# and the GridSearchCV fit (`rf_cv`) of the Random Forest model.

import pandas as pd
import matplotlib.pyplot as plt

# Display labels for the table/plot, and the matching key prefixes used in the
# result objects (each metric is stored under '<prefix>_test').
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-score', 'roc_auc_score']
metric_keys = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc_score']

# These lists are deliberately NOT named `train_test_split` / `metrics`:
# rebinding those names in the kernel hides scikit-learn's `train_test_split`
# function (and a possible `sklearn.metrics` import), so re-running an earlier
# cell after this one would fail.
split_scores = [rf[key + '_test'] for key in metric_keys]
cv_scores = [rf_cv[key + '_test'] for key in metric_keys]

# One row per metric, one column per training strategy
rf_evaluation_df = pd.DataFrame({
    'Evaluation Metrics': metric_names,
    'Train/Test Split': split_scores,
    'Cross-Validation': cv_scores,
})

# Print the comparison as a text grid (tabulate comes from elsewhere in the notebook)
print('=='*45)
print(tabulate(rf_evaluation_df, headers='keys', tablefmt='grid'))
print('\n', '=='*45, '\n')

# Grouped bar chart. The axes are created explicitly and handed to pandas;
# calling plt.figure() first and then DataFrame.plot.bar() without `ax` leaves
# an empty extra figure and the requested figsize is never applied.
fig, ax = plt.subplots(figsize=(7, 7))
rf_evaluation_df.plot.bar(x='Evaluation Metrics', rot=0, ax=ax)

# Title and labels; the y axis carries all five metrics, not only accuracy
ax.set_title("Comparing Evaluation Metrics of Train-Test Split vs. Cross-Validation for Random Forest")
ax.set_xlabel("Evaluation Metrics")
ax.set_ylabel("Score")
ax.legend(loc="lower right")

# Show the plot
plt.show()
print('=='*45)

**<u>Observations</u> :**
- We saw a slight improvement in the model after training with cross-validation.
- The major improvement is in Recall.
- Recall is 0.84 with the train/test split and 0.90 with cross-validation for the Random Forest model.

### **[5] XGBoost**

In [None]:
# Import the XGBoost classifier into the notebook environment
from xgboost import XGBClassifier

# Baseline XGBoost model with a fixed seed.
# NOTE(review): `max_leaf_nodes` is a scikit-learn tree option name; XGBoost's
# own leaf cap is called `max_leaves`. Confirm whether this keyword has any
# effect before relying on it.
classifier_xgb = XGBClassifier(max_leaf_nodes=10, random_state=0)

# Fit the XGBoost estimator created above on the training set. The call used
# to pass `classifier_gbm`, so the "XGBoost" results were produced by a
# different model (or failed with a NameError on a fresh kernel).
xgb = classification_model(X_train, X_test, y_train, y_test, classifier_xgb)

In [None]:
# ROC curve for the baseline XGBoost model, drawn from its stored test-set predictions.
# NOTE(review): 'y_test_pred' presumably holds hard class labels; ROC curves are
# normally built from scores/probabilities -- confirm what plot_roc_curve expects.
y_pred = xgb['y_test_pred']
plot_roc_curve(y_test, y_pred)

#### **<u>Cross-Validation & Hyperparameter Tuning</u>**

In [None]:
# Import the XGBoost classifier into the notebook environment
from xgboost import XGBClassifier

## Fitting XGBoost model to training set using cross validation

# Hyperparameter grid: 3 * 3 * 1 * 1 * 3 = 27 candidates.
# `min_samples_split` and `min_samples_leaf` were removed from this grid: they
# are scikit-learn tree options, not XGBoost parameters, so they multiplied the
# number of fits by 6 without changing any fitted model. If a minimum leaf size
# should be tuned, XGBoost's closest control is `min_child_weight`.
param_grid = {
    'n_estimators': [50, 80, 100],
    'max_depth': [4, 6, 8],
    'eta': [0.3],          # learning rate (single value, not searched)
    'gamma': [0],          # single value, not searched
    'lambda': [0, 1, 2],   # L2 regularisation strength
}

# Base estimator for the search, with a fixed seed.
# NOTE(review): `max_leaf_nodes` is a scikit-learn tree option name; XGBoost's
# own leaf cap is called `max_leaves`. Confirm whether this keyword has any
# effect before relying on it.
classifier_xgb = XGBClassifier(max_leaf_nodes=10, random_state=0)

# Run the search and evaluation through the notebook's classification_CV_model
# helper (defined outside this section).
xgb_cv = classification_CV_model(X_train, X_test, y_train, y_test, classifier_xgb, param_grid)

In [None]:
# ROC curve for the cross-validated XGBoost model, drawn from its stored test-set predictions.
# NOTE(review): 'y_test_pred' presumably holds hard class labels; ROC curves are
# normally built from scores/probabilities -- confirm what plot_roc_curve expects.
y_pred = xgb_cv['y_test_pred']
plot_roc_curve(y_test, y_pred)

#### **<u>Comparing Evaluation Metrics of Train/Test Split and Cross-Validation</u>**

In [None]:
# Compare test-set evaluation metrics of the plain train/test-split fit (`xgb`)
# and the GridSearchCV fit (`xgb_cv`) of the XGBoost model.

import pandas as pd
import matplotlib.pyplot as plt

# Display labels for the table/plot, and the matching key prefixes used in the
# result objects (each metric is stored under '<prefix>_test').
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-score', 'roc_auc_score']
metric_keys = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc_score']

# These lists are deliberately NOT named `train_test_split` / `metrics`:
# rebinding those names in the kernel hides scikit-learn's `train_test_split`
# function (and a possible `sklearn.metrics` import), so re-running an earlier
# cell after this one would fail.
split_scores = [xgb[key + '_test'] for key in metric_keys]
cv_scores = [xgb_cv[key + '_test'] for key in metric_keys]

# One row per metric, one column per training strategy
xgb_evaluation_df = pd.DataFrame({
    'Evaluation Metrics': metric_names,
    'Train/Test Split': split_scores,
    'Cross-Validation': cv_scores,
})

# Print the comparison as a text grid (tabulate comes from elsewhere in the notebook)
print('=='*45)
print(tabulate(xgb_evaluation_df, headers='keys', tablefmt='grid'))
print('\n', '=='*45, '\n')

# Grouped bar chart. The axes are created explicitly and handed to pandas;
# calling plt.figure() first and then DataFrame.plot.bar() without `ax` leaves
# an empty extra figure and the requested figsize is never applied.
fig, ax = plt.subplots(figsize=(7, 7))
xgb_evaluation_df.plot.bar(x='Evaluation Metrics', rot=0, ax=ax)

# Title and labels; the y axis carries all five metrics, not only accuracy
ax.set_title("Comparing Evaluation Metrics of Train-Test Split vs. Cross-Validation for XGBoost")
ax.set_xlabel("Evaluation Metrics")
ax.set_ylabel("Score")
ax.legend(loc="lower right")

# Show the plot
plt.show()
print('=='*45)

**<u>Observations</u> :**
- The model improved after training with cross-validation.
- Accuracy is higher for the model trained with cross-validation.
- Recall is 0.92 with the train/test split and 0.93 with cross-validation for the XGBoost model.
- The major improvement is in Precision, which rose from 0.91 to 0.93.