<a href="https://colab.research.google.com/github/Mahesharvinds/DataAnalyticsCourseWork/blob/master/Inroduction_to_Ensemble_Modelling_SKLearn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Getting Started with Ensemble Modelling Using scikit-learn

### Importing necessary packages and modules

In [0]:
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix 
from sklearn import model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

### Loading the "Forest Cover Type Prediction" data set into Pandas Data Frame

In [0]:
# Read the raw CSV into a DataFrame; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="Forest_Cover_Type_Prediction.csv")

### Slicing the data into Predictors and Response variable

In [0]:
# Columns at positions 0-54 become the model inputs; the column at position 55 is the target.
# NOTE(review): if the CSV keeps a row-identifier column at position 0, it is being fed
# to the models as a feature — confirm against the file.
response = data.iloc[:, 55]
predictors = data.iloc[:, :55]

### Splitting the data into Training Set and Testing Set

In [0]:
# Hold out part of the rows for testing; random_state pins the shuffle so the split is repeatable.
(train_predictors, test_predictors,
 train_response, test_response) = train_test_split(
    predictors,
    response,
    random_state=0,
)

### K Fold Cross Validation Data Split

In [0]:
# Unshuffled 10-fold splitter shared by every cross-validation cell below.
# random_state is deliberately omitted: it only has an effect when shuffle=True, and
# scikit-learn >= 0.24 raises a ValueError if it is set while shuffle stays False.
# The folds produced are the same consecutive splits as before.
kfold = model_selection.KFold(n_splits=10)

### Bagging Ensemble Classifier Model

In [0]:
# Fit a bagging ensemble (default settings otherwise) on the training split.
# random_state is pinned to the same seed the other ensembles in this notebook use,
# so the resampling — and therefore the metrics reported below — are repeatable across runs.
bagging_model = BaggingClassifier(random_state=3).fit(train_predictors, train_response)  # Model building
# Predict on the held-out rows. This name is reassigned by the later model sections,
# so run the evaluation cells right after this one.
predicted_test_response = bagging_model.predict(test_predictors)  # Prediction

### Performance Evaluation of Bagging Ensemble Model (Confusion Matrix, Accuracy Score & Classification Report)

In [9]:
# Rows are the true classes, columns are the predicted classes.
confusion_matrix(y_true=test_response, y_pred=predicted_test_response)

array([[415,  89,   1,   0,  10,   2,  28],
       [119, 325,  17,   0,  47,   9,   8],
       [  0,   3, 444,  31,  11,  62,   0],
       [  0,   0,  15, 518,   0,  11,   0],
       [  3,  28,   9,   0, 480,   5,   0],
       [  0,   8,  72,   8,  11, 460,   0],
       [ 37,   3,   0,   0,   0,   0, 491]])

In [10]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy_score(y_true=test_response, y_pred=predicted_test_response)

0.8288359788359788

In [11]:
# Per-class precision / recall / F1 for the bagging predictions on the test split.
print(
    classification_report(
        y_true=test_response,
        y_pred=predicted_test_response,
    )
)

             precision    recall  f1-score   support

          1       0.72      0.76      0.74       545
          2       0.71      0.62      0.66       525
          3       0.80      0.81      0.80       551
          4       0.93      0.95      0.94       544
          5       0.86      0.91      0.89       525
          6       0.84      0.82      0.83       559
          7       0.93      0.92      0.93       531

avg / total       0.83      0.83      0.83      3780



### K Fold Cross Validation of Bagging Ensemble Model

In [12]:
# Evaluate the bagging estimator over the kfold splits of the training data
# and print the mean of the per-fold scores.
results = model_selection.cross_val_score(
    estimator=bagging_model,
    X=train_predictors,
    y=train_response,
    cv=kfold,
)
print(results.mean())

0.8358024691358024


### Boosting Ensemble Classifier Model

In [0]:
# Build a seeded 100-stage gradient boosting classifier, then fit it on the training split.
boosting_model = GradientBoostingClassifier(n_estimators=100, random_state=3)
boosting_model.fit(train_predictors, train_response)
# Predict on the held-out rows (overwrites the previous model's predictions).
predicted_test_response = boosting_model.predict(test_predictors)

### Performance Evaluation of Boosting Ensemble Model (Confusion Matrix, Accuracy Score & Classification Report)

In [14]:
# True class per row, predicted class per column, for the boosting predictions.
confusion_matrix(y_true=test_response, y_pred=predicted_test_response)

array([[390,  91,   1,   0,   9,   4,  50],
       [123, 305,  14,   0,  58,  16,   9],
       [  0,   0, 415,  35,  15,  85,   1],
       [  0,   0,   9, 527,   0,   8,   0],
       [  0,  40,  13,   0, 458,  14,   0],
       [  0,   2,  84,  15,  26, 432,   0],
       [ 29,   0,   0,   0,   0,   0, 502]])

In [15]:
# Overall test-set accuracy of the boosting model.
accuracy_score(y_true=test_response, y_pred=predicted_test_response)

0.8013227513227513

In [16]:
# Per-class precision / recall / F1 for the boosting predictions on the test split.
print(
    classification_report(
        y_true=test_response,
        y_pred=predicted_test_response,
    )
)

             precision    recall  f1-score   support

          1       0.72      0.72      0.72       545
          2       0.70      0.58      0.63       525
          3       0.77      0.75      0.76       551
          4       0.91      0.97      0.94       544
          5       0.81      0.87      0.84       525
          6       0.77      0.77      0.77       559
          7       0.89      0.95      0.92       531

avg / total       0.80      0.80      0.80      3780



### K Fold Cross Validation of Boosting Ensemble Model

In [17]:
# Evaluate the boosting estimator over the kfold splits of the training data
# and print the mean of the per-fold scores.
results = model_selection.cross_val_score(
    estimator=boosting_model,
    X=train_predictors,
    y=train_response,
    cv=kfold,
)
print(results.mean())

0.8081128747795414


### RandomForest Ensemble Classifier Model

In [0]:
# Build a seeded 100-tree random forest, then fit it on the training split.
rf_model = RandomForestClassifier(n_estimators=100, random_state=3)
rf_model.fit(train_predictors, train_response)
# Predict on the held-out rows (overwrites the previous model's predictions).
predicted_test_response = rf_model.predict(test_predictors)

### Performance Evaluation of RandomForest Ensemble Model (Confusion Matrix, Accuracy Score & Classification Report)

In [19]:
# True class per row, predicted class per column, for the random forest predictions.
confusion_matrix(y_true=test_response, y_pred=predicted_test_response)

array([[417,  83,   1,   0,  10,   1,  33],
       [ 85, 367,  18,   0,  38,  10,   7],
       [  0,   1, 458,  28,   7,  57,   0],
       [  0,   0,   9, 528,   0,   7,   0],
       [  1,  19,   9,   0, 492,   4,   0],
       [  0,   4,  51,  11,  10, 483,   0],
       [ 17,   1,   0,   0,   0,   0, 513]])

In [20]:
# Overall test-set accuracy of the random forest model.
accuracy_score(y_true=test_response, y_pred=predicted_test_response)

0.861904761904762

In [21]:
# Per-class precision / recall / F1 for the random forest predictions on the test split.
print(
    classification_report(
        y_true=test_response,
        y_pred=predicted_test_response,
    )
)

             precision    recall  f1-score   support

          1       0.80      0.77      0.78       545
          2       0.77      0.70      0.73       525
          3       0.84      0.83      0.84       551
          4       0.93      0.97      0.95       544
          5       0.88      0.94      0.91       525
          6       0.86      0.86      0.86       559
          7       0.93      0.97      0.95       531

avg / total       0.86      0.86      0.86      3780



### K Fold Cross Validation of Random Forest Ensemble Model

In [22]:
# Evaluate the random forest estimator over the kfold splits of the training data
# and print the mean of the per-fold scores.
results = model_selection.cross_val_score(
    estimator=rf_model,
    X=train_predictors,
    y=train_response,
    cv=kfold,
)
print(results.mean())

0.8715167548500883
