In [1]:
# Bagging Algorithm: Random Forest

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [4]:
from sklearn.datasets import load_iris

In [5]:
iris = load_iris()

In [6]:
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [7]:
# Creating DataFrame

In [8]:
X = pd.DataFrame(iris.data, columns=iris.feature_names)
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [11]:
y = pd.DataFrame(iris.target, columns=['target'])
y.head()

Unnamed: 0,target
0,0
1,0
2,0
3,0
4,0


In [12]:
# Splitting the Dataset to Train & Test

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
X_train

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
22,4.6,3.6,1.0,0.2
15,5.7,4.4,1.5,0.4
65,6.7,3.1,4.4,1.4
11,4.8,3.4,1.6,0.2
42,4.4,3.2,1.3,0.2
...,...,...,...,...
71,6.1,2.8,4.0,1.3
106,4.9,2.5,4.5,1.7
14,5.8,4.0,1.2,0.2
92,5.8,2.6,4.0,1.2


In [15]:
# Training Random Forest Model

In [16]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model

In [17]:
# Train the Model

rf_model.fit(X_train, y_train)

  return fit_method(estimator, *args, **kwargs)


In [19]:
# Make Predictions on Test Data

y_pred = rf_model.predict(X_test)
y_pred

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [20]:
# Evaluate the Model

# Accuracy Score

from sklearn.metrics import accuracy_score
acc_score = accuracy_score(y_test, y_pred)
print(f"Accuracy Score: {acc_score}")

Accuracy Score: 1.0


In [23]:
# Classification Report

class_report = classification_report(y_test, y_pred)
print(f"Classification: {class_report}")

Classification:               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [27]:
# Confusion Matrix

conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [34]:
# Checking Feature Importance

feature_importance = rf_model.feature_importances_
feature_importance

for feature, importance in zip(iris.feature_names, feature_importance):
    print(f"{feature}: {importance}")

sepal length (cm): 0.10809762464246378
sepal width (cm): 0.030386812473242528
petal length (cm): 0.43999397414456937
petal width (cm): 0.4215215887397244


In [37]:
# Hyperparameter Tunning: To increase model Performance

rf_tunned_model = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=42)
rf_tunned_model.fit(X_train, y_train)

  return fit_method(estimator, *args, **kwargs)


In [38]:
y_pred_tunned = rf_tunned_model.predict(X_test)
y_pred_tunned

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [40]:
# Evaluate the Model

acc_score_tunned = accuracy_score(y_test, y_pred_tunned)

print(f"Accuracy score of Tunned Model: {acc_score_tunned }")

Accuracy score of Tunned Model: 1.0


In [41]:
# Pickling the Model

In [42]:
import pickle

In [43]:
pickle.dump(rf_tunned_model, open('Random_Forest_Model.pkl', 'wb'))

In [45]:
pickled_model = pickle.load(open('Random_Forest_Model.pkl', 'rb'))

# Batch Inputs I have given
pickled_model.predict(X_test)

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [46]:
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [48]:
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [49]:
feature_dict = {'sepal length (cm)': 5,
 'sepal width (cm)':3,
 'petal length (cm)':1,
 'petal width (cm)':2}

In [50]:
feature_dict

{'sepal length (cm)': 5,
 'sepal width (cm)': 3,
 'petal length (cm)': 1,
 'petal width (cm)': 2}

In [51]:
feature_dict.values()

dict_values([5, 3, 1, 2])

In [53]:
list(feature_dict.values())

[5, 3, 1, 2]

In [57]:
list([feature_dict.values()])

[dict_values([5, 3, 1, 2])]

In [60]:
# Single Inputs I have given
pickled_model.predict([list(feature_dict.values())])[0]



0