# Iris Data

In [91]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

import warnings
warnings.filterwarnings("ignore")

import pickle
import json

In [92]:
model_details = []
training_accuracy_list = []
testing_accuracy_list = []

## Problem Statement

## Data Gathering

In [93]:
df = pd.read_csv("Iris.csv")
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [94]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [95]:
df.isna().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [96]:
df.boxplot()

<AxesSubplot:xlabel='PetalWidthCm'>

### 1. SepalLengthCm

In [97]:
sns.boxplot(df["SepalLengthCm"])

<AxesSubplot:xlabel='SepalLengthCm'>

### 2. SepalWidthCm

In [98]:
sns.boxplot(df["SepalWidthCm"])

<AxesSubplot:xlabel='SepalWidthCm'>

In [99]:
q1 = df["SepalWidthCm"].quantile(0.25)
q2 = df["SepalWidthCm"].quantile(0.50)
q3 = df["SepalWidthCm"].quantile(0.75)

median = df["SepalWidthCm"].median()

iqr = q3 - q1

upper_tail = q3 + 1.5 * iqr
lower_tail = q3 - 1.5 * iqr

print("Q1 :", q1)
print("Q2 :", q2)
print("Q3 :", q3)
print("Median :", median)
print("IQR :", iqr)
print("Upper_Tail :", upper_tail)
print("Lower_Tail :", lower_tail)

Q1 : 2.8
Q2 : 3.0
Q3 : 3.3
Median : 3.0
IQR : 0.5
Upper_Tail : 4.05
Lower_Tail : 2.55


In [100]:
df[["SepalWidthCm"]].loc[(df["SepalWidthCm"] > upper_tail) | (df["SepalWidthCm"] < lower_tail)]

Unnamed: 0,SepalWidthCm
15,4.4
32,4.1
33,4.2
41,2.3
53,2.3
57,2.4
60,2.0
62,2.2
68,2.2
69,2.5


In [101]:
median_sw = df["SepalWidthCm"].loc[(df["SepalWidthCm"] <= upper_tail) & (df["SepalWidthCm"] >= lower_tail)].median()
median_sw

3.05

In [102]:
df.loc[(df["SepalWidthCm"] > upper_tail) & df["SepalWidthCm"] < lower_tail, "SepalWidthCm"]= median_sw

In [103]:
df[["SepalWidthCm"]].loc[(df["SepalWidthCm"] > upper_tail) | (df["SepalWidthCm"] < lower_tail)]

Unnamed: 0,SepalWidthCm


### 3. PetalLengthCm

In [104]:
sns.boxplot(df["PetalLengthCm"])

<AxesSubplot:xlabel='PetalLengthCm'>

### 4. PetalWidthCm

In [105]:
sns.boxplot(df["PetalWidthCm"])

<AxesSubplot:xlabel='PetalWidthCm'>

### 5. Species

In [106]:
df["Species"].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Species, dtype: int64

In [107]:
df = df.drop("Id", axis = 1)
df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.05,1.4,0.2,Iris-setosa
1,4.9,3.05,1.4,0.2,Iris-setosa
2,4.7,3.05,1.3,0.2,Iris-setosa
3,4.6,3.05,1.5,0.2,Iris-setosa
4,5.0,3.05,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.05,5.2,2.3,Iris-virginica
146,6.3,3.05,5.0,1.9,Iris-virginica
147,6.5,3.05,5.2,2.0,Iris-virginica
148,6.2,3.05,5.4,2.3,Iris-virginica


## Trian_Test_Split

In [108]:
x = df.drop("Species", axis = 1)
y = df["Species"]

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2, random_state = 11, stratify = y)

model_details.append("Logistic Regular Model")

In [109]:
x_train

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
8,4.4,3.05,1.4,0.2
23,5.1,3.05,1.7,0.5
101,5.8,3.05,5.1,1.9
49,5.0,3.05,1.4,0.2
111,6.4,3.05,5.3,1.9
...,...,...,...,...
94,5.6,3.05,4.2,1.3
62,6.0,3.05,4.0,1.0
115,6.4,3.05,5.3,2.3
21,5.1,3.05,1.5,0.4


In [110]:
y_train

8          Iris-setosa
23         Iris-setosa
101     Iris-virginica
49         Iris-setosa
111     Iris-virginica
            ...       
94     Iris-versicolor
62     Iris-versicolor
115     Iris-virginica
21         Iris-setosa
77     Iris-versicolor
Name: Species, Length: 120, dtype: object

## Model Building

In [111]:
log_model = LogisticRegression(multi_class = 'ovr')
log_model.fit(x_train, y_train)

LogisticRegression(multi_class='ovr')

## Model Evaluation

In [112]:
y_pred = log_model.predict(x_test)                                  # testing data

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[10  0  0]
 [ 1  9  0]
 [ 0  0 10]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[19  1]
  [ 0 10]]

 [[20  0]
  [ 1  9]]

 [[20  0]
  [ 0 10]]]
------------------------------------------------------------------------------------------
Accuracy : 0.9666666666666667
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       0.91      1.00      0.95        10
Iris-versicolor       1.00      0.90      0.95        10
 Iris-virginica       1.00      1.00      1.00        10

       accuracy                           0.97        30
      macro avg       0.97      0.97      0.97        30
   weighted avg       0.97      0.97      0.97        30



In [113]:
y_pred_train = log_model.predict(x_train)                                  # training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[40  0  0]
 [ 0 36  4]
 [ 0  2 38]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[80  0]
  [ 0 40]]

 [[78  2]
  [ 4 36]]

 [[76  4]
  [ 2 38]]]
------------------------------------------------------------------------------------------
Accuracy : 0.95
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        40
Iris-versicolor       0.95      0.90      0.92        40
 Iris-virginica       0.90      0.95      0.93        40

       accuracy                           0.95       120
      macro avg       0.95      0.95      0.95       120
   weighted avg       0.95      0.95      0.95       120



# KNN Algorithm

# 1. Normalization (0 to 1)

In [114]:
x_df = df.drop('Species', axis = 1)

x_df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.05,1.4,0.2
1,4.9,3.05,1.4,0.2
2,4.7,3.05,1.3,0.2
3,4.6,3.05,1.5,0.2
4,5.0,3.05,1.4,0.2


In [115]:
normal_scalar = MinMaxScaler()

array = normal_scalar.fit_transform(x_df)

x_normal_df = pd.DataFrame(array, columns=x_df.columns)
x_normal_df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,0.222222,0.0,0.067797,0.041667
1,0.166667,0.0,0.067797,0.041667
2,0.111111,0.0,0.050847,0.041667
3,0.083333,0.0,0.084746,0.041667
4,0.194444,0.0,0.067797,0.041667
...,...,...,...,...
145,0.666667,0.0,0.711864,0.916667
146,0.555556,0.0,0.677966,0.750000
147,0.611111,0.0,0.711864,0.791667
148,0.527778,0.0,0.745763,0.916667


In [116]:
x_85 = (85 - 0)/(199 - 0)
x_85

0.4271356783919598

# 2. Standardization (-3 to +3)

In [117]:
std_scalar = StandardScaler()

array = std_scalar.fit_transform(x_df)

x_std_df = pd.DataFrame(array, columns=x_df.columns)
x_std_df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,-0.900681,-8.881784e-16,-1.341272,-1.312977
1,-1.143017,-8.881784e-16,-1.341272,-1.312977
2,-1.385353,-8.881784e-16,-1.398138,-1.312977
3,-1.506521,-8.881784e-16,-1.284407,-1.312977
4,-1.021849,-8.881784e-16,-1.341272,-1.312977
...,...,...,...,...
145,1.038005,-8.881784e-16,0.819624,1.447956
146,0.553333,-8.881784e-16,0.705893,0.922064
147,0.795669,-8.881784e-16,0.819624,1.053537
148,0.432165,-8.881784e-16,0.933356,1.447956


# Model Building

### Train Test Split

In [118]:
x = df.drop('Species', axis = 1)                        # regular Model
y = df['Species']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1, stratify=y)

knn_log_model = KNeighborsClassifier()
knn_log_model.fit(x_train,y_train)

model_details.append("KNN Regular Model")

In [119]:
x = x_normal_df.copy()                                     # normalization
y = df["Species"]

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state = 15)

knn_normal_reg = KNeighborsClassifier()
knn_normal_reg.fit(x_train,y_train)

model_details.append("KNN Normalization")

In [120]:
x = x_std_df.copy()                                # standardization
y = df['Species']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 15)

knn_std_reg = KNeighborsClassifier()
knn_std_reg.fit(x_train,y_train)

model_details.append("KNN Standardization")

### Model Evalution

In [121]:
y_pred = knn_log_model.predict(x_test)                                  # regular testing data

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[ 8  0  0]
 [13  0  0]
 [ 9  0  0]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[ 0 22]
  [ 0  8]]

 [[17  0]
  [13  0]]

 [[21  0]
  [ 9  0]]]
------------------------------------------------------------------------------------------
Accuracy : 0.26666666666666666
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       0.27      1.00      0.42         8
Iris-versicolor       0.00      0.00      0.00        13
 Iris-virginica       0.00      0.00      0.00         9

       accuracy                           0.27        30
      macro avg       0.09      0.33      0.14        30
   weighted avg       0.07      0.27      0.11        30



In [122]:
y_pred_train = knn_log_model.predict(x_train)                                  # regular training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[42  0  0]
 [37  0  0]
 [41  0  0]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[ 0 78]
  [ 0 42]]

 [[83  0]
  [37  0]]

 [[79  0]
  [41  0]]]
------------------------------------------------------------------------------------------
Accuracy : 0.35
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       0.35      1.00      0.52        42
Iris-versicolor       0.00      0.00      0.00        37
 Iris-virginica       0.00      0.00      0.00        41

       accuracy                           0.35       120
      macro avg       0.12      0.33      0.17       120
   weighted avg       0.12      0.35      0.18       120



In [123]:
y_pred = knn_normal_reg.predict(x_test)                                  # normalized testing data

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[ 8  0  0]
 [11  2  0]
 [ 0  0  9]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[11 11]
  [ 0  8]]

 [[17  0]
  [11  2]]

 [[21  0]
  [ 0  9]]]
------------------------------------------------------------------------------------------
Accuracy : 0.6333333333333333
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       0.42      1.00      0.59         8
Iris-versicolor       1.00      0.15      0.27        13
 Iris-virginica       1.00      1.00      1.00         9

       accuracy                           0.63        30
      macro avg       0.81      0.72      0.62        30
   weighted avg       0.85      0.63      0.57        30



In [124]:
y_pred_train = knn_normal_reg.predict(x_train)                                  # normalized training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[42  0  0]
 [18 16  3]
 [ 1  2 38]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[59 19]
  [ 0 42]]

 [[81  2]
  [21 16]]

 [[76  3]
  [ 3 38]]]
------------------------------------------------------------------------------------------
Accuracy : 0.8
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       0.69      1.00      0.82        42
Iris-versicolor       0.89      0.43      0.58        37
 Iris-virginica       0.93      0.93      0.93        41

       accuracy                           0.80       120
      macro avg       0.83      0.79      0.77       120
   weighted avg       0.83      0.80      0.78       120



In [125]:
y_pred = knn_std_reg.predict(x_test)                                  # standardized testing data

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[ 8  0  0]
 [ 0 13  0]
 [ 0  0  9]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[22  0]
  [ 0  8]]

 [[17  0]
  [ 0 13]]

 [[21  0]
  [ 0  9]]]
------------------------------------------------------------------------------------------
Accuracy : 1.0
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00         8
Iris-versicolor       1.00      1.00      1.00        13
 Iris-virginica       1.00      1.00      1.00         9

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [126]:
y_pred_train = knn_std_reg.predict(x_train)                                  # standardized training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[42  0  0]
 [ 0 35  2]
 [ 0  3 38]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[78  0]
  [ 0 42]]

 [[80  3]
  [ 2 35]]

 [[77  2]
  [ 3 38]]]
------------------------------------------------------------------------------------------
Accuracy : 0.9583333333333334
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        42
Iris-versicolor       0.92      0.95      0.93        37
 Iris-virginica       0.95      0.93      0.94        41

       accuracy                           0.96       120
      macro avg       0.96      0.96      0.96       120
   weighted avg       0.96      0.96      0.96       120



# Hyperparameter Tunning

In [127]:
knn_clf = KNeighborsClassifier()

param_grid = {"n_neighbors" : np.arange(3,30),
                  "p": [1,2]} # Param_grid

gscv_knn_clf = GridSearchCV(knn_clf, param_grid)  # cv=5
gscv_knn_clf.fit(x_train, y_train) 
gscv_knn_clf.best_estimator_

model_details.append("KNN Hyperparameter Tuning")

In [128]:
(gscv_knn_clf.best_params_)

{'n_neighbors': 13, 'p': 2}

In [129]:
knn_log_model = gscv_knn_clf.best_estimator_ 

y_pred = knn_log_model.predict(x_test)                                  # testing data

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[ 8  0  0]
 [ 0 13  0]
 [ 0  0  9]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[22  0]
  [ 0  8]]

 [[17  0]
  [ 0 13]]

 [[21  0]
  [ 0  9]]]
------------------------------------------------------------------------------------------
Accuracy : 1.0
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00         8
Iris-versicolor       1.00      1.00      1.00        13
 Iris-virginica       1.00      1.00      1.00         9

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [130]:
y_pred_train = knn_log_model.predict(x_train)                                  # training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[42  0  0]
 [ 0 35  2]
 [ 0  4 37]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[78  0]
  [ 0 42]]

 [[79  4]
  [ 2 35]]

 [[77  2]
  [ 4 37]]]
------------------------------------------------------------------------------------------
Accuracy : 0.95
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        42
Iris-versicolor       0.90      0.95      0.92        37
 Iris-virginica       0.95      0.90      0.92        41

       accuracy                           0.95       120
      macro avg       0.95      0.95      0.95       120
   weighted avg       0.95      0.95      0.95       120



# Decision Tree

### Train test Split

In [131]:
x = df.drop('Species', axis=1)
y = df['Species']

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=11, stratify=y)

model_details.append("Decision Tree Regular Model")

In [132]:
x_train

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
8,4.4,3.05,1.4,0.2
23,5.1,3.05,1.7,0.5
101,5.8,3.05,5.1,1.9
49,5.0,3.05,1.4,0.2
111,6.4,3.05,5.3,1.9
...,...,...,...,...
94,5.6,3.05,4.2,1.3
62,6.0,3.05,4.0,1.0
115,6.4,3.05,5.3,2.3
21,5.1,3.05,1.5,0.4


# Model Training

In [133]:
dt_clf = DecisionTreeClassifier(random_state=11)
dt_clf.fit(x_train, y_train)

# criterion='gini',
# max_depth=None,
# min_samples_split=2,
# min_samples_leaf=1,
# max_features=None,
# random_state=None,
# ccp_alpha=0.0,

DecisionTreeClassifier(random_state=11)

# Model Evaluation

In [134]:
y_pred = dt_clf.predict(x_test)                                  # testing data

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[20  0]
  [ 0 10]]

 [[20  0]
  [ 0 10]]

 [[20  0]
  [ 0 10]]]
------------------------------------------------------------------------------------------
Accuracy : 1.0
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00        10
 Iris-virginica       1.00      1.00      1.00        10

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [135]:
y_pred_train = dt_clf.predict(x_train)                                  # training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[40  0  0]
 [ 0 40  0]
 [ 0  0 40]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[80  0]
  [ 0 40]]

 [[80  0]
  [ 0 40]]

 [[80  0]
  [ 0 40]]]
------------------------------------------------------------------------------------------
Accuracy : 1.0
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        40
Iris-versicolor       1.00      1.00      1.00        40
 Iris-virginica       1.00      1.00      1.00        40

       accuracy                           1.00       120
      macro avg       1.00      1.00      1.00       120
   weighted avg       1.00      1.00      1.00       120



# Hyperparameter Tunning

In [136]:
import time
t1 = time.time()

dt_model = DecisionTreeClassifier(random_state=11)

# criterion='gini',
# max_depth=None,
# min_samples_split=2,
# min_samples_leaf=1,
# max_features=None,  # Random Forest
# random_state=None,
# ccp_alpha=0.0,  --> Prunning

hyperparameter = {"criterion":['gini', 'entropy'], 
                 "max_depth": np.arange(3,8),
                 "min_samples_split": np.arange(2,20),
                 "min_samples_leaf": np.arange(2,15)}  # 2 * 5 * 18 * 13 = 2340

rscv_dt_clf = RandomizedSearchCV(dt_model, hyperparameter, cv=5)
rscv_dt_clf.fit(x_train, y_train)

t2 = time.time()
print("Required Time :", t2-t1, "sec")

model_details.append("DT Hyperparameter Tuning")

Required Time : 0.3563807010650635 sec


In [137]:
rscv_dt_clf.best_estimator_

DecisionTreeClassifier(max_depth=7, min_samples_leaf=3, min_samples_split=5,
                       random_state=11)

In [138]:
dt_clf = rscv_dt_clf.best_estimator_
dt_clf.fit(x_train, y_train)                                  # testing data

y_pred = dt_clf.predict(x_test)

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[20  0]
  [ 0 10]]

 [[20  0]
  [ 0 10]]

 [[20  0]
  [ 0 10]]]
------------------------------------------------------------------------------------------
Accuracy : 1.0
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00        10
 Iris-virginica       1.00      1.00      1.00        10

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [139]:
y_pred_train = dt_clf.predict(x_train)                                  # training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[40  0  0]
 [ 0 39  1]
 [ 0  2 38]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[80  0]
  [ 0 40]]

 [[78  2]
  [ 1 39]]

 [[79  1]
  [ 2 38]]]
------------------------------------------------------------------------------------------
Accuracy : 0.975
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        40
Iris-versicolor       0.95      0.97      0.96        40
 Iris-virginica       0.97      0.95      0.96        40

       accuracy                           0.97       120
      macro avg       0.98      0.97      0.97       120
   weighted avg       0.98      0.97      0.97       120



# Random Forest

In [140]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 10, stratify = y)
x_train

model_details.append("Random Forest Regular Model")

# Model Training

In [141]:
# Create an instance

rf_clf = RandomForestClassifier(random_state = 10)
rf_clf.fit(x_train, y_train)

RandomForestClassifier(random_state=10)

# Model Evaluation

In [142]:
y_pred = rf_clf.predict(x_test)                                  # testing data

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[12  0  0]
 [ 0 13  0]
 [ 0  0 13]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[26  0]
  [ 0 12]]

 [[25  0]
  [ 0 13]]

 [[25  0]
  [ 0 13]]]
------------------------------------------------------------------------------------------
Accuracy : 1.0
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        12
Iris-versicolor       1.00      1.00      1.00        13
 Iris-virginica       1.00      1.00      1.00        13

       accuracy                           1.00        38
      macro avg       1.00      1.00      1.00        38
   weighted avg       1.00      1.00      1.00        38



In [143]:
y_pred_train = rf_clf.predict(x_train)                                  # training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[38  0  0]
 [ 0 37  0]
 [ 0  0 37]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[74  0]
  [ 0 38]]

 [[75  0]
  [ 0 37]]

 [[75  0]
  [ 0 37]]]
------------------------------------------------------------------------------------------
Accuracy : 1.0
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        38
Iris-versicolor       1.00      1.00      1.00        37
 Iris-virginica       1.00      1.00      1.00        37

       accuracy                           1.00       112
      macro avg       1.00      1.00      1.00       112
   weighted avg       1.00      1.00      1.00       112



### Hyperparameter Tunning for Random Forest

In [144]:
rf_model = RandomForestClassifier(random_state = 10)

n_estimators=100, # Number of DT
criterion='gini',
max_depth=None,
min_samples_split=2,
min_samples_leaf=1,
max_features='sqrt',
bootstrap=True,  # Can we avoid bootstrapping...?
oob_score=False,
n_jobs=-1,
random_state=None,
ccp_alpha=0.0

In [145]:
ccp_alpha_list = list(np.random.random(10))
ccp_alpha_list

[0.8447221974280935,
 0.3169021293056259,
 0.5536721585086157,
 0.8205545119411067,
 0.584280351187866,
 0.6184744929150805,
 0.4872398078787248,
 0.6073419201325948,
 0.4556564451249576,
 0.42991641376704304]

In [146]:
rf_model = RandomForestClassifier(random_state=10)

param_grid = {
                "n_estimators": np.arange(10,200),
                "criterion" : ['gini', 'entropy'],
                "max_depth" : np.arange(3,8),
                "min_samples_split" : np.arange(4,20),
                "min_samples_leaf": np.arange(3,10),
                "max_features" : ['sqrt', 'log2'],
                "oob_score" : [True],
                "ccp_alpha" : ccp_alpha_list
            }

rscv_rf_model = RandomizedSearchCV(rf_model, param_grid, cv = 5)
rscv_rf_model.fit(x_train, y_train)
rscv_rf_model.best_estimator_

model_details.append("RF Hyperparameter Tuning")

In [147]:
rf_clf = rscv_rf_model.best_estimator_
rf_clf.fit(x_train, y_train)                                  # testing data

y_pred = rf_clf.predict(x_test)

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[12  0  0]
 [ 0 13  0]
 [ 0  1 12]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[26  0]
  [ 0 12]]

 [[24  1]
  [ 0 13]]

 [[25  0]
  [ 1 12]]]
------------------------------------------------------------------------------------------
Accuracy : 0.9736842105263158
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        12
Iris-versicolor       0.93      1.00      0.96        13
 Iris-virginica       1.00      0.92      0.96        13

       accuracy                           0.97        38
      macro avg       0.98      0.97      0.97        38
   weighted avg       0.98      0.97      0.97        38



In [148]:
y_pred_train = rf_clf.predict(x_train)                                  # training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[38  0  0]
 [ 0 33  4]
 [ 0  2 35]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[74  0]
  [ 0 38]]

 [[73  2]
  [ 4 33]]

 [[71  4]
  [ 2 35]]]
------------------------------------------------------------------------------------------
Accuracy : 0.9464285714285714
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        38
Iris-versicolor       0.94      0.89      0.92        37
 Iris-virginica       0.90      0.95      0.92        37

       accuracy                           0.95       112
      macro avg       0.95      0.95      0.95       112
   weighted avg       0.95      0.95      0.95       112



# AdaBoost Algoritm

In [149]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=30, stratify=y)
x_train

model_details.append("AdaBoost Regular Model")

## Model Building

In [150]:
adb_clf = AdaBoostClassifier(random_state=30)
adb_clf.fit(x_train, y_train)


# base_estimator=None,
# n_estimators=50,
# learning_rate=1.0,
# algorithm='SAMME.R',
# random_state=None,

AdaBoostClassifier(random_state=30)

## Model Evaluation

In [151]:
y_pred = adb_clf.predict(x_test)                   # testing data

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[10  0  0]
 [ 0  9  1]
 [ 0  3  7]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[20  0]
  [ 0 10]]

 [[17  3]
  [ 1  9]]

 [[19  1]
  [ 3  7]]]
------------------------------------------------------------------------------------------
Accuracy : 0.8666666666666667
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.75      0.90      0.82        10
 Iris-virginica       0.88      0.70      0.78        10

       accuracy                           0.87        30
      macro avg       0.88      0.87      0.87        30
   weighted avg       0.88      0.87      0.87        30



In [152]:
y_pred_train = adb_clf.predict(x_train)                                  # training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[40  0  0]
 [ 0 39  1]
 [ 0  1 39]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[80  0]
  [ 0 40]]

 [[79  1]
  [ 1 39]]

 [[79  1]
  [ 1 39]]]
------------------------------------------------------------------------------------------
Accuracy : 0.9833333333333333
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        40
Iris-versicolor       0.97      0.97      0.97        40
 Iris-virginica       0.97      0.97      0.97        40

       accuracy                           0.98       120
      macro avg       0.98      0.98      0.98       120
   weighted avg       0.98      0.98      0.98       120



# Hyperparameter Tunning

In [153]:
adb_clf = AdaBoostClassifier(random_state=30)

param_grid = {
            "n_estimators" : np.arange(10,200,2),
            "learning_rate" : np.arange(0,2,0.001)
                }

rscv_adb_clf = RandomizedSearchCV(adb_clf, param_grid, cv=7)
rscv_adb_clf.fit(x_train, y_train)
rscv_adb_clf.best_estimator_

model_details.append("AdaBoost Hyperparameter Tunning")

In [154]:
adb_clf = rscv_adb_clf.best_estimator_
adb_clf.fit(x_train, y_train)                                  # testing data

y_pred = adb_clf.predict(x_test)

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_test, y_pred)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_test, y_pred)
print("Classification Report :\n", clf_report)

testing_accuracy_list.append(accuracy)

Confusion Matrix :
 [[10  0  0]
 [ 0  9  1]
 [ 0  3  7]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[20  0]
  [ 0 10]]

 [[17  3]
  [ 1  9]]

 [[19  1]
  [ 3  7]]]
------------------------------------------------------------------------------------------
Accuracy : 0.8666666666666667
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.75      0.90      0.82        10
 Iris-virginica       0.88      0.70      0.78        10

       accuracy                           0.87        30
      macro avg       0.88      0.87      0.87        30
   weighted avg       0.88      0.87      0.87        30



In [155]:
y_pred_train = adb_clf.predict(x_train)                                  # training data

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix :\n", cnf_matrix)
print("-"* 90)

multi_cnf_matrix = multilabel_confusion_matrix(y_train, y_pred_train)
print("Multilabel confusion matrix :\n", multi_cnf_matrix)
print("-"* 90)

accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy :", accuracy)
print("-"* 90)

clf_report = classification_report(y_train, y_pred_train)
print("Classification Report :\n", clf_report)

training_accuracy_list.append(accuracy)

Confusion Matrix :
 [[40  0  0]
 [ 0 39  1]
 [ 0  1 39]]
------------------------------------------------------------------------------------------
Multilabel confusion matrix :
 [[[80  0]
  [ 0 40]]

 [[79  1]
  [ 1 39]]

 [[79  1]
  [ 1 39]]]
------------------------------------------------------------------------------------------
Accuracy : 0.9833333333333333
------------------------------------------------------------------------------------------
Classification Report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        40
Iris-versicolor       0.97      0.97      0.97        40
 Iris-virginica       0.97      0.97      0.97        40

       accuracy                           0.98       120
      macro avg       0.98      0.98      0.98       120
   weighted avg       0.98      0.98      0.98       120



In [156]:
len(model_details)

11

In [157]:
len(testing_accuracy_list)

11

In [158]:
len(training_accuracy_list)

11

In [159]:
comparison_df = pd.DataFrame({"Models": model_details, "Testing Accuracy": testing_accuracy_list, "Training Accuracy": training_accuracy_list})
comparison_df

Unnamed: 0,Models,Testing Accuracy,Training Accuracy
0,Logistic Regular Model,0.966667,0.95
1,KNN Regular Model,0.266667,0.35
2,KNN Normalization,0.633333,0.8
3,KNN Standardization,1.0,0.958333
4,KNN Hyperparameter Tuning,1.0,0.95
5,Decision Tree Regular Model,1.0,1.0
6,DT Hyperparameter Tuning,1.0,0.975
7,Random Forest Regular Model,1.0,1.0
8,RF Hyperparameter Tuning,0.973684,0.946429
9,AdaBoost Regular Model,0.866667,0.983333


In [160]:
comparison_df["Difference_in_%"] = (comparison_df['Training Accuracy'] - comparison_df['Testing Accuracy']) *100
comparison_df

Unnamed: 0,Models,Testing Accuracy,Training Accuracy,Difference_in_%
0,Logistic Regular Model,0.966667,0.95,-1.666667
1,KNN Regular Model,0.266667,0.35,8.333333
2,KNN Normalization,0.633333,0.8,16.666667
3,KNN Standardization,1.0,0.958333,-4.166667
4,KNN Hyperparameter Tuning,1.0,0.95,-5.0
5,Decision Tree Regular Model,1.0,1.0,0.0
6,DT Hyperparameter Tuning,1.0,0.975,-2.5
7,Random Forest Regular Model,1.0,1.0,0.0
8,RF Hyperparameter Tuning,0.973684,0.946429,-2.725564
9,AdaBoost Regular Model,0.866667,0.983333,11.666667


## Single User Input

In [161]:
x.head(1).T

Unnamed: 0,0
SepalLengthCm,5.1
SepalWidthCm,3.05
PetalLengthCm,1.4
PetalWidthCm,0.2


In [162]:
column_names = x.columns
column_names

Index(['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'], dtype='object')

## Create Pickle File

In [163]:
with open("Logistic_model.pkl", "wb") as f:
    pickle.dump(log_model, f)

In [164]:
with open("Best Model.pkl", "wb") as f:
    pickle.dump(dt_clf, f)

## Create JSON File

In [165]:
json_data = {"columns" : list(column_names)}
json_data

{'columns': ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']}

In [166]:
with open("json_data.json", "w") as f:
    json.dump(json_data, f)

In [167]:
SepalLengthCm = 5.10
SepalWidthCm  = 3.05
PetalLengthCm = 1.40
PetalWidthCm  = 0.20

In [168]:
test_array = np.zeros(len(x.columns))

test_array[0] = SepalLengthCm
test_array[1] = SepalWidthCm
test_array[2] = PetalLengthCm
test_array[3] = PetalWidthCm

test_array

array([5.1 , 3.05, 1.4 , 0.2 ])

In [169]:
species = log_model.predict([test_array])[0]
print("Species of Flower is :", species)

Species of Flower is : Iris-setosa


In [170]:
species = dt_clf.predict([test_array])[0]
print("Species of Flower is :", species)

Species of Flower is : Iris-setosa
