# Problem statement


Predict the flower type on wether it is setosa , versicolor or virginica.

In [2]:
# Importing libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report,multilabel_confusion_matrix



In [3]:
df = pd.read_csv("Iris.csv")
df.head(10)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
5,6,5.4,3.9,1.7,0.4,Iris-setosa
6,7,4.6,3.4,1.4,0.3,Iris-setosa
7,8,5.0,3.4,1.5,0.2,Iris-setosa
8,9,4.4,2.9,1.4,0.2,Iris-setosa
9,10,4.9,3.1,1.5,0.1,Iris-setosa


In [4]:
df.drop('Id',axis=1,inplace=True)

In [5]:
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [7]:
df.Species.value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Species, dtype: int64

In [8]:
df.replace({"Iris-setosa":1,"Iris-versicolor":2,"Iris-virginica":3},inplace=True)

# Setting X and Y variable

In [9]:
x = df.drop("Species",axis=1)
y = df['Species']

# Creating Logistic regression Model

In [15]:
x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.3,random_state=12,stratify=y)

In [16]:
x_train.shape , x_test.shape , y_train.shape , y_test.shape

((105, 4), (45, 4), (105,), (45,))

In [17]:
model = LogisticRegression(max_iter=200)
model.fit(x_train,y_train)

LogisticRegression(max_iter=200)

# Now Model Evaluation Part

In [18]:
model.score(x_train,y_train)

0.9809523809523809

In [19]:
model.score(x_test,y_test)

0.9777777777777777

### --> Model Evaluation Function

In [20]:
def model_eval(model,x,y):
    y_pred = model.predict(x)
    acc_score = accuracy_score(y,y_pred)
    print(f"The Accuracy score of the Model is :- {acc_score}")
    conf_mat = confusion_matrix(y,y_pred)
    print(f"The Confusion matrix of the Model is :- {acc_score}")
    clf_rpt = classification_report(y,y_pred)
    print("*"*30)
    print(f"Classification Report is:- \n {clf_rpt}")
    ml_cnf = multilabel_confusion_matrix(y,y_pred)
    print("*"*30)
    print(f"Multi_lable confusion Matrixes :- \n {ml_cnf}")
    
    return "! Success !"


In [21]:
# For Training Data
model_eval(model,x_train,y_train)

The Accuracy score of the Model is :- 0.9809523809523809
The Confusion matrix of the Model is :- 0.9809523809523809
******************************
Classification Report is:- 
               precision    recall  f1-score   support

           1       1.00      1.00      1.00        35
           2       1.00      0.94      0.97        35
           3       0.95      1.00      0.97        35

    accuracy                           0.98       105
   macro avg       0.98      0.98      0.98       105
weighted avg       0.98      0.98      0.98       105

******************************
Multi_lable confusion Matrixes :- 
 [[[70  0]
  [ 0 35]]

 [[70  0]
  [ 2 33]]

 [[68  2]
  [ 0 35]]]


'! Success !'

In [22]:
# For Testing Data
model_eval(model,x_test,y_test)

The Accuracy score of the Model is :- 0.9777777777777777
The Confusion matrix of the Model is :- 0.9777777777777777
******************************
Classification Report is:- 
               precision    recall  f1-score   support

           1       1.00      1.00      1.00        15
           2       1.00      0.93      0.97        15
           3       0.94      1.00      0.97        15

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

******************************
Multi_lable confusion Matrixes :- 
 [[[30  0]
  [ 0 15]]

 [[30  0]
  [ 1 14]]

 [[29  1]
  [ 0 15]]]


'! Success !'

In [23]:
# Creating Pickel File
import pickle

In [24]:
with open ("model.pkl","wb") as f:
    pickle.dump(model,f)

In [30]:
# Creating Json File
import json

In [31]:
x.columns

Index(['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'], dtype='object')

In [32]:
x.columns.tolist()

['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']

In [33]:
d1 = {"columns":['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],"result_values":{"Iris-setosa":1,"Iris-versicolor":2,"Iris-virginica":3}}

with open ("asset.json",'w') as f:
    json.dump(d1,f)