In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import opendatasets as od
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
# Folder that holds IRIS.csv. The original machine-specific location stays the default, but it
# can be overridden through the IRIS_DATA_DIR environment variable so the notebook also runs
# on machines where that absolute path does not exist.
data_dir = os.environ.get("IRIS_DATA_DIR", "D://Ronak//Python//multi_class classification")

In [3]:
# Sanity check: list the entries of the data folder to confirm the CSV is present.
os.listdir(path=data_dir)

['.ipynb_checkpoints', 'IRIS.csv', 'multi_class_classification.ipynb']

In [4]:
# Full path of the dataset file. os.path.join inserts the separator itself, so a data_dir that
# already ends with a slash does not yield a doubled separator.
csv = os.path.join(data_dir, "IRIS.csv")

In [5]:
# Load the raw dataset into a DataFrame.
data = pd.read_csv(filepath_or_buffer=csv)

In [6]:
# Preview the loaded frame. A bare last expression lets Jupyter use the DataFrame's rich
# (HTML table) display instead of the plain-text dump produced by print().
data

     sepal_length  sepal_width  petal_length  petal_width         species
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]


In [7]:
# Number of missing values in each column.
data.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [8]:
# Print a per-column summary: dtype and non-null count, plus the frame's memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [9]:
# Feature matrix: the four numeric measurement columns, selected by name.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
input_data = data[feature_columns]

In [10]:
# Preview the feature matrix. A bare last expression lets Jupyter use the DataFrame's rich
# (HTML table) display instead of the plain-text dump produced by print().
input_data

     sepal_length  sepal_width  petal_length  petal_width
0             5.1          3.5           1.4          0.2
1             4.9          3.0           1.4          0.2
2             4.7          3.2           1.3          0.2
3             4.6          3.1           1.5          0.2
4             5.0          3.6           1.4          0.2
..            ...          ...           ...          ...
145           6.7          3.0           5.2          2.3
146           6.3          2.5           5.0          1.9
147           6.5          3.0           5.2          2.0
148           6.2          3.4           5.4          2.3
149           5.9          3.0           5.1          1.8

[150 rows x 4 columns]


In [11]:
# Target vector: the species label of every row.
target_column = 'species'
output_data = data[target_column]

In [12]:
# Preview the target labels. The cell's last expression is displayed by Jupyter on its own,
# so wrapping it in print() is unnecessary.
output_data

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: species, Length: 150, dtype: object


In [13]:
# Hold out 25% of the rows for evaluation; the fixed random_state keeps the split reproducible.
train_input, test_input, train_output, test_output = train_test_split(
    input_data,
    output_data,
    test_size=0.25,
    random_state=42,
)

In [14]:
# Preview the training features. A bare last expression lets Jupyter use the DataFrame's rich
# (HTML table) display instead of the plain-text dump produced by print().
train_input

     sepal_length  sepal_width  petal_length  petal_width
4             5.0          3.6           1.4          0.2
32            5.2          4.1           1.5          0.1
142           5.8          2.7           5.1          1.9
85            6.0          3.4           4.5          1.6
86            6.7          3.1           4.7          1.5
..            ...          ...           ...          ...
71            6.1          2.8           4.0          1.3
106           4.9          2.5           4.5          1.7
14            5.8          4.0           1.2          0.2
92            5.8          2.6           4.0          1.2
102           7.1          3.0           5.9          2.1

[112 rows x 4 columns]


In [15]:
# Multinomial (softmax) logistic regression.
# The explicit `multi_class='multinomial'` argument is dropped: it is deprecated since
# scikit-learn 1.5 (FutureWarning, scheduled for removal), and with the 'lbfgs' solver
# scikit-learn >= 0.22 already fits a multinomial model on multi-class targets by default,
# so the fitted model is the same.
model = LogisticRegression(solver='lbfgs')

In [16]:
# Train the classifier on the training split.
model.fit(X=train_input, y=train_output)

In [17]:
# Predicted species label for each held-out row.
arr = model.predict(X=test_input)

In [18]:
# Show the predicted species label for each row of test_input.
print(arr)

['Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-virginica' 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica'
 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-setosa' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-setosa']


In [19]:
# Per-class probability estimates for each held-out row.
prob = model.predict_proba(X=test_input)

In [20]:
# One row per test sample, one column per class; the columns follow the order of
# model.classes_ (scikit-learn's predict_proba convention).
print(prob)

[[3.96203029e-03 8.21858620e-01 1.74179350e-01]
 [9.44656227e-01 5.53434652e-02 3.07414314e-07]
 [1.17209961e-08 1.82759736e-03 9.98172391e-01]
 [6.61067844e-03 7.87162917e-01 2.06226405e-01]
 [1.52756406e-03 7.69482894e-01 2.28989542e-01]
 [9.53252146e-01 4.67475955e-02 2.58954258e-07]
 [7.73046633e-02 9.07215552e-01 1.54797846e-02]
 [1.73337477e-04 1.58925517e-01 8.40901145e-01]
 [2.32607696e-03 7.75008279e-01 2.22665644e-01]
 [2.87026381e-02 9.44113688e-01 2.71836737e-02]
 [4.59226687e-04 2.41311890e-01 7.58228883e-01]
 [9.66222207e-01 3.37776824e-02 1.10938728e-07]
 [9.71390646e-01 2.86093027e-02 5.10331278e-08]
 [9.59853509e-01 4.01463321e-02 1.58607549e-07]
 [9.78003936e-01 2.19959626e-02 1.01330690e-07]
 [4.64506704e-03 6.96757476e-01 2.98597457e-01]
 [8.29571052e-06 2.58669522e-02 9.74124752e-01]
 [2.76821967e-02 9.46693411e-01 2.56243925e-02]
 [8.46782987e-03 8.27130094e-01 1.64402077e-01]
 [1.64783300e-05 3.92308290e-02 9.60752693e-01]
 [9.62130901e-01 3.78688228e-02 2.759037

In [21]:
from sklearn.metrics import classification_report , confusion_matrix

In [22]:
# Precision, recall and F1 per species, comparing the held-out labels with the predictions.
report = classification_report(y_true=test_output, y_pred=arr)
print(report)

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        15
Iris-versicolor       1.00      1.00      1.00        11
 Iris-virginica       1.00      1.00      1.00        12

       accuracy                           1.00        38
      macro avg       1.00      1.00      1.00        38
   weighted avg       1.00      1.00      1.00        38



In [23]:
# Predict the species of one hand-written sample.
# The model was fitted on a DataFrame, so the sample is wrapped in a DataFrame carrying the same
# column names: a bare nested list makes scikit-learn (>= 1.0) warn that X has no valid feature
# names, and it leaves the column order implicit.
# NOTE(review): petal_width=5.2 is far above the petal widths visible in the dataset preview —
# confirm this sample is intended.
sample = pd.DataFrame(
    [[5.1, 1.5, 1.4, 5.2]],
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
)
model.predict(sample)



array(['Iris-virginica'], dtype=object)

In [24]:
# Confusion matrix on the held-out split (rows: true species, columns: predicted species).
matrix = confusion_matrix(y_true=test_output, y_pred=arr)
print(matrix)

[[15  0  0]
 [ 0 11  0]
 [ 0  0 12]]


In [26]:
import pickle

# Persist the fitted model to model.pkl in the current working directory.
# The context manager closes (and flushes) the file deterministically; the original passed the
# result of open() straight to pickle.dump and never closed the handle.
# Only unpickle this file from a trusted source: pickle.load can execute arbitrary code.
with open("model.pkl", 'wb') as model_file:
    pickle.dump(model, model_file)