In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Importing the dataset
dataset = pd.read_csv('iris.csv')

In [3]:
dataset.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [4]:
dataset

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [23]:
#Splitting the dataset into the Training set and Test set
X = dataset.iloc[:, [0,1,2, 3]].values
y = dataset.iloc[:, 4].values

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [24]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [25]:
# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 42, solver='lbfgs')
classifier.fit(X_train, y_train)

LogisticRegression(random_state=42)

In [26]:
# Predicting the Test set results
y_pred = classifier.predict(X_test)

In [27]:
# Predict probabilities
probs_y=classifier.predict_proba(X_test)

In [28]:
probs_y = np.round(probs_y, 2)

In [29]:
res = "{:<10} | {:<10} | {:<10} | {:<13} | {:<5}".format("y_test", "y_pred", "Setosa(%)", "versicolor(%)", "virginica(%)\n")
res += "-"*65+"\n"
res += "\n".join("{:<10} | {:<10} | {:<10} | {:<13} | {:<10}".format(x, y, a, b, c) for x, y, a, b, c in zip(y_test, y_pred, probs_y[:,0], probs_y[:,1], probs_y[:,2]))
res += "\n"+"-"*65+"\n"
print(res)

y_test     | y_pred     | Setosa(%)  | versicolor(%) | virginica(%)
-----------------------------------------------------------------
Iris-versicolor | Iris-versicolor | 0.01       | 0.87          | 0.12      
Iris-setosa | Iris-setosa | 0.97       | 0.03          | 0.0       
Iris-virginica | Iris-virginica | 0.0        | 0.0           | 1.0       
Iris-versicolor | Iris-versicolor | 0.01       | 0.75          | 0.23      
Iris-versicolor | Iris-versicolor | 0.0        | 0.74          | 0.25      
Iris-setosa | Iris-setosa | 0.93       | 0.07          | 0.0       
Iris-versicolor | Iris-versicolor | 0.09       | 0.88          | 0.03      
Iris-virginica | Iris-virginica | 0.0        | 0.06          | 0.93      
Iris-versicolor | Iris-versicolor | 0.0        | 0.6           | 0.4       
Iris-versicolor | Iris-versicolor | 0.03       | 0.92          | 0.05      
Iris-virginica | Iris-virginica | 0.0        | 0.2           | 0.79      
Iris-setosa | Iris-setosa | 0.95       | 0.05       

In [30]:
classifier.predict([[5.1,3.5,1.4,0.2]])

array(['Iris-versicolor'], dtype=object)

In [31]:
classifier.predict_proba([[5.0,3.6,1.4,0.2]])

array([[7.31265952e-04, 7.93000502e-01, 2.06268232e-01]])

In [32]:
classifier.score(X_train, y_train)

0.9642857142857143

In [33]:
expected = y_train
predicted = classifier.predict(X_train)

In [34]:
from sklearn import metrics
print(metrics.classification_report(expected, predicted))


                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        35
Iris-versicolor       0.97      0.92      0.95        39
 Iris-virginica       0.93      0.97      0.95        38

       accuracy                           0.96       112
      macro avg       0.97      0.97      0.97       112
   weighted avg       0.97      0.96      0.96       112

