In [2]:
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [3]:
# Load the Iris CSV from the working directory and discard its first column
# by position. NOTE(review): the dropped column is presumably a row-id
# column -- confirm against the CSV header.
df = (
    pd.read_csv('./Iris.csv')
    .iloc[:, 1:]
)
df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [4]:
# List the distinct class labels present in the Species column.
df['Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [5]:
# Feature matrix: every column of df except the Species label.
x = df.drop('Species', axis='columns')
x

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [6]:
# Binary target: 1 where the species is Iris-setosa, 0 for every other class.
# Labels are whitespace-stripped before the comparison.
y = (df['Species'].str.strip() == 'Iris-setosa').astype(int)
y

0      1
1      1
2      1
3      1
4      1
      ..
145    0
146    0
147    0
148    0
149    0
Name: Species, Length: 150, dtype: int32

## Split the Data

In [7]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the 0/1 class
# ratio the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [8]:
# Sanity-check the split sizes: feature and target shapes for train, then test.
print(
    f'{X_train.shape} {y_train.shape}\n'
    f'{X_test.shape} {y_test.shape}'
)

(120, 4) (120,)
(30, 4) (30,)


In [9]:
# Share of each class (0 / 1) in the training target, as fractions summing to 1.
y_train.value_counts(normalize=True)


Species
0    0.666667
1    0.333333
Name: proportion, dtype: float64

In [10]:
# Share of each class (0 / 1) in the held-out test target, as fractions summing to 1.
y_test.value_counts(normalize=True)


Species
0    0.666667
1    0.333333
Name: proportion, dtype: float64

## Train the Model

In [11]:
# Logistic regression with default hyper-parameters, trained on bare NumPy
# arrays (.values) rather than on the pandas objects themselves.
model = LogisticRegression()
model.fit(X_train.values, y_train.values)
model

## Model Evaluation

In [12]:
# Predict class labels for the training split first, then the test split,
# feeding the model the same array form (.values) it was fitted on.
y_train_pred, y_test_pred = [
    model.predict(split.values) for split in (X_train, X_test)
]

# Report the fraction of correctly classified rows on each split.
print(f'Train Acc: {accuracy_score(y_train, y_train_pred)}')
print(f'Test Acc: {accuracy_score(y_test, y_test_pred)}')

Train Acc: 1.0
Test Acc: 1.0


In [13]:
# Per-class precision / recall / F1 on the held-out test split.
print(
    classification_report(y_true=y_test, y_pred=y_test_pred)
)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



## Model Output Format


In [14]:
# Display the raw predictions for the training rows: a 1-D array of 0/1 labels.
y_train_pred


array([0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

In [15]:
# Show the first training row as a plain NumPy vector of feature values.
X_train.to_numpy()[0]


array([6. , 2.7, 5.1, 1.6])

In [16]:
# Predict for one hand-written sample. predict() is given a 2-D array with
# one row per sample, so the four feature values are reshaped to (1, 4).
model.predict(
    np.array([6.0, 2.7, 5.1, 1.6]).reshape(1, -1)
)


array([0])

## Serialization


In [17]:
# Persist the fitted model to model.joblib in the current working directory.
joblib.dump(value=model, filename='model.joblib')


['model.joblib']