Import Libraries

In [41]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score



Load Dataset

In [42]:
# Location of the Iris CSV.
# NOTE(review): this is a machine-specific absolute path; point it at your own
# copy of IRIS.csv before running the notebook on another machine.
DATA_PATH = 'C:/Users/sneha/OneDrive/Desktop/CODSOFT/Task3_IrisClassification/IRIS.csv'

# Read the CSV into a DataFrame and preview its first rows.
df = pd.read_csv(DATA_PATH)
print("\nFirst 5 rows: \n", df.head())
print("\nData Info:\n")
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() emitted a stray "None" line after the summary.
df.info()


First 5 rows: 
    sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa

Data Info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None


Missing Values Check

In [43]:
# Count the null entries in every column and report the per-column totals.
missing_per_column = df.isna().sum()
print("\nMissing values:\n", missing_per_column)


Missing values:
 sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64


Split Features and Target

In [44]:
# The 'species' column is the target; every remaining column is a feature.
y = df['species']
x = df.drop(columns=['species'])

# List the distinct labels present in the target column.
print("\nUnique target classes:", y.unique())


Unique target classes: ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


Encode Target

In [45]:
# Learn the label-to-integer mapping from the target, then apply that mapping
# to the same labels to obtain the encoded target.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

Train and Evaluate Logistic Regression

In [46]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state=42)

# Fit a logistic-regression classifier on the training portion.
model = LogisticRegression(max_iter=200)
model.fit(x_train, y_train)

# Predict the encoded class for every held-out row.
y_pred = model.predict(x_test)

# LabelEncoder assigns ids 0..n_classes-1 in the order of le.classes_, so the
# ids and the original species names line up position by position. Passing
# both keeps the report readable (names instead of bare integers) and keeps
# the confusion matrix the same shape even if a class were absent from the
# held-out rows.
class_ids = np.arange(len(le.classes_))
class_names = list(le.classes_)

print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids, target_names=class_names),
)
print("\nAccuracy Score:\n", accuracy_score(y_test, y_pred))


Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Accuracy Score:
 1.0


Save the Trained Model

In [47]:
# Persist the fitted classifier to disk and report where it was written.
model_filename = 'iris_logistic_model.pkl'
joblib.dump(model, model_filename)

print(f"Model saved as '{model_filename}'")

Model saved as 'iris_logistic_model.pkl'
