# Iris Flower Classification (Task-1)

### - Priyanshu Jha 

In [1]:
# importing required libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
iris = pd.read_csv("Iris.csv", na_values=[" ?"])

irisNew = iris.copy()

In [3]:
iris.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [5]:
iris.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [6]:
iris.isna().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [7]:
#corelation matrix to check inter depandability of columns
iris.drop('Species', axis=1).corr()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
Id,1.0,0.716676,-0.397729,0.882747,0.899759
SepalLengthCm,0.716676,1.0,-0.109369,0.871754,0.817954
SepalWidthCm,-0.397729,-0.109369,1.0,-0.420516,-0.356544
PetalLengthCm,0.882747,0.871754,-0.420516,1.0,0.962757
PetalWidthCm,0.899759,0.817954,-0.356544,0.962757,1.0


In [8]:
np.unique(iris["Species"])
# iris['Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [9]:
x = iris.drop('Species', axis=1)
print(x)

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
0      1            5.1           3.5            1.4           0.2
1      2            4.9           3.0            1.4           0.2
2      3            4.7           3.2            1.3           0.2
3      4            4.6           3.1            1.5           0.2
4      5            5.0           3.6            1.4           0.2
..   ...            ...           ...            ...           ...
145  146            6.7           3.0            5.2           2.3
146  147            6.3           2.5            5.0           1.9
147  148            6.5           3.0            5.2           2.0
148  149            6.2           3.4            5.4           2.3
149  150            5.9           3.0            5.1           1.8

[150 rows x 5 columns]


In [10]:
y = iris['Species']
print(y)

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: Species, Length: 150, dtype: object


In [11]:
# Replace categorical values with numerical values using rename and replace

y = iris['Species'].replace(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'],[0, 1, 2])
print(y)

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: Species, Length: 150, dtype: int64


In [12]:
train_x,test_x,train_y,test_y = train_test_split(x,y,test_size=0.3, random_state=0)

In [13]:
# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(train_x)
X_test = scaler.transform(test_x)

In [14]:
# Make an instance of the Model
logistic = LogisticRegression(max_iter=500)

# Fitting the values for x and y
logistic.fit(train_x,train_y)

In [15]:
# Prediction from test data
prediction = logistic.predict(test_x)
print(prediction)

[2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0
 1 1 1 2 0 2 0 0]


In [16]:
# Evaluate the model
accuracy = accuracy_score(test_y, prediction)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [17]:
confusionMatrix = confusion_matrix(test_y, prediction)
print("Confusion Matrix:")
print(confusionMatrix)

Confusion Matrix:
[[16  0  0]
 [ 0 18  0]
 [ 0  0 11]]


In [18]:
report = classification_report(test_y, prediction)
print("Classification Report:")
print(report)

# Printing the misclassified values from prediction
print(f'Misclassified samples: {(test_y != prediction).sum()}')

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        11

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Misclassified samples: 0


In [19]:
# Create a Random Forest classifier
RandomForest = RandomForestClassifier()

# Train the classifier
RandomForest.fit(train_x,train_y)

In [20]:
# Prediction from test data
prediction2 = RandomForest.predict(test_x)
print(prediction2)

[2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0
 1 1 1 2 0 2 0 0]


In [21]:
# Evaluate the model
accuracy = accuracy_score(test_y, prediction2)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [22]:
confusionMatrix = confusion_matrix(test_y, prediction2)
print("Confusion Matrix:")
print(confusionMatrix)

Confusion Matrix:
[[16  0  0]
 [ 0 18  0]
 [ 0  0 11]]


In [23]:
report = classification_report(test_y, prediction2)
print("Classification Report:")
print(report)

# Printing the misclassified values from prediction2
print(f'Misclassified samples: {(test_y != prediction2).sum()}')

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        11

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Misclassified samples: 0
