In [1]:
# importing required libraries

import  numpy as np
import pandas as pd

df = pd.read_excel('/content/iris.xls')
df.head()

Unnamed: 0,SL,SW,PL,PW,Classification
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [2]:
# pre-processing steps
# checking null values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   SL              143 non-null    float64
 1   SW              144 non-null    float64
 2   PL              144 non-null    float64
 3   PW              150 non-null    float64
 4   Classification  150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [3]:
df.isnull().sum()

SL                7
SW                6
PL                6
PW                0
Classification    0
dtype: int64

In [4]:
df.describe()

Unnamed: 0,SL,SW,PL,PW
count,143.0,144.0,144.0,150.0
mean,5.855944,3.049306,3.75625,1.198667
std,0.828168,0.430644,1.761306,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [5]:
# To insert the mean value of each column into its missing rows:
df.fillna(df.mean(numeric_only=True).round(1), inplace=True)

In [6]:
df.isnull().sum()

SL                0
SW                0
PL                0
PW                0
Classification    0
dtype: int64

In [7]:
df.head()

Unnamed: 0,SL,SW,PL,PW,Classification
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,5.9,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [8]:
df.columns

Index(['SL', 'SW', 'PL', 'PW', 'Classification'], dtype='object')

In [9]:
df['Classification'].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Classification, dtype: int64

In [11]:
X=df.iloc[:,:4]
y=df.iloc[:,4]

X.head()


Unnamed: 0,SL,SW,PL,PW
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,5.9,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [12]:
y.head()

0    Iris-setosa
1    Iris-setosa
2    Iris-setosa
3    Iris-setosa
4    Iris-setosa
Name: Classification, dtype: object

In [13]:
y.shape

(150,)

In [14]:
X.shape

(150, 4)

In [15]:
# Split the Data Into Train and Test Datasets
#training : 80%  and testing : 20%

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.20, random_state=42)

print(X_train)

      SL   SW   PL   PW
22   4.6  3.6  1.0  0.2
15   5.7  4.4  1.5  0.4
65   6.7  3.1  4.4  1.4
11   4.8  3.4  1.6  0.2
42   4.4  3.2  3.8  0.2
..   ...  ...  ...  ...
71   6.1  2.8  4.0  1.3
106  5.9  2.5  4.5  1.7
14   5.8  4.0  1.2  0.2
92   5.8  2.6  4.0  1.2
102  7.1  3.0  5.9  2.1

[120 rows x 4 columns]


In [16]:
print(y_train)

22         Iris-setosa
15         Iris-setosa
65     Iris-versicolor
11         Iris-setosa
42         Iris-setosa
            ...       
71     Iris-versicolor
106     Iris-virginica
14         Iris-setosa
92     Iris-versicolor
102     Iris-virginica
Name: Classification, Length: 120, dtype: object


In [17]:
# Using Classification Algorithms

# 1. K-nearest Neighbors
# Training the model

from sklearn.neighbors import KNeighborsClassifier
# Create and train a K-Nearest Neighbors Classifier (KNN)
knn_classifier = KNeighborsClassifier()
knn_classifier.fit(X_train, y_train)

In [18]:
# Make predictions on the test set
knn_predictions = knn_classifier.predict(X_test)

# Evaluate the accuracy
from sklearn.metrics import accuracy_score
knn_accuracy = accuracy_score(y_test, knn_predictions)

print("K-Nearest Neighbors Classifier Accuracy:", knn_accuracy)

K-Nearest Neighbors Classifier Accuracy: 0.9666666666666667


In [19]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Confusion Matrix and Classification Report
print(confusion_matrix(knn_predictions, y_test))
print(classification_report(knn_predictions, y_test))

[[10  0  0]
 [ 0  8  0]
 [ 0  1 11]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.89      1.00      0.94         8
 Iris-virginica       1.00      0.92      0.96        12

       accuracy                           0.97        30
      macro avg       0.96      0.97      0.97        30
   weighted avg       0.97      0.97      0.97        30



In [20]:
# 2. Support Vector Machine
# Create and train a Support Vector Classifier (SVC)

from sklearn.svm import SVC

svc_classifier = SVC()
svc_classifier.fit(X_train, y_train)

# Make predictions on the test set
svc_predictions = svc_classifier.predict(X_test)

# Evaluate the accuracy of the classifier
svc_accuracy = accuracy_score(y_test, svc_predictions)
print("Support Vector Classifier Accuracy:", svc_accuracy)

# Confusion Matrix and Classification Report
print(confusion_matrix(svc_predictions, y_test))
print(classification_report(svc_predictions, y_test))

Support Vector Classifier Accuracy: 1.0
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [21]:
# 3. Decision Tree Classifier

from sklearn.tree import DecisionTreeClassifier

In [22]:
dtc_classifier = DecisionTreeClassifier()
dtc_classifier.fit(X_train, y_train)

# Make predictions on the test set
dtc_predictions = dtc_classifier.predict(X_test)

# Evaluate the accuracy of the classifier
dtc_accuracy = accuracy_score(y_test, dtc_predictions)
print("Decision Tree Classifier Accuracy:", dtc_accuracy)

# Confusion Matrix and Classification Report
print(confusion_matrix(dtc_predictions , y_test))
print(classification_report(dtc_predictions , y_test))

Decision Tree Classifier Accuracy: 1.0
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



Therefore, Support Vector Machine and Decision Tree Classifier gives best result as there accuracies are 1.0



