#### Importing necessary libraries

In [1]:
# Core data-handling and plotting libraries, plus the train/test split helper.
import pandas as pd
import numpy as np
# The plotting API lives in the pyplot submodule; aliasing the top-level
# matplotlib package as `plt` would make calls such as plt.plot() fail.
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

#### Importing Iris.csv file

In [2]:
# Load the Iris measurements; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Iris.csv")

In [3]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


Previewing the first few records of the dataset

#### Checking the shape (rows and columns)

In [4]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(150, 6)

Summary statistics of the numeric columns

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max); only the numeric
# columns appear in the output, so Species is not listed.
df.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


Checking for null values, if any

In [6]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

Number of records for each flower species

In [7]:
# Rows per species, to check how balanced the classes are.
df["Species"].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Species, dtype: int64

Encoding the flower species as integers for model training

In [8]:
# Integer class codes used as the training target in place of the string label.
species_codes = {"Iris-setosa": 1, "Iris-versicolor": 2, "Iris-virginica": 3}
df["encoded"] = df["Species"].map(species_codes)

# Series.map leaves NaN wherever a label is missing from the mapping, so stop
# here with a clear message instead of passing NaN targets to the models.
unmapped_species = df.loc[df["encoded"].isna(), "Species"].unique()
if len(unmapped_species) > 0:
    raise ValueError(f"Species labels without an integer code: {list(unmapped_species)}")

In [9]:
# Preview the first five rows again to confirm the new `encoded` column.
df.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,encoded
0,1,5.1,3.5,1.4,0.2,Iris-setosa,1
1,2,4.9,3.0,1.4,0.2,Iris-setosa,1
2,3,4.7,3.2,1.3,0.2,Iris-setosa,1
3,4,4.6,3.1,1.5,0.2,Iris-setosa,1
4,5,5.0,3.6,1.4,0.2,Iris-setosa,1


In [10]:
df[['Species',"encoded"]].value_counts()

Species          encoded
Iris-setosa      1          50
Iris-versicolor  2          50
Iris-virginica   3          50
dtype: int64

Excluding columns that should not be used as model inputs: `Id` is only a row identifier, and `Species` is the raw string label already captured by `encoded`

In [11]:
# Keep only the four measurements and the integer target; df itself is left unchanged.
training_dataset = df.drop(columns=["Id", "Species"])

In [12]:
# Preview the first five rows of the modelling frame.
training_dataset.head(n=5)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,encoded
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1


Splitting the dataset into X and Y: X contains every column of training_dataset except `encoded`, and Y is the `encoded` column (the target)

In [13]:
# Select the target by name rather than by position, so the split stays
# correct even if the column order of training_dataset changes.
X = training_dataset.drop(columns="encoded")  # feature matrix: the four measurements
Y = training_dataset["encoded"]  # target vector: integer species code

In [14]:
# Features should be 2-D and the target 1-D, with matching row counts.
print(X.shape, Y.shape, sep="\n")

(150, 4)
(150,)


Splitting X and Y into training and testing sets (80% and 20% respectively), with `random_state=5` so the split is reproducible and `shuffle=True` so rows are shuffled before splitting.

In [15]:
# Hold out 20% of the rows for testing; the fixed seed makes the shuffled
# split reproducible across runs.
# NOTE(review): the split is not stratified — consider stratify=Y if equal
# class proportions in the test set matter.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=5,
    shuffle=True,
)

In [16]:
# Report the shape of each partition, in the order: train/test features, train/test targets.
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)

(120, 4)
(30, 4)
(120,)
(30,)


## Importing All the required Models for training

In [17]:
from sklearn.svm import SVC # type: ignore
from sklearn.neighbors import KNeighborsClassifier # type: ignore
from sklearn.linear_model import LogisticRegression # type: ignore
from sklearn.metrics import classification_report # type: ignore

## Support Vector Machine Model


In [18]:
# Support vector classifier built with no arguments, i.e. library defaults.
svc = SVC()
svc.fit(X=x_train, y=y_train)

In [19]:
# Predict the held-out rows and print per-class precision, recall and F1.
y_pred_svc = svc.predict(X=x_test)
print(classification_report(y_true=y_test, y_pred=y_pred_svc))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         8
           2       1.00      0.91      0.95        11
           3       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



## K Nearest Neighbors Model

In [20]:
# K-nearest-neighbours classifier built with no arguments, i.e. library defaults.
knn = KNeighborsClassifier()
knn.fit(X=x_train, y=y_train)

In [21]:
# Predict the held-out rows and print per-class precision, recall and F1.
y_pred_knn = knn.predict(X=x_test)
print(classification_report(y_true=y_test, y_pred=y_pred_knn))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         8
           2       1.00      0.82      0.90        11
           3       0.85      1.00      0.92        11

    accuracy                           0.93        30
   macro avg       0.95      0.94      0.94        30
weighted avg       0.94      0.93      0.93        30



## Logistic Regression Model

In [22]:
# Logistic regression with a raised iteration cap (max_iter=10000).
LR = LogisticRegression(max_iter=10000)
LR.fit(X=x_train, y=y_train)

In [23]:
# Predict the held-out rows and print per-class precision, recall and F1.
y_pred_LR = LR.predict(X=x_test)
print(classification_report(y_true=y_test, y_pred=y_pred_LR))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         8
           2       1.00      0.91      0.95        11
           3       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



# Conclusion
### Of the three trained models, the Support Vector Classifier and Logistic Regression perform best, each reaching 97% accuracy on the 30-sample test set, compared with 93% for K-Nearest Neighbors.