# Building Classification models using Iris Dataset

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Location of the raw Iris workbook.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
IRIS_XLS_PATH = 'C:/Users/deepesh/Downloads/iris_data.xls'

# Read the workbook into a DataFrame and preview its first rows.
data = pd.read_excel(IRIS_XLS_PATH)
data.head()

Unnamed: 0,SL,SW,PL,PW,Classification
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Column dtypes and non-null counts of the freshly loaded frame (reveals missing values).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
SL                143 non-null float64
SW                144 non-null float64
PL                144 non-null float64
PW                150 non-null float64
Classification    150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


In [4]:
# (rows, columns) of the loaded frame.
data.shape

(150, 5)

In [5]:
# Number of rows per class label, to check how balanced the target is.
data['Classification'].value_counts()

Iris-versicolor    50
Iris-virginica     50
Iris-setosa        50
Name: Classification, dtype: int64

In [6]:
# Impute missing measurements: every NaN is replaced by the mean of its own column.
# NOTE(review): the means are computed over all rows, i.e. before the
# train/test split, so rows that later land in the test set contribute to them.
feature_cols = ['SL', 'SW', 'PL', 'PW']
data[feature_cols] = data[feature_cols].fillna(data[feature_cols].mean())

In [7]:
# Re-check the non-null counts now that the NaNs have been filled.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
SL                150 non-null float64
SW                150 non-null float64
PL                150 non-null float64
PW                150 non-null float64
Classification    150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


In [8]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
data.describe()

Unnamed: 0,SL,SW,PL,PW
count,150.0,150.0,150.0,150.0
mean,5.855944,3.049306,3.75625,1.198667
std,0.80848,0.421884,1.725479,0.763161
min,4.3,2.0,1.0,0.1
25%,5.2,2.8,1.6,0.3
50%,5.8,3.0,4.2,1.3
75%,6.4,3.275,5.1,1.8
max,7.9,4.4,6.9,2.5


In [9]:
# Separate the target column from the feature columns
target_col = 'Classification'
x = data.drop([target_col], axis='columns')
y = data[target_col]

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

# Logistic Regression model

In [33]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with the library's default settings:
# fit on the training split, then predict labels for the test split.
logit_model = LogisticRegression()
logit_model.fit(x_train, y_train)
y_pred_logit = logit_model.predict(x_test)



In [34]:
from sklearn.metrics import confusion_matrix

In [35]:
# Confusion matrix of the true test labels against the logistic regression predictions.
confusion_matrix(y_test,y_pred_logit)

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 11]], dtype=int64)

# Calculating Accuracy,Recall,Precision and F1 score for logistic regression model

In [36]:
from sklearn.metrics import accuracy_score

In [37]:
from sklearn import metrics

# Per-class precision / recall / F1 (three decimals), followed by overall accuracy.
logit_report = metrics.classification_report(y_test, y_pred_logit, digits=3)
logit_accuracy = accuracy_score(y_test, y_pred_logit)
print(logit_report)
print('Accuracy:', logit_accuracy)

                 precision    recall  f1-score   support

    Iris-setosa      1.000     1.000     1.000        10
Iris-versicolor      1.000     1.000     1.000         9
 Iris-virginica      1.000     1.000     1.000        11

      micro avg      1.000     1.000     1.000        30
      macro avg      1.000     1.000     1.000        30
   weighted avg      1.000     1.000     1.000        30

Accuracy: 1.0


# K-NN Classification Model 

In [38]:
from sklearn.neighbors import KNeighborsClassifier

In [39]:
# Sweep k = 3 .. 14 and record the accuracy of each K-NN model.
# NOTE(review): the scores are measured on the test split, so they should not
# be used to pick k if the same split is also used to report final performance.
neighbors = range(3, 15)
metric_k = []    # one accuracy per k, in the order of `neighbors`
for k in neighbors:
    classifr = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
    y_pred_KNN = classifr.predict(x_test)
    acc = accuracy_score(y_test, y_pred_KNN)
    metric_k += [acc]

In [40]:
# Accuracies collected in the sweep, in the same order as `neighbors` (k = 3 first).
metric_k

[0.9666666666666667,
 1.0,
 0.9666666666666667,
 0.9666666666666667,
 0.9666666666666667,
 0.9666666666666667,
 0.9666666666666667,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0]

In [41]:
# K-NN model with k=5: fit on the training split, predict the test split.
classifr = KNeighborsClassifier(n_neighbors=5)
classifr.fit(x_train, y_train)
y_pred_KNN = classifr.predict(x_test)

# Calculating Accuracy,Recall,Precision and F1 score for K-NN model

In [42]:
# Confusion matrix of the true test labels against the K-NN (k=5) predictions.
confusion_matrix(y_test,y_pred_KNN)

array([[10,  0,  0],
       [ 0,  8,  1],
       [ 0,  0, 11]], dtype=int64)

In [43]:
# Per-class precision / recall / F1 (three decimals), followed by overall accuracy.
knn_report = metrics.classification_report(y_test, y_pred_KNN, digits=3)
knn_accuracy = accuracy_score(y_test, y_pred_KNN)
print(knn_report)
print('Accuracy:', knn_accuracy)

                 precision    recall  f1-score   support

    Iris-setosa      1.000     1.000     1.000        10
Iris-versicolor      1.000     0.889     0.941         9
 Iris-virginica      0.917     1.000     0.957        11

      micro avg      0.967     0.967     0.967        30
      macro avg      0.972     0.963     0.966        30
   weighted avg      0.969     0.967     0.966        30

Accuracy: 0.9666666666666667


# Support Vector Machine classification model

In [44]:
from sklearn.svm import SVC

In [45]:
# Support vector classifier with a linear kernel:
# fit on the training split, then predict labels for the test split.
svm_classifr = SVC(kernel='linear')
svm_classifr.fit(x_train, y_train)
y_pred_svm = svm_classifr.predict(x_test)

In [46]:
# Confusion matrix of the true test labels against the linear-SVM predictions.
confusion_matrix(y_test,y_pred_svm)

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 11]], dtype=int64)

# Calculating Accuracy,Recall,Precision and F1 score for SVM model

In [47]:
from sklearn.metrics import classification_report

# Headed report: per-class precision / recall / F1, then overall accuracy.
svm_report = classification_report(y_test, y_pred_svm)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print('\nClassification Report\n')
print(svm_report)
print('Accuracy:', svm_accuracy)


Classification Report

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

      micro avg       1.00      1.00      1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

Accuracy: 1.0


# Decision tree classification model

In [48]:
from sklearn.tree import DecisionTreeClassifier

In [49]:
# Decision tree: fit on the training split, then predict the test split.
# random_state is pinned (same seed as the train/test split) because tree
# construction permutes features at each split and can break ties between
# equally good splits differently from run to run; without a seed the fitted
# tree, and potentially its predictions, are not reproducible.
dt_classifr = DecisionTreeClassifier(random_state=42)
dt_classifr = dt_classifr.fit(x_train, y_train)
# Name kept as `y_pred_df` because the evaluation cells reference it.
y_pred_df = dt_classifr.predict(x_test)

In [50]:
# Confusion matrix of the true test labels against the decision tree predictions.
confusion_matrix(y_test,y_pred_df)

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 11]], dtype=int64)

# Calculating Accuracy,Recall,Precision and F1 score for Decision tree model

In [51]:
# Per-class precision / recall / F1 with explicit display names, then overall accuracy.
dt_target_names = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
dt_report = classification_report(y_test, y_pred_df, target_names=dt_target_names)
dt_accuracy = accuracy_score(y_test, y_pred_df)
print(dt_report)
print('Accuracy:', dt_accuracy)

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

      micro avg       1.00      1.00      1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

Accuracy: 1.0


# Random Forest Classification model

In [52]:
from sklearn.ensemble import RandomForestClassifier

In [53]:
# Random forest: fit on the training split, then predict the test split.
# - random_state is pinned (same seed as the train/test split) because the
#   forest bootstraps rows and samples features at random; unseeded runs are
#   not reproducible.
# - n_estimators is set explicitly because the library default differs between
#   scikit-learn releases (10 in older versions, 100 from 0.22 on), so relying
#   on it makes results depend on the installed version.
rf_classifr = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifr = rf_classifr.fit(x_train, y_train)
y_pred_rf = rf_classifr.predict(x_test)



In [54]:
# Confusion matrix of the true test labels against the random forest predictions.
confusion_matrix(y_test,y_pred_rf)

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 11]], dtype=int64)

# Calculating Accuracy,Recall,Precision and F1 score for Random forest model

In [55]:
# Per-class precision / recall / F1 with explicit display names, then overall accuracy.
rf_target_names = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
rf_report = classification_report(y_test, y_pred_rf, target_names=rf_target_names)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print(rf_report)
print('Accuracy:', rf_accuracy)

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

      micro avg       1.00      1.00      1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

Accuracy: 1.0


# Accuracy is 100% (1.0) for the Logistic Regression, SVC, Decision Tree and Random Forest classifiers, so these models are preferred for the Iris dataset.
# Accuracy is 96.7% (0.9667) for the K-NN model (one of the 30 test samples is misclassified), so it is not preferred here.