In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = pd.read_excel('iris_flower_data.xlsx')
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
data.shape

(150, 6)

In [4]:
data.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [6]:
data['Species'].value_counts()

Species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64

In [7]:
X = data.iloc[:,1:5]
Y = data['Species']

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size = 0.25)

In [9]:
set = [X_train, X_test, Y_train, Y_test]
for i in set:
    print(i.shape)

(112, 4)
(38, 4)
(112,)
(38,)


In [10]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

## Decision Tree Classifier model

In [11]:
dt_clf = DecisionTreeClassifier(criterion = 'entropy', max_depth = 3, min_samples_leaf = 5, random_state = 50)
dt_clf.fit(X_train, Y_train)

In [12]:
predict1 = dt_clf.predict(X_test)
predict1

array(['Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-virginica', 'Iris-versicolor',
       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor',
       'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-setosa', 'Iris-virginica', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa',
       'Iris-virginica', 'Iris-versicolor', 'Iris-virginica',
       'Iris-virginica'], dtype=object)

In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [20]:
print('Decision Tree Classifier results :')
print('accuracy_score :', accuracy_score(Y_test,predict1)*100)
print('precision_score :', precision_score(Y_test,predict1, average = 'weighted')*100)
print('recall_score :', recall_score(Y_test,predict1, average = 'weighted')*100)
print('f1_score :', f1_score(Y_test,predict1, average = 'weighted')*100)

Decision Tree Classifier results :
accuracy_score : 84.21052631578947
precision_score : 84.32748538011697
recall_score : 84.21052631578947
f1_score : 83.9572192513369


## Random Forest Classifier Model

In [15]:
rt_clf = RandomForestClassifier()
rt_clf.fit(X_train, Y_train)

In [16]:
predict2 = rt_clf.predict(X_test)
predict2

array(['Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-versicolor',
       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor',
       'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-setosa', 'Iris-virginica', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa',
       'Iris-virginica', 'Iris-versicolor', 'Iris-virginica',
       'Iris-virginica'], dtype=object)

In [21]:
print('Random Forest Classifier result :')
print('accuracy_score : {}'.format(accuracy_score(Y_test,predict2)*100))
print('precision_score : {}'.format(precision_score(Y_test,predict2, average = 'weighted')*100))
print('recall_score : {}'.format(recall_score(Y_test,predict2, average = 'weighted')*100))
print('f1_score : {}'.format(f1_score(Y_test,predict2, average = 'weighted')*100))

Random Forest Classifier result :
accuracy_score : 92.10526315789474
precision_score : 92.33468286099864
recall_score : 92.10526315789474
f1_score : 92.1358234295416


## predicting the data

In [18]:
data_test = pd.read_excel('iris_flower_data.xlsx', sheet_name = 1 ,index_col = None)
data_test = data_test.iloc[:,1:5]
data_test.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.3,3.7,1.5,0.2
1,6.9,3.1,4.9,1.5
2,5.0,3.3,1.4,0.2
3,5.6,2.8,4.9,2.0
4,7.7,2.8,6.7,2.0


In [19]:
rt_clf.predict(data_test)

array(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-versicolor'], dtype=object)