# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier

from sklearn.neighbors import KNeighborsClassifier

from sklearn.svm import SVC

from sklearn.model_selection import RandomizedSearchCV

import warnings
warnings.filterwarnings('ignore')

# Importing DataSet

In [2]:
df = pd.read_csv("IRIS_Flower.csv")
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


## EDA 

Here, the dataset will be cleaned, transformed and also Scaling is also done. EDA Means Exploratory Data Analysis.

In [3]:
# 1. Information of the "df"

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [4]:
# 2. Descripitive Analysis of the "df"

df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
sepal_length,150.0,5.843333,0.828066,4.3,5.1,5.8,6.4,7.9
sepal_width,150.0,3.054,0.433594,2.0,2.8,3.0,3.3,4.4
petal_length,150.0,3.758667,1.76442,1.0,1.6,4.35,5.1,6.9
petal_width,150.0,1.198667,0.763161,0.1,0.3,1.3,1.8,2.5


In [5]:
# 3. Checking for any Null Data in "df" 

df.isnull().any()

sepal_length    False
sepal_width     False
petal_length    False
petal_width     False
species         False
dtype: bool

In [6]:
# 4. Checking for Correlaation between the Columns

df.corr()

ValueError: could not convert string to float: 'Iris-setosa'

In [None]:
# 4.1 Plotting the Heatmap for Correlation 

sns.heatmap(df.corr(), annot= True)

In [None]:
# 5. Checking for Duplicate Data in "df"

df.duplicated()

In [None]:
# 6. Plotting pairplot for "df"

sns.pairplot(df)

In [None]:
# 7. Getting Labels for the "df["species"]"

labelencoder= LabelEncoder()

df['species'] = labelencoder.fit_transform(df['species'])
df

## Model Validation Technique 

In [None]:
X = df.iloc[:,0:4]
Y = df.iloc[:,4:]

In [None]:
X

In [None]:
Y

In [None]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=42, test_size= 0.4)

In [None]:
x_train

In [None]:
x_test

In [None]:
y_train

In [None]:
y_test

## Model Building 

### 1. Decision Tree Classifier 

In [None]:
model_1 = DecisionTreeClassifier(criterion = 'entropy', max_depth = None)

model_1.fit(x_train, y_train)

In [None]:
# Plotting the Decision Tree

plt.figure(dpi=1200)
tree.plot_tree(model_1);

In [None]:
fn=['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)']
cn=['setosa', 'versicolor', 'virginica']
plt.figure(dpi=1200)
tree.plot_tree(model_1,
               feature_names = fn, 
               class_names=cn,
               filled = True);

In [None]:
# Prediction for the Test Data set

pred = model_1.predict(x_test)
pred

In [None]:
# Classification Report for the DataSet

print(classification_report(y_test,pred))

In [None]:
# Accuracy
accuracy = accuracy_score(y_test,pred)
print('Accuracy:', accuracy)

### 1.1 Decision Tree Using Gini  

In [None]:
model_2 = DecisionTreeClassifier(criterion = 'gini', max_depth = None)

model_2.fit(x_train, y_train)

In [None]:
# Plotting the Decision Tree

plt.figure(dpi=1200)
tree.plot_tree(model_2);

In [None]:
fn=['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)']
cn=['setosa', 'versicolor', 'virginica']
plt.figure(dpi=1200)
tree.plot_tree(model_2,
               feature_names = fn, 
               class_names=cn,
               filled = True);

In [None]:
# Prediction for the Test Data set

pred_1 = model_2.predict(x_test)
pred_1

In [None]:
# Classification Report for the DataSet

print(classification_report(y_test,pred_1))

In [None]:
# Accuracy
accuracy = accuracy_score(y_test,pred_1)
print('Accuracy:', accuracy)

### 2. Random Forest Classifier 

In [None]:
model_3 = RandomForestClassifier(n_estimators =100, random_state=42)
model_3.fit(x_train,y_train)

In [None]:
# we need to give starting point to plot a Random Forest 
tree_to_plot = model_3.estimators_[0]

In [None]:
# Plotting the Random Forest

plt.figure(dpi=1200)
tree.plot_tree(tree_to_plot);

In [None]:
fn=['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)']
cn=['setosa', 'versicolor', 'virginica']
plt.figure(dpi=1200)
tree.plot_tree(tree_to_plot,
               feature_names = fn, 
               class_names=cn,
               filled = True);

In [None]:
# Prediction for the Test Dataset

pred_2 = model_3.predict(x_test)
pred_2

In [None]:
# Classification Report for the Random Forst Classifier 

print(classification_report(y_test,pred_2))

In [None]:
# Accuarcy for the RandomForestClassifier

Accuarcy = accuracy_score(y_test, pred_2)
print("Accuarcy :",Accuarcy)

### 3. K Nearest Neigbhours 

In [None]:
model_4 = KNeighborsClassifier(n_neighbors = 2)
model_4.fit(x_train,y_train)

In [None]:
# Prediction 
pred_3 = model_4.predict(x_test)
pred_3

In [None]:
# Classification Report For KNeighbors

print(classification_report(y_test, pred_3))

In [None]:
# Accuracy for the KNeighbors

Accuracy = accuracy_score(y_test, pred_3)
print("Accuracy :", Accuracy)

### 4. Support Vector Machine (SVM) 

In [None]:
model_5 = SVC()

param_grid = [{'kernel':['linear','rbf'],'gamma':[0.5,0.1,0.01],'C':[10,0.1,0.001,0.0001] }]
gsv = RandomizedSearchCV(model_5,param_grid,cv=6,)
gsv.fit(x_train,y_train)

In [None]:
# Finding the best kernel and gamma and C
gsv.best_params_ , gsv.best_score_

In [None]:
model_5 = SVC(kernel='rbf',gamma=0.5 ,C=0.1)
model_5.fit(x_train , y_train)

In [None]:
#Predicting on test data
preds_4 = model_5.predict(x_test)
preds_4

In [None]:
# Classification Report For KNeighbors

print(classification_report(y_test, preds_4))

In [None]:
# Accuracy for the KNeighbors

Accuracy = accuracy_score(y_test, preds_4)
print("Accuracy :", Accuracy)