**OASIS INFOBYTE**

**NAME - BHOLA ROY**
# Iris Flower Classification Data


**The Iris Flower classification dataset comprises 150 samples of Iris flowers, categorized into three species:**

1. Iris Versicolor
2. Iris Virginica 
3. Iris Setosa

In [None]:
import numpy as np
import pandas as pd 
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides library deprecation and convergence
# warnings emitted by later cells — consider narrowing the filter.
warnings.filterwarnings('ignore')

**DATASET**

In [None]:
# Load the Iris CSV from the Kaggle input directory and preview its last rows.
iris_csv_path = "/kaggle/input/iriscsv/Iris.csv"
df = pd.read_csv(iris_csv_path)
df.tail()

**Processing the Dataset**

In [None]:
# Drop the 'Id' column as it is not required for analysis.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# column is already gone and a plain drop would raise KeyError.
df = df.drop(columns=["Id"], errors="ignore")

In [None]:
# Summary statistics of the numeric columns, one row per column
summary_stats = df.describe()
summary_stats.T

In [None]:
# Count missing values in each column
df.isna().sum()

In [None]:
# Report the (rows, columns) dimensions of the DataFrame
dataset_shape = df.shape
print('Shape of the dataset : ', dataset_shape)

In [None]:
# Print pandas' summary of the DataFrame (columns, non-null counts, dtypes)
df.info()

In [None]:
# Number of rows belonging to each value of the 'Species' column
species_column = df['Species']
species_column.value_counts()

In [None]:
# Encode the 'Species' class labels as integers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# Learn the label -> integer mapping first, then apply it to the column
le.fit(df['Species'])
df['Species'] = le.transform(df['Species'])
df['Species']

In [None]:
# Show the first two rows of every encoded class instead of the whole frame,
# so each 'Species' value appears in the preview.
df.groupby('Species').head(2)

**Data Analysis**

In [None]:
# Bar chart of the number of samples per 'Species' value
species_palette = ['yellow', 'orange', 'g']
sns.countplot(data=df, x='Species', palette=species_palette)


In [None]:
# Distribution of each feature's values per species: sepal length
sns.swarmplot(data=df, x='Species', y='SepalLengthCm', color='g')

In [None]:
# Sepal width per species
sns.swarmplot(data=df, x='Species', y='SepalWidthCm', color='r')

In [None]:
# Petal length per species
sns.swarmplot(data=df, x='Species', y='PetalLengthCm', color='y')

In [None]:
# Petal width per species
sns.swarmplot(data=df, x='Species', y='PetalWidthCm', color='orange')

In [None]:
# Plot the histograms of all features together on one set of axes.
# Semi-transparent bars plus a legend keep the overlapping distributions
# distinguishable; opaque, unlabelled bars would hide one another.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
fig, ax = plt.subplots()
for column in feature_columns:
    df[column].hist(ax=ax, alpha=0.5, label=column)
ax.set_xlabel('Measurement (cm)')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of all features')
ax.legend()
plt.show()

In [None]:
# Pairwise scatter plots of every column, coloured by 'Species'
sns.pairplot(data=df, hue='Species')

**Correlation Matrix**

In [None]:
# Pairwise correlation between the DataFrame's columns, shown transposed
correlation_table = df.corr()
correlation_table.T

In [None]:
# Render the correlation matrix as an annotated heatmap
fig, ax = plt.subplots(figsize=(5, 4))
corr = df.corr()
sns.heatmap(data=corr, cmap='YlGnBu', annot=True, ax=ax)

In [None]:
# Target vector: the encoded 'Species' column
Y = df['Species']
# Feature matrix: every remaining column
X = df.drop('Species', axis=1)

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
from sklearn.model_selection import train_test_split
split_parts = train_test_split(
    X,
    Y,
    test_size=0.30,
    random_state=1,
)
x_train, x_test, y_train, y_test = split_parts

In [None]:
# Logistic Regression Model
from sklearn.linear_model import LogisticRegression
# Raise the iteration cap well above the default of 100: on these unscaled
# features the solver can stop before converging, and the resulting
# ConvergenceWarning is hidden by the notebook's global warnings filter.
logmodel = LogisticRegression(max_iter=1000)
logmodel.fit(x_train, y_train)
# Mean accuracy on the held-out test split, expressed as a percentage
logreg_Accuracy = logmodel.score(x_test, y_test) * 100
print("Accuracy (Logistic Regression): ", logreg_Accuracy)

In [None]:
# K-nearest Neighbours classifier with default settings
from sklearn.neighbors import KNeighborsClassifier
# fit() returns the estimator itself, so construction and training chain
knnmodel = KNeighborsClassifier().fit(x_train, y_train)
# Test-split accuracy as a percentage
knn_Accuracy = 100 * knnmodel.score(x_test, y_test)
print("Accuracy (KNN): ", knn_Accuracy)

In [None]:
# Decision Tree Model
from sklearn.tree import DecisionTreeClassifier
# Seed the tree (same seed as the train/test split) so that the fitted model
# and its reported accuracy are reproducible across Restart & Run All.
dtmodel = DecisionTreeClassifier(random_state=1)
dtmodel.fit(x_train, y_train)
# Mean accuracy on the held-out test split, expressed as a percentage
dt_Accuracy = dtmodel.score(x_test, y_test) * 100
print("Accuracy (Decision Tree): ", dt_Accuracy)

**Confusion Matrix**

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Test-set predictions from each fitted model, in the order
# logistic regression, KNN, decision tree
y_pred1, y_pred2, y_pred3 = (
    fitted_model.predict(x_test)
    for fitted_model in (logmodel, knnmodel, dtmodel)
)

In [None]:
# One confusion matrix per model: true test labels against that model's predictions
conf_matrix1, conf_matrix2, conf_matrix3 = (
    confusion_matrix(y_test, predicted)
    for predicted in (y_pred1, y_pred2, y_pred3)
)

In [None]:
# Confusion matrix heatmap for the logistic regression model.
# Map the encoded class ids back through the label encoder so the axes show
# the original species labels rather than opaque integers.
class_names = le.inverse_transform(np.unique(Y))
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix1, annot=True, fmt='d', cmap='cividis', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix of Logistic Regression')
plt.show()

In [None]:
# Confusion matrix heatmap for the KNN model.
# Map the encoded class ids back through the label encoder so the axes show
# the original species labels rather than opaque integers.
class_names = le.inverse_transform(np.unique(Y))
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix2, annot=True, fmt='d', cmap='inferno', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix of KNN')
plt.show()

In [None]:
# Confusion matrix heatmap for the decision tree model.
# Map the encoded class ids back through the label encoder so the axes show
# the original species labels rather than opaque integers.
class_names = le.inverse_transform(np.unique(Y))
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix3, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix of Decision Tree')
plt.show()

*REPORT*

In [None]:
# Bar chart comparing the test accuracy (in percent) of the three models
accuracies = [logreg_Accuracy, knn_Accuracy, dt_Accuracy]
models = ['Logistic Regression', 'KNN', 'Decision Tree']
bar_colors = ['violet', 'green', 'orange']

plt.bar(x=models, height=accuracies, color=bar_colors)
plt.title("Model Comparison - Accuracy")
plt.xlabel("Models")
plt.ylabel("Accuracy")
# Pin the y-axis to the full 0-100 percentage range
plt.ylim(0, 100)
plt.show()