# Iris Flower Classification

### Importing Required Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


# Install a global "ignore" filter so no warnings are shown by later cells.
# NOTE(review): this also hides deprecation and convergence warnings raised
# by the plotting and modelling libraries used below.
from warnings import filterwarnings
filterwarnings(action='ignore')

### Loading the Dataset

In [None]:
# Read the dataset from IRIS.csv (resolved against the working directory).
iris_df = pd.read_csv(filepath_or_buffer="IRIS.csv")

In [None]:
# Preview the first five rows (the default for head()).
iris_df.head(n=5)

### Printing the whole table

In [None]:
# Print the DataFrame using pandas' default text representation.
print(iris_df)

In [None]:
# Report the table dimensions as a (rows, columns) tuple.
n_rows, n_cols = iris_df.shape
print((n_rows, n_cols))

In [None]:
# Summary statistics produced by DataFrame.describe().
summary_stats = iris_df.describe()
print(summary_stats)

### Checking for null values

In [None]:
# Count missing values per column. The summary statistics are already
# printed by the describe() cell, so they are not repeated here.
print(iris_df.isna().sum())

In [None]:
# Show the first five rows again.
iris_df.head(n=5)

In [None]:
# Show up to the first 150 rows.
iris_df.head(n=150)

In [None]:
# Show up to the last 100 rows.
iris_df.tail(n=100)

In [None]:
# Count rows whose species label is exactly 'versicolor'.
is_versicolor = iris_df['species'] == 'versicolor'
n = int(is_versicolor.sum())
print("No of Versicolor in Dataset:", n)

In [None]:
# Count rows whose species label is exactly 'virginica'.
is_virginica = iris_df['species'] == 'virginica'
n1 = int(is_virginica.sum())
print("No of Virginica in Dataset:", n1)

In [None]:
# Count rows whose species label is exactly 'setosa'.
is_setosa = iris_df['species'] == 'setosa'
n2 = int(is_setosa.sum())
print("No of Setosa in Dataset:", n2)

### Plotting the percentage of different species present in the dataset

In [None]:
# Take the slice sizes and labels from the loaded data rather than from
# hard-coded values, so the chart always reflects the actual class balance.
species_counts = iris_df['species'].value_counts()

fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.axis('equal')  # equal aspect ratio keeps the pie circular
l = [str(name).capitalize() for name in species_counts.index]
s = list(species_counts.values)
ax.pie(s, labels=l, autopct='%1.2f%%')
plt.show()

### Checking for outliers

In [None]:
import matplotlib.pyplot as plt

# One box plot per sepal measurement, each on its own numbered figure.
for figure_number, column in enumerate(('sepal_length', 'sepal_width'), start=1):
    plt.figure(figure_number)
    plt.boxplot([iris_df[column]])
plt.show()

### Plotting histogram for Sepal Length, Sepal Width, Petal Length, Petal Width

In [None]:
# Draw a histogram for each column that DataFrame.hist() can plot.
iris_df.hist()
plt.show()

In [None]:
# Kernel-density plots, one subplot per column, on a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
iris_df.plot.density(subplots=True, layout=(2, 2), sharex=False, ax=axes)
plt.tight_layout()
plt.show()

### Box and whisker plots -> For understanding the distribution of input attributes

In [None]:
# Box-and-whisker plots, one subplot per column, on a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
iris_df.plot.box(subplots=True, layout=(2, 2), sharex=False, ax=axes)
plt.tight_layout()
plt.show()

In [None]:
# Violin plot of each measurement grouped by species, laid out on a 2x2
# grid in the order listed below.
custom_palette = ['#ff9999', '#66b3ff', '#99ff99']
violin_columns = ('petal_length', 'petal_width', 'sepal_length', 'sepal_width')

plt.figure(figsize=(10, 10))
for position, column in enumerate(violin_columns, start=1):
    plt.subplot(2, 2, position)
    sns.violinplot(x='species', y=column, data=iris_df, palette=custom_palette)

In [None]:
# Pairwise relationships between the columns, coloured by species.
sns.pairplot(data=iris_df, hue='species')

### Convert the 'species' column to numeric values using LabelEncoder

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the 'species' labels with integer codes, in place.
# NOTE: once this has run, 'species' no longer holds strings, so the
# string comparisons used in the counting cells above will not match if
# those cells are re-executed.
le = LabelEncoder()
le.fit(iris_df['species'])
iris_df['species'] = le.transform(iris_df['species'])

### Heat Maps

In [None]:
# Annotated heat map of the pairwise column correlations.
fig = plt.gcf()
fig.set_size_inches(10, 7)
heatmap_options = dict(
    annot=True,
    cmap='cubehelix',
    linewidths=1,
    linecolor='k',
    square=True,
    mask=False,
    vmin=-1,
    vmax=1,
    cbar=True,
    cbar_kws={"orientation": "vertical"},
)
# Note: `fig` is rebound to the value returned by sns.heatmap here.
fig = sns.heatmap(iris_df.corr(), **heatmap_options)

In [None]:
# Sepal length as a single-column 2-D array (one row per sample).
sepal_length = iris_df['sepal_length'].to_numpy()
X = sepal_length.reshape(-1, 1)
print(X)

In [None]:
# Sepal width as a single-column 2-D array (one row per sample).
sepal_width = iris_df['sepal_width'].to_numpy()
Y = sepal_width.reshape(-1, 1)
print(Y)

### Plotting Scatter Graph between Sepal Length and Sepal Width

In [None]:
# Scatter sepal width against sepal length, then label the axes.
plt.scatter(X, Y, color='g')
plt.xlabel("Sepal Length")
plt.ylabel("Sepal Width")
plt.show()

### Calculate the correlation matrix

In [None]:
# Pairwise Pearson correlation (the default method of DataFrame.corr()).
corr_mat = iris_df.corr(method='pearson')
print(corr_mat)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

### Creating train and test sets for the prediction models

In [None]:
# Hold out 25% of the rows for testing.
# random_state pins the shuffle so the split (and every score computed from
# it) is reproducible between runs; stratify keeps the species proportions
# the same in both partitions.
train, test = train_test_split(
    iris_df,
    test_size=0.25,
    random_state=42,
    stratify=iris_df['species'],
)
print(train.shape)
print(test.shape)

In [None]:
# Separate the four measurement columns (features) from the species
# column (target) in each partition.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

train_X, train_y = train[feature_columns], train['species']
test_X, test_y = test[feature_columns], test['species']

In [None]:
# Preview the first five training feature rows.
train_X.head(n=5)

In [None]:
# Preview the first five test targets.
test_y.head(n=5)

### Using Logistic Regression

In [None]:
# Fit a logistic-regression classifier on the training partition.
# max_iter is raised above scikit-learn's default of 100 because the
# features are unscaled and all warnings are silenced at the top of the
# notebook, so a solver that stops early would otherwise go unnoticed.
model = LogisticRegression(max_iter=200)
model.fit(train_X, train_y)
prediction = model.predict(test_X)
# accuracy_score expects (y_true, y_pred); use that order, as the other
# model cells do.
print('Accuracy:', metrics.accuracy_score(test_y, prediction))

### Confusion matrix

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Compare the logistic-regression predictions with the true test labels.
confusion_mat = confusion_matrix(test_y, prediction)
print("Confusion matrix: \n", confusion_mat)

report_text = classification_report(test_y, prediction)
print(report_text)

### Using Support Vector Classifier (SVC)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Support-vector classifier with default hyperparameters.
model1 = SVC()
model1.fit(train_X, train_y)
pred_y = model1.predict(test_X)

svc_accuracy = accuracy_score(test_y, pred_y)
print("Acc=", svc_accuracy)

### Using K-Nearest Neighbors (KNN)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier using five neighbours.
model2 = KNeighborsClassifier(n_neighbors=5)
model2.fit(train_X, train_y)
y_pred2 = model2.predict(test_X)

knn_accuracy = accuracy_score(test_y, y_pred2)
print("Accuracy Score:", knn_accuracy)

### Using GaussianNB

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes classifier with default settings.
model3 = GaussianNB()
model3.fit(train_X, train_y)
y_pred3 = model3.predict(test_X)

nb_accuracy = accuracy_score(test_y, y_pred3)
print("Accuracy Score:", nb_accuracy)

### Using Decision Tree

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Decision tree split on entropy; random_state is fixed at 7.
model4 = DecisionTreeClassifier(criterion='entropy', random_state=7)
model4.fit(train_X, train_y)
y_pred4 = model4.predict(test_X)

tree_accuracy = accuracy_score(test_y, y_pred4)
print("Accuracy Score:", tree_accuracy)

### Results of all the Models and their Accuracy Scores

In [None]:
# Build the comparison table from the predictions made in the model cells
# above rather than from hand-typed numbers, so the table always matches
# the current run. Each model name is paired with its own prediction array.
model_predictions = {
    'Logistic Regression': prediction,
    'Support Vector Machines': pred_y,
    'Naive Bayes': y_pred3,
    'KNN': y_pred2,
    'Decision Tree': y_pred4,
}

results = pd.DataFrame({
    'Model': list(model_predictions),
    'Score': [
        round(metrics.accuracy_score(test_y, predicted), 3)
        for predicted in model_predictions.values()
    ],
})

# Best score first; the score becomes the index of the displayed table.
result_df = results.sort_values(by='Score', ascending=False)
result_df = result_df.set_index('Score')
result_df.head(9)