In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
%matplotlib inline

# Load the Iris dataset bundled with scikit-learn
iris_data = load_iris()

# Also read a CSV copy of the data from disk.
# A raw string keeps the Windows backslashes literal; without the r-prefix,
# "\M" and "\I" are invalid escape sequences that newer Python versions warn about.
# NOTE(review): this is an absolute, machine-specific path -- the cell fails on
# any machine that does not have this file; consider a configurable data directory.
iris_data1 = pd.read_csv(r'D:\ML\ML Mini Project\Iris_data\Iris.csv')

# Access the feature matrix (X) and target vector (y)
X = iris_data.data
y = iris_data.target

In [None]:
# Display the DataFrame that was read from Iris.csv
iris_data1

In [None]:
# List the keys available on the object returned by load_iris()
iris_data.keys()

In [None]:
# Build a DataFrame of the numeric measurements, one column per feature name
iris_df = pd.DataFrame(iris_data.data, columns=iris_data.feature_names)

iris_df.info()

In [None]:
# Print the pandas summary (info) of the DataFrame read from Iris.csv
iris_data1.info()

In [None]:
# Preview the first 15 rows of the feature DataFrame
iris_df.head(15)

In [None]:
# Append the numeric class code of every sample as a new column
iris_df['Iris type'] = iris_data.target

iris_df.head()

In [None]:
#Naming each Type of Iris
def f(x):
    """Translate a numeric Iris class code into its species name.

    0 maps to 'setosa', 1 maps to 'versicolor', and every other value
    maps to 'virginica'.
    """
    if x == 0:
        return 'setosa'
    if x == 1:
        return 'versicolor'
    return 'virginica'

# Translate each numeric class code into its species name
iris_df['Iris_name'] = iris_df['Iris type'].map(f)

iris_df.head()

In [None]:
# Show the first five rows of each species
iris_df.groupby('Iris_name').head(5)

In [None]:
#Violin plot of the Iris DataFrame
def plot_violin(y2, i):
    """Draw one violin plot of column ``y2`` per species in cell ``i`` of a 2x2 grid.

    Reads the notebook-level ``iris_df`` and draws on the current figure.
    """
    panel = plt.subplot(2, 2, i)
    sns.violinplot(data=iris_df, x='Iris_name', y=y2, ax=panel)

plt.figure(figsize=(17,12))
# Take the measurement columns from the dataset's own feature list instead of
# slicing off the last two columns by position: the positional slice silently
# picks the wrong columns if a different number of label columns has been
# appended to iris_df (for example when cells are re-run out of order).
for i, measurement in enumerate(iris_data['feature_names'], start=1):
    plot_violin(measurement, i)
    sns.despine(offset=10, trim=True)

In [None]:
# Pairwise scatter plots of the four measurements, coloured by species
sns.pairplot(
    data=iris_df,
    vars=['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'],
    hue='Iris_name',
    palette='Set1',
)

In [None]:
#Defining a Confusion Matrix Function
def print_confusion_matrix(confusion_matrix, class_names, figsize = (9,7), fontsize=14):
    """Render a confusion matrix as an annotated seaborn heatmap.

    Parameters
    ----------
    confusion_matrix : array-like
        Square matrix of counts; it is wrapped in a DataFrame whose rows and
        columns are both labelled with ``class_names``. Inside this function
        the parameter name shadows the sklearn function of the same name.
    class_names : sequence
        Labels used for both the row index and the column headers.
    figsize : tuple, default (9, 7)
        Size passed to ``plt.figure``.
    fontsize : int, default 14
        Font size of the tick labels on both axes.

    Raises
    ------
    ValueError
        If seaborn raises ValueError while drawing (the cells are formatted
        with the integer format ``"d"``).
    """
    df_cm = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names)
    plt.figure(figsize=figsize)
    try:
        heatmap = sns.heatmap(df_cm, annot=True, fmt="d", cbar=False)
    except ValueError as err:
        # Chain explicitly so the underlying seaborn error stays visible as the cause.
        raise ValueError("Confusion matrix values must be integers.") from err
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
    # Label the heatmap's own axes rather than relying on pyplot's "current axes".
    heatmap.set_ylabel('True label', fontsize=12)
    heatmap.set_xlabel('Predicted label', fontsize=12)
    heatmap.set_title('Confusion Matrix', fontsize=16)

In [None]:
# Display the DataFrame read from Iris.csv again; the model cells visible in
# this notebook take their features and labels from iris_df, not from this frame.
iris_data1

In [None]:
# Train/test split for the KNN classifier: hold out 20% of the rows
X = iris_df.loc[:, ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']]
y = iris_df.loc[:, 'Iris_name']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=28,
)

In [None]:
# Fit a 5-nearest-neighbours classifier and predict the held-out rows
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred1 = knn.predict(X_test)
y_pred1

In [None]:
# Side-by-side table of true and KNN-predicted species for the test rows
data1 = {
    'Original Species Name': y_test,
    'Predicted Species Name': y_pred1,
}
df1 = pd.DataFrame(data1)
df1

In [None]:
# Display the held-out feature rows from the most recent train_test_split
X_test

In [None]:
# Accuracy of KNN on the held-out rows.
# k is read from the fitted estimator so the message cannot drift from the
# value the model was actually built with.
print(f'Model Accuracy with k={knn.n_neighbors}: {knn.score(X_test, y_test)}')

In [None]:
# Per-class precision / recall / F1 for the KNN predictions
knn_report = classification_report(y_test, y_pred1)
print(knn_report)

In [None]:
# Confusion matrix for KNN.
# The first species is spelled 'setosa' (the axis label previously read 'sentosa').
# Passing the same list as labels= pins the matrix row/column order and shape
# to the names drawn on the axes.
species_labels = ['setosa', 'versicolor', 'virginica']
print_confusion_matrix(confusion_matrix(y_test, y_pred1, labels=species_labels), species_labels)

In [None]:
# Predicted vs. actual species of the KNN test rows, plotted on petal length/width
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,7))

# Left panel: colour by the model's prediction
sns.scatterplot(x=X_test['petal length (cm)'], y=X_test['petal width (cm)'], hue=y_pred1, alpha=0.5, ax=ax1)
ax1.set_title('Predicted')
ax1.legend(title='Iris name')

# Right panel: colour by the true label
sns.scatterplot(x=X_test['petal length (cm)'], y=X_test['petal width (cm)'], hue=y_test, alpha=0.5, ax=ax2)
ax2.set_title('Actual');

In [None]:
# Train/test split for the Logistic Regression classifier: hold out 25% of the rows
X = iris_df.loc[:, ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']]
y = iris_df.loc[:, 'Iris_name']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=39,
)

In [None]:
# Fit a logistic regression model (up to 1000 solver iterations) and predict the held-out rows
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred2 = LR.predict(X_test)
y_pred2

In [None]:
# Side-by-side table of true and Logistic-Regression-predicted species for the test rows
data2 = {
    'Original Species Name': y_test,
    'Predicted Species Name': y_pred2,
}
df2 = pd.DataFrame(data2)
df2

In [None]:
# Display the held-out feature rows from the most recent train_test_split
X_test

In [None]:
# Accuracy of Logistic Regression on the held-out rows
lr_accuracy = LR.score(X_test, y_test)
print(f'Model Accuracy: {lr_accuracy}')

In [None]:
# Per-class precision / recall / F1 for the Logistic Regression predictions
lr_report = classification_report(y_test, y_pred2)
print(lr_report)

In [None]:
# Confusion matrix for Logistic Regression.
# The first species is spelled 'setosa' (the axis label previously read 'sentosa').
# Passing the same list as labels= pins the matrix row/column order and shape
# to the names drawn on the axes.
species_labels = ['setosa', 'versicolor', 'virginica']
print_confusion_matrix(confusion_matrix(y_test, y_pred2, labels=species_labels), species_labels)

In [None]:
# Predicted vs. actual species of the Logistic Regression test rows, plotted on petal length/width
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,7))

# Left panel: colour by the model's prediction
sns.scatterplot(x=X_test['petal length (cm)'], y=X_test['petal width (cm)'], hue=y_pred2, alpha=0.5, ax=ax1)
ax1.set_title('Predicted')
ax1.legend(title='Iris name')

# Right panel: colour by the true label
sns.scatterplot(x=X_test['petal length (cm)'], y=X_test['petal width (cm)'], hue=y_test, alpha=0.5, ax=ax2)
ax2.set_title('Actual');

In [None]:
# Train/test split for the Decision Tree classifier: hold out 30% of the rows
X = iris_df.loc[:, ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']]
y = iris_df.loc[:, 'Iris_name']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=51,
)

In [None]:
# Fit a decision tree and predict the held-out rows.
# random_state is fixed so the fitted tree (and every metric derived from it)
# is the same on each run; an unseeded tree may break ties between equally
# good splits differently from one run to the next.
from sklearn.tree import DecisionTreeClassifier
DT = DecisionTreeClassifier(random_state=51)
DT.fit(X_train, y_train)
y_pred3 = DT.predict(X_test)
y_pred3

In [None]:
# Side-by-side table of true and Decision-Tree-predicted species for the test rows
data3 = {
    'Original Species Name': y_test,
    'Predicted Species Name': y_pred3,
}
df3 = pd.DataFrame(data3)
df3

In [None]:
# Display the held-out feature rows from the most recent train_test_split
X_test

In [None]:
# Accuracy of the Decision Tree on the held-out rows
dt_accuracy = DT.score(X_test, y_test)
print(f'Model Accuracy: {dt_accuracy}')

In [None]:
# Per-class precision / recall / F1 for the Decision Tree predictions
dt_report = classification_report(y_test, y_pred3)
print(dt_report)

In [None]:
# Confusion matrix for the Decision Tree.
# The first species is spelled 'setosa' (the axis label previously read 'sentosa').
# Passing the same list as labels= pins the matrix row/column order and shape
# to the names drawn on the axes.
species_labels = ['setosa', 'versicolor', 'virginica']
print_confusion_matrix(confusion_matrix(y_test, y_pred3, labels=species_labels), species_labels)

In [None]:
# Predicted vs. actual species of the Decision Tree test rows, plotted on petal length/width
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,7))

# Left panel: colour by the model's prediction
sns.scatterplot(x=X_test['petal length (cm)'], y=X_test['petal width (cm)'], hue=y_pred3, alpha=0.5, ax=ax1)
ax1.set_title('Predicted')
ax1.legend(title='Iris name')

# Right panel: colour by the true label
sns.scatterplot(x=X_test['petal length (cm)'], y=X_test['petal width (cm)'], hue=y_test, alpha=0.5, ax=ax2)
ax2.set_title('Actual');