<a href="https://colab.research.google.com/github/SimranShaikh20/ML-Assignment/blob/main/Assingment_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment – 5
### Implement the following classification and clustering algorithms using the IRIS dataset and one other dataset.  
We will evaluate them using **Accuracy, Confusion Matrix, and Classification Report**.  

Algorithms:  
1. KNN  
2. Naive Bayes  
3. SVM  
4. DIANA  
5. AGNES  

Datasets:  
- IRIS Dataset  
- Wine Dataset  


In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import linear_sum_assignment

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Classifiers
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Hierarchical clustering
from sklearn.cluster import AgglomerativeClustering


## 🔹 Load Datasets (Iris + Wine)
We use **Iris dataset** (3 flower classes) and **Wine dataset** (3 wine classes).


In [None]:
# Fetch the two datasets bundled with scikit-learn.
iris = datasets.load_iris()
wine = datasets.load_wine()

# Pull the feature matrix (X) and the target vector (y) out of each dataset.
X_iris = iris.data
y_iris = iris.target
X_wine = wine.data
y_wine = wine.target

# Show the dimensions of each feature matrix, one dataset per line.
print(
    *(
        f"{dataset_name} dataset shape: {features.shape}"
        for dataset_name, features in (("Iris", X_iris), ("Wine", X_wine))
    ),
    sep="\n",
)


Iris dataset shape: (150, 4)
Wine dataset shape: (178, 13)


## 🔹 Train-Test Split
We split both datasets into **70% training** and **30% testing** for evaluation.


In [None]:
# Split datasets
# `train_test_split` is already imported in the imports cell at the top of the
# notebook, so it is not re-imported here.
# Both datasets share the same hold-out fraction and seed; naming them keeps
# the two calls in sync and makes the split easy to tune in one place.
TEST_SIZE = 0.3      # fraction of samples held out for testing
RANDOM_STATE = 42    # fixed seed so the split is reproducible

X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris, y_iris, test_size=TEST_SIZE, random_state=RANDOM_STATE)

X_train_wine, X_test_wine, y_train_wine, y_test_wine = train_test_split(
    X_wine, y_wine, test_size=TEST_SIZE, random_state=RANDOM_STATE)


# 5.1 🔹 K-Nearest Neighbors (KNN)
We use **k=5 neighbors** and test on both datasets.


In [None]:
# KNN classifies by distance, so a feature with a large numeric range would
# dominate the neighbour search. Standardising each feature first puts them on
# a comparable scale. Wrapping the scaler and the classifier in one pipeline
# means the scaler is fitted on the training split only (no test-set leakage)
# and is re-applied automatically at predict time.
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))

# IRIS
knn.fit(X_train_iris, y_train_iris)
y_pred_iris = knn.predict(X_test_iris)

print("IRIS - KNN Accuracy:", accuracy_score(y_test_iris, y_pred_iris))
print("Confusion Matrix:\n", confusion_matrix(y_test_iris, y_pred_iris))
print(classification_report(y_test_iris, y_pred_iris))

# WINE (re-fitting replaces both the scaler statistics and the neighbours)
knn.fit(X_train_wine, y_train_wine)
y_pred_wine = knn.predict(X_test_wine)

print("\nWINE - KNN Accuracy:", accuracy_score(y_test_wine, y_pred_wine))
print("Confusion Matrix:\n", confusion_matrix(y_test_wine, y_pred_wine))
print(classification_report(y_test_wine, y_pred_wine))


IRIS - KNN Accuracy: 1.0
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45


WINE - KNN Accuracy: 0.7407407407407407
Confusion Matrix:
 [[17  0  2]
 [ 1 15  5]
 [ 1  5  8]]
              precision    recall  f1-score   support

           0       0.89      0.89      0.89        19
           1       0.75      0.71      0.73        21
           2       0.53      0.57      0.55        14

    accuracy                           0.74        54
   macro avg       0.73      0.73      0.73        54
weighted avg       0.74      0.74      0.74        54



# 5.2 🔹 Naive Bayes
We use **Gaussian Naive Bayes** for classification.


In [None]:
# One Gaussian Naive Bayes estimator, refitted for each dataset in turn.
nb = GaussianNB()

# --- IRIS: train on the training split, predict the held-out split ---
y_pred_iris = nb.fit(X_train_iris, y_train_iris).predict(X_test_iris)

print("IRIS - Naive Bayes Accuracy:", accuracy_score(y_test_iris, y_pred_iris))
print("Confusion Matrix:\n", confusion_matrix(y_test_iris, y_pred_iris))
print(classification_report(y_test_iris, y_pred_iris))

# --- WINE: the same estimator object is fitted again on the Wine split ---
y_pred_wine = nb.fit(X_train_wine, y_train_wine).predict(X_test_wine)

print("\nWINE - Naive Bayes Accuracy:", accuracy_score(y_test_wine, y_pred_wine))
print("Confusion Matrix:\n", confusion_matrix(y_test_wine, y_pred_wine))
print(classification_report(y_test_wine, y_pred_wine))


IRIS - Naive Bayes Accuracy: 0.9777777777777777
Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45


WINE - Naive Bayes Accuracy: 1.0
Confusion Matrix:
 [[19  0  0]
 [ 0 21  0]
 [ 0  0 14]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        14

    accuracy                           1.00        54
   macro avg       1.00      1.00      1.00        54
weighted avg       1.00      1.00      1.00        54



# 5.3 🔹 Support Vector Machine (SVM)
We use a **linear kernel SVM** for classification.


In [None]:
# Support vector classifier with a linear kernel; reused for both datasets.
svm = SVC(kernel="linear")

# ---- IRIS: fit on the training split, score on the held-out split ----
svm.fit(X=X_train_iris, y=y_train_iris)
y_pred_iris = svm.predict(X=X_test_iris)

print("IRIS - SVM Accuracy:", accuracy_score(y_true=y_test_iris, y_pred=y_pred_iris))
print("Confusion Matrix:\n", confusion_matrix(y_true=y_test_iris, y_pred=y_pred_iris))
print(classification_report(y_true=y_test_iris, y_pred=y_pred_iris))

# ---- WINE: the same estimator is fitted again on the Wine training split ----
svm.fit(X=X_train_wine, y=y_train_wine)
y_pred_wine = svm.predict(X=X_test_wine)

print("\nWINE - SVM Accuracy:", accuracy_score(y_true=y_test_wine, y_pred=y_pred_wine))
print("Confusion Matrix:\n", confusion_matrix(y_true=y_test_wine, y_pred=y_pred_wine))
print(classification_report(y_true=y_test_wine, y_pred=y_pred_wine))


IRIS - SVM Accuracy: 1.0
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45


WINE - SVM Accuracy: 0.9814814814814815
Confusion Matrix:
 [[19  0  0]
 [ 0 20  1]
 [ 0  0 14]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.95      0.98        21
           2       0.93      1.00      0.97        14

    accuracy                           0.98        54
   macro avg       0.98      0.98      0.98        54
weighted avg       0.98      0.98      0.98        54



# 5.4 🔹 DIANA (Divisive Analysis)
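DIANA is not directly available in sklearn.  
We approximate it with **Agglomerative Clustering** (Ward linkage) as a proxy; note that DIANA is divisive (top-down) whereas agglomerative clustering is bottom-up, so the resulting hierarchy may differ from a true DIANA run.  
Evaluation is done using a **confusion matrix** against the true labels (clustering is unsupervised, so cluster IDs are arbitrary and need not match the class numbers).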


In [None]:
# Ward-linkage agglomerative clustering is used as a stand-in for DIANA.
# The cluster IDs it returns are arbitrary (cluster 0 need not be class 0), so
# comparing them directly with the true labels scrambles the confusion matrix.
# Each cluster is therefore matched one-to-one to the class it overlaps most
# (Hungarian assignment on the class-by-cluster contingency table) first.
diana = AgglomerativeClustering(n_clusters=3, linkage="ward")

# IRIS
clusters_iris = diana.fit_predict(X_iris)
contingency_iris = confusion_matrix(y_iris, clusters_iris)
# matched_iris[k] is the cluster assigned to class k; argsort inverts that
# permutation so it can be used as a cluster -> class lookup table.
_, matched_iris = linear_sum_assignment(contingency_iris, maximize=True)
y_pred_iris = np.argsort(matched_iris)[clusters_iris]
print("IRIS - DIANA Confusion Matrix:\n", confusion_matrix(y_iris, y_pred_iris))

# WINE
diana = AgglomerativeClustering(n_clusters=3, linkage="ward")
clusters_wine = diana.fit_predict(X_wine)
contingency_wine = confusion_matrix(y_wine, clusters_wine)
_, matched_wine = linear_sum_assignment(contingency_wine, maximize=True)
y_pred_wine = np.argsort(matched_wine)[clusters_wine]
print("\nWINE - DIANA Confusion Matrix:\n", confusion_matrix(y_wine, y_pred_wine))


IRIS - DIANA Confusion Matrix:
 [[ 0 50  0]
 [49  0  1]
 [15  0 35]]

WINE - DIANA Confusion Matrix:
 [[46  0 13]
 [ 2 51 18]
 [ 0 21 27]]


# 5.5 🔹 AGNES (Agglomerative Nesting)
AGNES is **Agglomerative Hierarchical Clustering**.  
We use **average linkage** to cluster the datasets.  
Again, we evaluate with a **confusion matrix**.


In [None]:
# AGNES: agglomerative clustering with average linkage.
# The cluster IDs it returns are arbitrary (cluster 0 need not be class 0), so
# comparing them directly with the true labels scrambles the confusion matrix.
# Each cluster is therefore matched one-to-one to the class it overlaps most
# (Hungarian assignment on the class-by-cluster contingency table) first.
agnes = AgglomerativeClustering(n_clusters=3, linkage="average")

# IRIS
clusters_iris = agnes.fit_predict(X_iris)
contingency_iris = confusion_matrix(y_iris, clusters_iris)
# matched_iris[k] is the cluster assigned to class k; argsort inverts that
# permutation so it can be used as a cluster -> class lookup table.
_, matched_iris = linear_sum_assignment(contingency_iris, maximize=True)
y_pred_iris = np.argsort(matched_iris)[clusters_iris]
print("IRIS - AGNES Confusion Matrix:\n", confusion_matrix(y_iris, y_pred_iris))

# WINE
agnes = AgglomerativeClustering(n_clusters=3, linkage="average")
clusters_wine = agnes.fit_predict(X_wine)
contingency_wine = confusion_matrix(y_wine, clusters_wine)
_, matched_wine = linear_sum_assignment(contingency_wine, maximize=True)
y_pred_wine = np.argsort(matched_wine)[clusters_wine]
print("\nWINE - AGNES Confusion Matrix:\n", confusion_matrix(y_wine, y_pred_wine))


IRIS - AGNES Confusion Matrix:
 [[ 0 50  0]
 [50  0  0]
 [14  0 36]]

WINE - AGNES Confusion Matrix:
 [[13 40  6]
 [69  2  0]
 [48  0  0]]
