In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap, LocallyLinearEmbedding
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import silhouette_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN
from sklearn_extra.cluster import KMedoids
from sklearn.neighbors import KernelDensity

In [2]:
train_df = pd.read_csv('../generated dataset/train.csv')
valid_df = pd.read_csv('../generated dataset/valid.csv')
test_df = pd.read_csv('../generated dataset/test.csv')

In [4]:
X_train = train_df.drop(columns=['Image', 'Label'])
y_train = train_df['Label']
X_valid = valid_df.drop(columns=['Image', 'Label'])
y_valid = valid_df['Label']
X_test = test_df.drop(columns=['Image', 'Label'])

In [10]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

In [29]:
dim_reduction_methods = {
    "PCA": PCA(n_components=2),  # Adjust components as necessary
    "Isomap": Isomap(n_components=2),
    "Kernel PCA": KernelPCA(n_components=2, kernel='rbf'),
    "LLE": LocallyLinearEmbedding(n_components=2)
}

In [30]:
def evaluate_classifiers(X_train_reduced, X_valid_reduced, y_train, y_valid):
    classifiers = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(),
        "Random Forest": RandomForestClassifier(),
    }

    for name, clf in classifiers.items():
        clf.fit(X_train_reduced, y_train)
        y_pred = clf.predict(X_valid_reduced)
        print(f"\n{name} Results:")
        print(confusion_matrix(y_valid, y_pred))
        print(classification_report(y_valid, y_pred))

## Dimensionality Reduction and Evaluation

In [31]:
for name, method in dim_reduction_methods.items():
    print(f"\nApplying {name}...")
    X_train_reduced = method.fit_transform(X_train_scaled)
    X_valid_reduced = method.transform(X_valid_scaled)
    
    evaluate_classifiers(X_train_reduced, X_valid_reduced, y_train, y_valid)


Applying PCA...

SVM Results:
[[ 0  1  0  1  0  0  0  0  0  0]
 [ 0  0  0  0  1  0  0  4  4  0]
 [ 0  3  0  0  0  0  0  1  1  0]
 [ 0  0  0  1  0  0  0  0  1  0]
 [ 0  0  0  0  2  0  0  0  2  0]
 [ 0  1  0  0  0  0  0  2  0  0]
 [ 0  6  0  0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  0  2  1  0]
 [ 0  1  0  0  0  0  0  0 60  0]
 [ 0  2  0  1  0  0  0  4  0  0]]
              precision    recall  f1-score   support

      Ajwain       0.00      0.00      0.00         2
      Almond       0.00      0.00      0.00         9
      Ashoka       0.00      0.00      0.00         5
   DrumStick       0.33      0.50      0.40         2
    Fittonia       0.67      0.50      0.57         4
    Hibiscus       0.00      0.00      0.00         3
   JackFruit       0.00      0.00      0.00         8
       Mango       0.13      0.67      0.22         3
        Neem       0.87      0.98      0.92        61
     Parijat       0.00      0.00      0.00         7

    accuracy                           0.6

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



KNN Results:
[[ 0  0  0  0  1  0  0  0  0  1]
 [ 2  0  0  0  0  0  0  3  4  0]
 [ 0  1  0  0  1  1  0  1  1  0]
 [ 0  0  0  0  0  1  0  0  1  0]
 [ 1  0  0  0  0  0  0  0  3  0]
 [ 1  1  0  0  0  0  0  1  0  0]
 [ 2  1  0  0  1  2  0  1  0  1]
 [ 1  0  0  0  0  1  0  0  1  0]
 [ 1  0  4  1  0  0  0  0 55  0]
 [ 2  3  0  0  0  1  0  0  0  1]]
              precision    recall  f1-score   support

      Ajwain       0.00      0.00      0.00         2
      Almond       0.00      0.00      0.00         9
      Ashoka       0.00      0.00      0.00         5
   DrumStick       0.00      0.00      0.00         2
    Fittonia       0.00      0.00      0.00         4
    Hibiscus       0.00      0.00      0.00         3
   JackFruit       0.00      0.00      0.00         8
       Mango       0.00      0.00      0.00         3
        Neem       0.85      0.90      0.87        61
     Parijat       0.33      0.14      0.20         7

    accuracy                           0.54       104
   ma

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



SVM Results:
[[ 0  0  0  0  0  1  0  1  0  0]
 [ 0  3  0  0  0  0  0  2  4  0]
 [ 0  2  0  0  0  1  0  1  1  0]
 [ 0  0  0  0  0  0  0  0  2  0]
 [ 0  1  0  0  0  0  0  0  3  0]
 [ 0  0  0  0  0  1  0  1  1  0]
 [ 0  5  1  0  0  0  0  2  0  0]
 [ 0  1  0  0  0  0  0  0  2  0]
 [ 0  0  0  6  0  5  0  0 50  0]
 [ 0  1  0  0  0  3  0  2  1  0]]
              precision    recall  f1-score   support

      Ajwain       0.00      0.00      0.00         2
      Almond       0.23      0.33      0.27         9
      Ashoka       0.00      0.00      0.00         5
   DrumStick       0.00      0.00      0.00         2
    Fittonia       0.00      0.00      0.00         4
    Hibiscus       0.09      0.33      0.14         3
   JackFruit       0.00      0.00      0.00         8
       Mango       0.00      0.00      0.00         3
        Neem       0.78      0.82      0.80        61
     Parijat       0.00      0.00      0.00         7

    accuracy                           0.52       104
   ma

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Random Forest Results:
[[ 0  0  0  0  0  0  0  0  0  2]
 [ 0  1  1  0  1  1  2  0  3  0]
 [ 1  1  1  0  1  0  0  0  1  0]
 [ 0  0  0  1  0  0  0  0  1  0]
 [ 0  1  0  0  1  0  0  0  2  0]
 [ 1  0  0  0  0  1  0  0  1  0]
 [ 2  1  1  0  2  0  0  2  0  0]
 [ 0  1  0  0  0  0  0  0  2  0]
 [ 3  1  0  4  7  1  0  0 44  1]
 [ 3  0  1  0  1  0  0  0  1  1]]
              precision    recall  f1-score   support

      Ajwain       0.00      0.00      0.00         2
      Almond       0.17      0.11      0.13         9
      Ashoka       0.25      0.20      0.22         5
   DrumStick       0.20      0.50      0.29         2
    Fittonia       0.08      0.25      0.12         4
    Hibiscus       0.33      0.33      0.33         3
   JackFruit       0.00      0.00      0.00         8
       Mango       0.00      0.00      0.00         3
        Neem       0.80      0.72      0.76        61
     Parijat       0.25      0.14      0.18         7

    accuracy                           0.48      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Random Forest Results:
[[ 0  1  0  0  0  0  1  0  0  0]
 [ 0  0  1  0  0  2  0  2  4  0]
 [ 0  0  1  0  1  1  0  0  1  1]
 [ 0  0  0  0  0  1  0  0  1  0]
 [ 0  0  0  0  2  0  0  0  2  0]
 [ 2  0  0  0  0  0  0  0  1  0]
 [ 1  0  2  0  1  2  0  2  0  0]
 [ 2  0  0  0  0  1  0  0  0  0]
 [ 1  2  0  3  7  0  0  0 48  0]
 [ 1  0  1  0  0  3  0  2  0  0]]
              precision    recall  f1-score   support

      Ajwain       0.00      0.00      0.00         2
      Almond       0.00      0.00      0.00         9
      Ashoka       0.20      0.20      0.20         5
   DrumStick       0.00      0.00      0.00         2
    Fittonia       0.18      0.50      0.27         4
    Hibiscus       0.00      0.00      0.00         3
   JackFruit       0.00      0.00      0.00         8
       Mango       0.00      0.00      0.00         3
        Neem       0.84      0.79      0.81        61
     Parijat       0.00      0.00      0.00         7

    accuracy                           0.49      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Random Forest Results:
[[ 1  0  0  0  0  0  0  0  1  0]
 [ 1  2  0  0  1  0  1  0  2  2]
 [ 0  2  0  0  1  1  0  1  0  0]
 [ 0  1  0  0  0  0  0  0  0  1]
 [ 0  1  0  0  1  0  0  0  2  0]
 [ 0  0  0  0  1  1  0  0  1  0]
 [ 0  4  0  0  2  0  0  2  0  0]
 [ 0  0  0  0  0  0  0  3  0  0]
 [ 1  4  2  5 12  1  0  1 33  2]
 [ 0  3  0  0  1  2  0  0  1  0]]
              precision    recall  f1-score   support

      Ajwain       0.33      0.50      0.40         2
      Almond       0.12      0.22      0.15         9
      Ashoka       0.00      0.00      0.00         5
   DrumStick       0.00      0.00      0.00         2
    Fittonia       0.05      0.25      0.09         4
    Hibiscus       0.20      0.33      0.25         3
   JackFruit       0.00      0.00      0.00         8
       Mango       0.43      1.00      0.60         3
        Neem       0.82      0.54      0.65        61
     Parijat       0.00      0.00      0.00         7

    accuracy                           0.39      

### Parzen Window (Linear Classifier)

In [32]:
def parzen_window_classifier(X_train, y_train, X_valid):
    classes = np.unique(y_train)
    densities = {}

    for c in classes:
        kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X_train[y_train == c])
        densities[c] = kde

    log_densities = np.array([densities[c].score_samples(X_valid) for c in classes]).T
    return classes[np.argmax(log_densities, axis=1)]

In [33]:
def evaluate_parzen(X_train, y_train, X_valid, y_valid):
    y_pred_parzen = parzen_window_classifier(X_train, y_train, X_valid)
    print("Parzen Window Classifier Results:")
    print(confusion_matrix(y_valid, y_pred_parzen))
    print(classification_report(y_valid, y_pred_parzen))

In [34]:
for name, method in dim_reduction_methods.items():
    print(f"\nApplying {name} for Parzen Window...")
    X_train_reduced = method.fit_transform(X_train_scaled)
    X_valid_reduced = method.transform(X_valid_scaled)
    
    evaluate_parzen(X_train_reduced, y_train, X_valid_reduced, y_valid)


Applying PCA for Parzen Window...
Parzen Window Classifier Results:
[[ 1  0  0  0  1  0  0  0  0  0]
 [ 0  0  0  0  0  0  2  2  4  1]
 [ 0  1  1  0  0  2  0  0  1  0]
 [ 0  0  1  0  0  0  0  0  1  0]
 [ 0  1  0  0  2  0  0  0  0  1]
 [ 0  0  1  0  0  0  1  1  0  0]
 [ 1  1  0  0  1  1  0  3  0  1]
 [ 1  0  0  1  0  0  0  1  0  0]
 [ 3  4  8  5  3  0  0  0 38  0]
 [ 1  2  0  0  0  0  0  1  2  1]]
              precision    recall  f1-score   support

      Ajwain       0.14      0.50      0.22         2
      Almond       0.00      0.00      0.00         9
      Ashoka       0.09      0.20      0.12         5
   DrumStick       0.00      0.00      0.00         2
    Fittonia       0.29      0.50      0.36         4
    Hibiscus       0.00      0.00      0.00         3
   JackFruit       0.00      0.00      0.00         8
       Mango       0.12      0.33      0.18         3
        Neem       0.83      0.62      0.71        61
     Parijat       0.25      0.14      0.18         7

    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Parzen Window Classifier Results:
[[ 0  0  0  0  0  1  0  0  0  1]
 [ 0  0  0  0  0  0  0  0  3  6]
 [ 0  0  0  0  0  2  0  1  1  1]
 [ 0  0  0  1  0  0  0  0  0  1]
 [ 0  0  0  0  0  0  0  0  2  2]
 [ 0  0  0  0  0  1  0  0  0  2]
 [ 0  0  0  0  0  3  0  0  0  5]
 [ 0  0  0  0  0  0  0  3  0  0]
 [ 0  0  0  9  0  0  0  0 31 21]
 [ 0  0  0  0  0  1  0  0  0  6]]
              precision    recall  f1-score   support

      Ajwain       0.00      0.00      0.00         2
      Almond       0.00      0.00      0.00         9
      Ashoka       0.00      0.00      0.00         5
   DrumStick       0.10      0.50      0.17         2
    Fittonia       0.00      0.00      0.00         4
    Hibiscus       0.12      0.33      0.18         3
   JackFruit       0.00      0.00      0.00         8
       Mango       0.75      1.00      0.86         3
        Neem       0.84      0.51      0.63        61
     Parijat       0.13      0.86      0.23         7

    accuracy                           

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Clustering Method

In [35]:
def evaluate_clustering(X, method_name):
    if method_name == "K-Means":
        labels = KMeans(n_clusters=10, random_state=42).fit_predict(X)
    elif method_name == "K-Medoids":
        labels = KMedoids(n_clusters=10).fit_predict(X)
    elif method_name == "DBSCAN":
        labels = DBSCAN(eps=0.5, min_samples=5).fit_predict(X)
    else:
        raise ValueError("Unknown clustering method")

    if len(set(labels)) > 1 and -1 not in labels:
        silhouette_avg = silhouette_score(X, labels)
        print(f"{method_name} Silhouette Score: {silhouette_avg}")
    else:
        print(f"{method_name} could not compute silhouette score.")

In [36]:
clustering_methods = ["K-Means", "K-Medoids", "DBSCAN"]
for name in clustering_methods:
    for method in dim_reduction_methods.values():
        print(f"\nApplying {name} clustering after {method.__class__.__name__}...")
        X_train_reduced = method.fit_transform(X_train_scaled)
        evaluate_clustering(X_train_reduced, name)


Applying K-Means clustering after PCA...
K-Means Silhouette Score: 0.4328074632924149

Applying K-Means clustering after Isomap...
K-Means Silhouette Score: 0.46470605069970705

Applying K-Means clustering after KernelPCA...
K-Means Silhouette Score: 0.43162309576275126

Applying K-Means clustering after LocallyLinearEmbedding...
K-Means Silhouette Score: 0.5223112128444037

Applying K-Medoids clustering after PCA...
K-Medoids Silhouette Score: 0.3595152778481091

Applying K-Medoids clustering after Isomap...
K-Medoids Silhouette Score: 0.32437559965239254

Applying K-Medoids clustering after KernelPCA...
K-Medoids Silhouette Score: 0.4120820595062275

Applying K-Medoids clustering after LocallyLinearEmbedding...
K-Medoids Silhouette Score: 0.2524522056110111

Applying DBSCAN clustering after PCA...
DBSCAN could not compute silhouette score.

Applying DBSCAN clustering after Isomap...
DBSCAN could not compute silhouette score.

Applying DBSCAN clustering after KernelPCA...
DBSCAN coul