In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [2]:
# Read the pre-generated feature table for each split (train / validation / test).
train_df, valid_df, test_df = map(
    pd.read_csv,
    (
        'generated_features_train.csv',
        'generated_features_valid.csv',
        'generated_features_test.csv',
    ),
)

In [3]:
# 'Class' is the prediction target. 'Image' is dropped from the features as well
# (presumably a per-sample identifier -- confirm against the CSV schema).
X_train = train_df.drop(columns=['Image', 'Class'])
y_train = train_df['Class']

X_valid = valid_df.drop(columns=['Image', 'Class'])
y_valid = valid_df['Class']

X_test = test_df.drop(columns=['Image', 'Class'])
y_test = test_df['Class']

In [4]:
# Learn the scaling statistics from the training split only, then apply the very
# same transformation to every split so validation/test data never influence it.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_valid, X_test = (
    scaler.transform(features) for features in (X_train, X_valid, X_test)
)

In [5]:
# Empty container created up front; nothing in the cells shown here writes to it yet.
results = dict()

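Given a model, `evaluate_model` fits it on the training split and then prints the confusion matrix and classification report for the validation and test splits prepared above.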

In [6]:
def evaluate_model(name, model, X_train, y_train, X_valid, y_valid, X_test, y_test):
    """Fit ``model`` on the training split and print validation and test metrics.

    For each of the validation and test splits (in that order) the function
    predicts labels with the fitted model and prints a confusion matrix
    followed by a classification report.

    Parameters
    ----------
    name : str
        Human-readable model name used in the printed headings.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    X_train, y_train
        Features and labels passed to ``model.fit``.
    X_valid, y_valid
        Features and true labels of the validation split.
    X_test, y_test
        Features and true labels of the test split.

    Returns
    -------
    None
        All results are written to standard output.
    """
    print(f"\nTraining {name} model...")
    model.fit(X_train, y_train)

    evaluation_splits = (
        ("Validation", X_valid, y_valid),
        ("Test", X_test, y_test),
    )
    for split_label, X_split, y_split in evaluation_splits:
        y_pred = model.predict(X_split)
        print(f"\n{split_label} Performance of {name}:")
        print(confusion_matrix(y_split, y_pred))
        # A class that is never predicted has an undefined precision. With
        # zero_division=0 the report shows 0.0 for it (the same number the
        # default prints) without emitting an UndefinedMetricWarning each time.
        print(classification_report(y_split, y_pred, zero_division=0))

### 1. Support Vector Machine

In [7]:
# Linear-kernel support vector classifier with regularisation strength C=1.
svm_model = SVC(C=1, kernel='linear')
evaluate_model(
    name="SVM",
    model=svm_model,
    X_train=X_train,
    y_train=y_train,
    X_valid=X_valid,
    y_valid=y_valid,
    X_test=X_test,
    y_test=y_test,
)


Training SVM model...

Validation Performance of SVM:
[[2 0 0 0 0 1 0 0 0 1]
 [0 5 0 1 0 2 1 0 0 0]
 [0 0 3 0 0 0 0 0 0 1]
 [0 0 0 3 0 0 0 0 0 0]
 [0 1 0 1 3 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [1 0 0 0 0 1 2 1 0 0]
 [0 0 0 0 0 1 0 5 0 0]
 [0 1 1 2 0 0 0 0 2 0]
 [0 0 2 0 0 1 0 0 0 0]]
              precision    recall  f1-score   support

      Ajwain       0.67      0.50      0.57         4
      Almond       0.71      0.56      0.62         9
      Ashoka       0.50      0.75      0.60         4
  Drum_Stick       0.43      1.00      0.60         3
    Fittonia       1.00      0.60      0.75         5
    Hibiscus       0.25      1.00      0.40         2
  Jack Fruit       0.67      0.40      0.50         5
       Mango       0.83      0.83      0.83         6
        Neem       1.00      0.33      0.50         6
     unknown       0.00      0.00      0.00         3

    accuracy                           0.57        47
   macro avg       0.61      0.60      0.54        47
weighted a

### 2. Parzen Window (Gaussian Naive Bayes stand-in)

In [8]:
# GaussianNB is imported in the notebook's top import cell together with the
# other scikit-learn estimators, so this cell only builds and evaluates the model.
# NOTE(review): GaussianNB fits one Gaussian per feature and class; it is not a
# kernel-density (Parzen window) estimator. The variable name and printed label
# are kept unchanged so the recorded outputs stay comparable.
parzen_model = GaussianNB()
evaluate_model(
    "Parzen Windows (Gaussian Naive Bayes)",
    parzen_model,
    X_train, y_train,
    X_valid, y_valid,
    X_test, y_test,
)



Training Parzen Windows (Gaussian Naive Bayes) model...

Validation Performance of Parzen Windows (Gaussian Naive Bayes):
[[3 0 1 0 0 0 0 0 0 0]
 [2 5 0 0 1 0 0 0 1 0]
 [0 0 4 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0]
 [0 3 0 0 2 0 0 0 0 0]
 [0 0 0 0 1 1 0 0 0 0]
 [3 0 0 1 0 0 1 0 0 0]
 [4 1 0 0 1 0 0 0 0 0]
 [0 3 1 2 0 0 0 0 0 0]
 [1 0 2 0 0 0 0 0 0 0]]
              precision    recall  f1-score   support

      Ajwain       0.23      0.75      0.35         4
      Almond       0.42      0.56      0.48         9
      Ashoka       0.50      1.00      0.67         4
  Drum_Stick       0.50      1.00      0.67         3
    Fittonia       0.40      0.40      0.40         5
    Hibiscus       1.00      0.50      0.67         2
  Jack Fruit       1.00      0.20      0.33         5
       Mango       0.00      0.00      0.00         6
        Neem       0.00      0.00      0.00         6
     unknown       0.00      0.00      0.00         3

    accuracy                           0.40       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### 3. k-Nearest Neighbor

In [9]:
# k-nearest-neighbour classifier voting over the 5 closest training samples.
knn_model = KNeighborsClassifier(n_neighbors=5)
evaluate_model(
    name="KNN",
    model=knn_model,
    X_train=X_train,
    y_train=y_train,
    X_valid=X_valid,
    y_valid=y_valid,
    X_test=X_test,
    y_test=y_test,
)


Training KNN model...

Validation Performance of KNN:
[[3 0 0 0 0 0 0 0 0 1]
 [1 4 0 0 0 2 1 0 0 1]
 [0 0 3 0 0 0 0 0 0 1]
 [0 0 0 3 0 0 0 0 0 0]
 [0 2 0 0 0 0 0 3 0 0]
 [2 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 1 2 1 0 0]
 [1 2 1 0 0 0 0 2 0 0]
 [0 3 1 0 0 0 0 0 2 0]
 [2 0 1 0 0 0 0 0 0 0]]
              precision    recall  f1-score   support

      Ajwain       0.33      0.75      0.46         4
      Almond       0.33      0.44      0.38         9
      Ashoka       0.50      0.75      0.60         4
  Drum_Stick       1.00      1.00      1.00         3
    Fittonia       0.00      0.00      0.00         5
    Hibiscus       0.00      0.00      0.00         2
  Jack Fruit       0.67      0.40      0.50         5
       Mango       0.33      0.33      0.33         6
        Neem       1.00      0.33      0.50         6
     unknown       0.00      0.00      0.00         3

    accuracy                           0.40        47
   macro avg       0.42      0.40      0.38        47
weighted a

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### 4. Random Forest 

In [10]:
# Forest of 100 trees; random_state is pinned so repeated runs build the same forest.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
evaluate_model(
    name="Random Forest",
    model=rf_model,
    X_train=X_train,
    y_train=y_train,
    X_valid=X_valid,
    y_valid=y_valid,
    X_test=X_test,
    y_test=y_test,
)


Training Random Forest model...

Validation Performance of Random Forest:
[[1 0 0 0 0 0 0 0 0 3]
 [1 5 0 0 0 2 1 0 0 0]
 [0 0 4 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0]
 [0 1 0 1 1 0 1 1 0 0]
 [0 0 0 0 0 1 0 1 0 0]
 [1 0 0 0 1 0 2 1 0 0]
 [0 2 0 0 1 1 0 2 0 0]
 [0 3 1 1 0 0 0 0 1 0]
 [1 0 1 0 0 0 0 1 0 0]]
              precision    recall  f1-score   support

      Ajwain       0.25      0.25      0.25         4
      Almond       0.45      0.56      0.50         9
      Ashoka       0.67      1.00      0.80         4
  Drum_Stick       0.60      1.00      0.75         3
    Fittonia       0.33      0.20      0.25         5
    Hibiscus       0.25      0.50      0.33         2
  Jack Fruit       0.50      0.40      0.44         5
       Mango       0.33      0.33      0.33         6
        Neem       1.00      0.17      0.29         6
     unknown       0.00      0.00      0.00         3

    accuracy                           0.43        47
   macro avg       0.44      0.44      0.39 