In [18]:
import numpy as np
import pandas as pd

import os
import cv2
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessing Images

In [49]:
def preprocess_image(image_path, output_path, size=(256, 256)):
    image = cv2.imread(image_path)

    if image is None:
        print(f"Failed to load image: {image_path}")
        return

    # Convert the image to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Remove noise using GaussianBlur
    blurred_image = cv2.GaussianBlur(gray_image, (5, 5), 0)

    # Resize the image
    resized_image = cv2.resize(blurred_image, size)

    # Save the processed image
    cv2.imwrite(output_path, resized_image)


In [50]:
root_dir = '/content/drive/MyDrive/Colab Notebooks/leaf-classification/leaves'
output_dir = '/content/drive/MyDrive/Colab Notebooks/leaf-classification/leaves-processed'
os.makedirs(output_dir, exist_ok=True)


for folder_name in os.listdir(root_dir):
  folder_path = os.path.join(root_dir, folder_name)

  if os.path.isdir(folder_path):
    output_folder_path = os.path.join(output_dir, folder_name)
    os.makedirs(output_folder_path, exist_ok=True)

    for img_name in os.listdir(folder_path):
      img_path = os.path.join(folder_path, img_name)
      output_img_path = os.path.join(output_folder_path, img_name)
      # process and save the image
      preprocess_image(img_path, output_img_path)


In [9]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/leaf-classification/leaves.csv')
print(df.shape)
df.head()

(339, 16)


Unnamed: 0,1,1.1,0.72694,1.4742,0.32396,0.98535,1.2,0.83592,0.0046566,0.0039465,0.04779,0.12795,0.016108,0.0052323,0.00027477,1.1756
0,1,2,0.74173,1.5257,0.36116,0.98152,0.99825,0.79867,0.005242,0.005002,0.02416,0.090476,0.008119,0.002708,7.5e-05,0.69659
1,1,3,0.76722,1.5725,0.38998,0.97755,1.0,0.80812,0.007457,0.010121,0.011897,0.057445,0.003289,0.000921,3.8e-05,0.44348
2,1,4,0.73797,1.4597,0.35376,0.97566,1.0,0.81697,0.006877,0.008607,0.01595,0.065491,0.004271,0.001154,6.6e-05,0.58785
3,1,5,0.82301,1.7707,0.44462,0.97698,1.0,0.75493,0.007428,0.010042,0.007938,0.045339,0.002051,0.00056,2.4e-05,0.34214
4,1,6,0.72997,1.4892,0.34284,0.98755,1.0,0.84482,0.004945,0.004451,0.010487,0.058528,0.003414,0.001125,2.5e-05,0.34068


In [10]:
df_x = df.drop('1', axis='columns')
df_label = df['1']

In [11]:
x_train, x_test, y_train, y_test = train_test_split(df_x, df_label, test_size=0.2, shuffle=True, random_state=42)
print(x_train.shape, type(x_train))
print(x_test.shape, type(x_test))
print(y_train.shape, type(y_train))
print(y_test.shape, type(y_test))

(271, 15) <class 'pandas.core.frame.DataFrame'>
(68, 15) <class 'pandas.core.frame.DataFrame'>
(271,) <class 'pandas.core.series.Series'>
(68,) <class 'pandas.core.series.Series'>


In [12]:
x_train, y_train = x_train.to_numpy(), y_train.to_numpy()
x_test, y_test = x_test.to_numpy(), y_test.to_numpy()

In [13]:
# Scale
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [16]:
# Define classifiers
classifiers = {
    'SVM': SVC(C=1.0, kernel='rbf', gamma='scale', degree=3),
    'Logistic Regression': LogisticRegression(solver='lbfgs', max_iter=1000),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=10),
    'Decision Tree': DecisionTreeClassifier(max_depth=10, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42),
    'Naive Bayes': GaussianNB(),
    'MLP Classifier': MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=1000, random_state=42)
}


results = []
conf_matrices = {}
classification_reports = {}
# Function to evaluate and collect performance
def evaluate_model(name, model, x_test, y_test):
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_test, y_pred)

    results.append({
        'Classifier': name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    })
    conf_matrices[name] = conf_matrix

# Train and evaluate each classifier
for name, clf in classifiers.items():
    clf.fit(x_train, y_train)
    evaluate_model(name, clf, x_test, y_test)

results_df = pd.DataFrame(results)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
results_df.sort_values(by='F1 Score', ascending=False)

Unnamed: 0,Classifier,Accuracy,Precision,Recall,F1 Score
6,MLP Classifier,0.911765,0.953922,0.911765,0.917647
1,Logistic Regression,0.867647,0.897549,0.867647,0.871429
4,Random Forest,0.852941,0.895588,0.852941,0.861485
5,Naive Bayes,0.75,0.814776,0.75,0.75989
3,Decision Tree,0.720588,0.784069,0.720588,0.723553
0,SVM,0.661765,0.575035,0.661765,0.597263
2,K-Nearest Neighbors,0.588235,0.610819,0.588235,0.535918


In [None]:

# fig, axes = plt.subplots(len(classifiers), 1, figsize=(10, len(classifiers) * 5))

# for i, (name, conf_matrix) in enumerate(conf_matrices.items()):
#     sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=axes[i])
#     axes[i].set_title(f'Confusion Matrix for {name}')
#     axes[i].set_xlabel('Predicted')
#     axes[i].set_ylabel('Actual')

# plt.tight_layout()
# plt.show()