In [1]:
from skimage.feature import local_binary_pattern
from skimage.color import rgb2gray
from skimage.transform import resize
from skimage.feature import hog
import numpy as np
import cv2
from PIL import Image

def preprocess_image(image_path, target_size=(128, 128)):
    # Load image
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Resize
    image_resized = resize(image, target_size, anti_aliasing=True)

    # Normalize to [0, 1]
    image_resized = image_resized.astype(np.float32)
    
    return image_resized

In [2]:
import torch
from torchvision import transforms
from PIL import Image

def predict_cnn(image_path, cnn_model, label_encoder, device='cpu'):
    transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    
    img = Image.open(image_path)
    img_tensor = transform(img).unsqueeze(0).to(device)

    cnn_model.eval()
    with torch.no_grad():
        output = cnn_model(img_tensor)
        _, predicted = torch.max(output, 1)

    return label_encoder.inverse_transform([predicted.cpu().numpy()[0]])[0]

def predict_svm_flat(image_path, svm, pca, label_encoder):
    image = preprocess_image(image_path)
    flat = image.flatten().reshape(1, -1)
    reduced = pca.transform(flat)
    prediction = svm.predict(reduced)
    return label_encoder.inverse_transform(prediction)[0]

def extract_hog_lbp(image, cell_size=(16, 16)):
    gray = rgb2gray(image)
    
    # HOG
    hog_feat = hog(gray, pixels_per_cell=cell_size, cells_per_block=(2, 2), feature_vector=True)

    # LBP
    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points+3), density=True)

    return np.concatenate((hog_feat, lbp_hist))

def predict_svm_hl(image_path, svm, pca, label_encoder):
    image = preprocess_image(image_path)
    features = extract_hog_lbp(image).reshape(1, -1)
    reduced = pca.transform(features)
    prediction = svm.predict(reduced)
    return label_encoder.inverse_transform(prediction)[0]


In [3]:
tensor = torch.tensor([1.0, 2.0]).to("xpu")
device = torch.device("xpu" if torch.xpu.is_available() else "cpu")
device

AssertionError: Torch not compiled with XPU enabled

In [8]:
device = torch.device("cpu")
device

device(type='cpu')

In [9]:
import torch.nn as nn
import torch.nn.functional as F

class LeafCNN(nn.Module):
    def __init__(self, num_classes):
        super(LeafCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # [B, 32, 64, 64]
        x = self.pool(F.relu(self.conv2(x)))  # [B, 64, 32, 32]

        x = x.view(-1, 64 * 32 * 32)  # Flatten
        
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

class LeafDCNN(nn.Module):
    def __init__(self, num_classes):
        super(LeafDCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 16 * 16, 2048)
        self.fc2 = nn.Linear(2048, 1000)
        self.fc3 = nn.Linear(1000, 500)
        self.fc4 = nn.Linear(500, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Conv1 -> ReLU -> MaxPool
        x = self.pool(F.relu(self.conv2(x)))  # Conv2 -> ReLU -> MaxPool
        x = self.pool(F.relu(self.conv3(x)))  # Conv3 -> ReLU -> MaxPool
        x = F.relu(self.conv4(x))  # Conv4 -> ReLU

        x = x.view(-1, 64 * 16 * 16)  # Flatten

        x = F.relu(self.fc1(x))  # Fully Connected Layer 1
        x = self.dropout(x)
        x = F.relu(self.fc2(x))  # Fully Connected Layer 2
        x = F.relu(self.fc3(x))  # Fully Connected Layer 3
        x = self.fc4(x)  # Softmax Output
        return x
    
class LeafDCNN_BN(nn.Module):
    def __init__(self, num_classes):
        super(LeafDCNN_BN, self).__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.25)

        self.bn1 = nn.BatchNorm2d(8)
        self.bn2 = nn.BatchNorm2d(16)
        self.bn3 = nn.BatchNorm2d(32)
        self.bn4 = nn.BatchNorm2d(64)
        self.bn5 = nn.BatchNorm2d(128)

        self.fc1 = nn.Linear(128 * 4 * 4, 4096)
        self.fc2 = nn.Linear(4096, 2048)
        self.fc3 = nn.Linear(2048, 1000)
        self.fc4 = nn.Linear(1000, 500)
        self.fc5 = nn.Linear(500, num_classes)

    def forward(self, x):
        x = self.bn1(self.pool(F.relu(self.conv1(x))))  # Conv1 -> ReLU -> MaxPool -> BN
        x = self.bn2(self.pool(F.relu(self.conv2(x))))  # Conv2 -> ReLU -> MaxPool -> BN
        x = self.bn3(self.pool(F.relu(self.conv3(x))))  # Conv3 -> ReLU -> MaxPool -> BN
        x = self.bn4(self.pool(F.relu(self.conv4(x))))  # Conv4 -> ReLU -> MaxPool -> BN
        x = self.bn5(self.pool(F.relu(self.conv5(x))))  # Conv5 -> ReLU -> MaxPool -> BN

        x = x.view(-1, 128 * 4 * 4)  # Flatten

        x = F.relu(self.fc1(x))  # Fully Connected Layer 1
        x = self.dropout(x)
        x = F.relu(self.fc2(x))  # Fully Connected Layer 2
        x = F.relu(self.fc3(x))  # Fully Connected Layer 3
        x = F.relu(self.fc4(x))  # Fully Connected Layer 4
        x = self.fc5(x)  # Softmax Output
        return x


In [11]:
import joblib
import torch

# Load saved models
svm_flat_model = joblib.load("svm_flat.pkl")
pca_flat_model = joblib.load("pca_flat.pkl")

svm_hl_model = joblib.load("svm_hl.pkl")
pca_hl_model = joblib.load("pca_hl.pkl")

label_encoder = joblib.load("label_encoder.pkl")

# Load CNN
cnn_model = LeafCNN(num_classes=len(label_encoder.classes_)).to(device)
cnn_model.load_state_dict(torch.load("leafcnn_model.pt", map_location=device))
cnn_model.eval()

# Load DCNN
dcnn_model = LeafDCNN(num_classes=len(label_encoder.classes_)).to(device)
dcnn_model.load_state_dict(torch.load("leafdcnn_model.pt", map_location=device))
dcnn_model.eval()

# Load DCNNBN
dcnnbn_model = LeafDCNN_BN(num_classes=len(label_encoder.classes_)).to(device)
dcnnbn_model.load_state_dict(torch.load("leafdcnnbn_model.pt", map_location=device))
dcnnbn_model.eval()

# Predict from image path
image_path = "leafsnap-dataset/leafsnap-dataset/dataset/images/lab/syringa_reticulata/ny1161-02-4.jpg"

print("CNN Prediction:", predict_cnn(image_path, cnn_model, label_encoder, device))
print("DCNN Prediction:", predict_cnn(image_path, dcnn_model, label_encoder, device))
print("DCNNBN Prediction:", predict_cnn(image_path, dcnnbn_model, label_encoder, device))
print("PCA+SVM Prediction:", predict_svm_flat(image_path, svm_flat_model, pca_flat_model, label_encoder))
print("HOG+LBP+SVM Prediction:", predict_svm_hl(image_path, svm_hl_model, pca_hl_model, label_encoder))


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  cnn_model.load_state_dict(torch.load("leafcnn_model.pt", map_location=device))
  dcnn_model.load_state_dict(torch.load("leafdcnn_model.pt", map_location=device))
  dcnnbn_model.load_state_dict(torch.load("leafdcnnbn_model.pt", map_location=device))


FileNotFoundError: [Errno 2] No such file or directory: 'leafsnap-dataset/leafsnap-dataset/dataset/images/lab/syringa_reticulata/ny1161-02-4.jpg'

# GUI

In [13]:
import tkinter as tk
from tkinter import filedialog
from tkinter import ttk
from PIL import Image, ImageTk
import cv2
import numpy as np
import joblib
import matplotlib.pyplot as plt
import torch
import threading
import time

streaming = False

# Load models
cnn_model = LeafCNN(num_classes=len(label_encoder.classes_)).to(device)
cnn_model.load_state_dict(torch.load("leafcnn_model.pt", map_location=device))
dcnn_model = LeafDCNN(num_classes=len(label_encoder.classes_)).to(device)
dcnn_model.load_state_dict(torch.load("leafdcnn_model.pt", map_location=device))
dcnnbn_model = LeafDCNN_BN(num_classes=len(label_encoder.classes_)).to(device)
dcnnbn_model.load_state_dict(torch.load("leafdcnnbn_model.pt", map_location=device))
pca_flat_model = joblib.load("pca_flat.pkl")
svm_flat_model = joblib.load("svm_flat.pkl")
pca_hl_model = joblib.load("pca_hl.pkl")
svm_hl_model = joblib.load("svm_hl.pkl")

# Label mapping
class_labels = label_encoder.classes_

def start_live_classification():
    global streaming
    streaming = True
    thread = threading.Thread(target=live_prediction_loop)
    thread.start()

def stop_live_classification():
    global streaming
    streaming = False

def live_prediction_loop():
    cap = cv2.VideoCapture(0)

    while streaming:
        ret, frame = cap.read()
        if not ret:
            break

        label = predict(frame, model_choice.get())

        # Show in Tkinter
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image_pil = Image.fromarray(image_rgb)
        image_pil = image_pil.resize((200, 200))
        img_tk = ImageTk.PhotoImage(image_pil)
        image_label.config(image=img_tk)
        image_label.image = img_tk
        prediction_var.set(f"Prediction: {label}")

        root.update()
        time.sleep(0.2)

    cap.release()
    prediction_var.set("Live prediction stopped.")

def pre_cnn(image):
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_rgb)
    transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    return transform(image_pil).unsqueeze(0).to(device)

def pre_svm(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = resize(image, (128, 128), anti_aliasing=True)
    return image_resized.astype(np.float32)

def cnn(image):
    img_tensor = pre_cnn(image)
    cnn_model.eval()
    with torch.no_grad():
        output = cnn_model(img_tensor)
        _, predicted = torch.max(output, 1)

    return label_encoder.inverse_transform([predicted.cpu().numpy()[0]])[0]

def dcnn(image):
    img_tensor = pre_cnn(image)
    dcnn_model.eval()
    with torch.no_grad():
        output = dcnn_model(img_tensor)
        _, predicted = torch.max(output, 1)

    return label_encoder.inverse_transform([predicted.cpu().numpy()[0]])[0]

def dcnnbn(image):
    img_tensor = pre_cnn(image)
    dcnnbn_model.eval()
    with torch.no_grad():
        output = dcnnbn_model(img_tensor)
        _, predicted = torch.max(output, 1)

    return label_encoder.inverse_transform([predicted.cpu().numpy()[0]])[0]

def svm_flat(image):
    resized = pre_svm(image)
    flat = resized.flatten().reshape(1, -1)
    reduced = pca_flat_model.transform(flat)
    prediction = svm_flat_model.predict(reduced)
    return label_encoder.inverse_transform(prediction)[0]

def svm_hl(image):
    resized = pre_svm(image)
    gray = rgb2gray(resized)
    hog_feat = hog(gray, pixels_per_cell=(16, 16), cells_per_block=(2, 2), feature_vector=True)
    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points+3), density=True)
    features = np.concatenate((hog_feat, lbp_hist)).reshape(1, -1)
    reduced = pca_hl_model.transform(features)
    prediction = svm_hl_model.predict(reduced)
    return label_encoder.inverse_transform(prediction)[0]

def predict(image, model_choice):
    if model_choice == "CNN":
        label = cnn(image)

    elif model_choice == "DCNN":
        label = dcnn(image)
    
    elif model_choice == "DCNN w/ BN":
        label = dcnnbn(image)

    elif model_choice == "PCA+SVM":
        label = svm_flat(image)

    elif model_choice == "HOG+LBP+PCA+SVM":
        label = svm_hl(image)

    elif model_choice == "ALL":
        label = "\n" + "CNN: " + cnn(image) + "\n" + "DCNN: " + dcnn(image) + "\n" + "DCNN w/ BN: " + dcnnbn(image) + "\n" + "SVM FLAT: " + svm_flat(image) + "\n" + "SVM HL: " + svm_hl(image)

    return label

def load_image():
    path = filedialog.askopenfilename()
    image = cv2.imread(path)
    label = predict(image, model_choice.get())
    update_gui(image, label)

def capture_webcam():
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    if ret:
        label, pred_dict = predict(frame, model_choice.get())
        update_gui(frame, label, pred_dict)

def update_gui(image, label):
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_rgb)
    image_pil = image_pil.resize((200, 200))
    img_tk = ImageTk.PhotoImage(image_pil)
    image_label.config(image=img_tk)
    image_label.image = img_tk
    prediction_var.set(f"Prediction: {label}")

# GUI setup
root = tk.Tk()
root.title("Leaf Classifier")

frame = tk.Frame(root)
frame.pack()

image_label = tk.Label(frame)
image_label.grid(row=0, column=0, columnspan=3, pady=10)

prediction_var = tk.StringVar()
prediction_label = tk.Label(frame, textvariable=prediction_var, font=('Arial', 14))
prediction_label.grid(row=1, column=0, columnspan=3)

model_choice = tk.StringVar(value="CNN")
model_menu = ttk.Combobox(frame, textvariable=model_choice, values=["CNN", "DCNN", "DCNN w/ BN", "PCA+SVM", "HOG+LBP+PCA+SVM", "ALL"])
model_menu.grid(row=2, column=1)

load_btn = tk.Button(frame, text="Load Image", command=load_image)
load_btn.grid(row=3, column=0, padx=5, pady=10)

cam_btn = tk.Button(frame, text="Use Webcam", command=capture_webcam)
cam_btn.grid(row=3, column=2, padx=5, pady=10)

live_btn = tk.Button(frame, text="Start Live", command=start_live_classification)
live_btn.grid(row=4, column=0, padx=5, pady=10)

stop_btn = tk.Button(frame, text="Stop Live", command=stop_live_classification)
stop_btn.grid(row=4, column=2, padx=5, pady=10)

root.mainloop()


  cnn_model.load_state_dict(torch.load("leafcnn_model.pt", map_location=device))
  dcnn_model.load_state_dict(torch.load("leafdcnn_model.pt", map_location=device))
  dcnnbn_model.load_state_dict(torch.load("leafdcnnbn_model.pt", map_location=device))
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Exception in Tkinter callback
Traceback (most recent call last):
  File "/nix/store/29a6wlyr860xg3i9n3pzavrlggmyxzkr-python3-3.13.1/lib/python3.13/tkinter/__init__.py", line 2068, in __call__
    return self.func(*args)
           ~~~~~~~~~^^^^^^^
  File "/tmp/nix-shell-8567-0/ipykernel_9296/784005410.py", line 150, in load_image
    image = cv2.imread(path)
TypeError: Can't convert object of type 'tuple' to 'str' for 'filename'
