In [11]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

from tqdm import tqdm
import os
import cv2
import threading
import time

In [3]:
RANDOM_SEED = 42
IMAGE_SIZE = (512,512)
LOCK = threading.Lock()

In [4]:
def load_images_from_nested_folder(folder, image_size=(512,512)):
    images = []
    labels = []
    class_names = []
    class_idx = 0

    for class_folder in tqdm(os.listdir(folder)):
        class_path = os.path.join(folder, class_folder)
        if not os.path.isdir(class_path):
            continue

        class_names.append(class_folder)
        for filename in os.listdir(class_path):
            img_path = os.path.join(class_path, filename)
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, image_size)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                images.append(img.flatten())
                labels.append(class_idx)
        
        class_idx += 1
    print(f"Loaded {len(images)} images.")
    return np.array(images), np.array(labels), class_names

X, y, class_names = load_images_from_nested_folder("realwaste-main/RealWaste", image_size=IMAGE_SIZE)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=RANDOM_SEED)


100%|██████████| 9/9 [00:10<00:00,  1.15s/it]


In [None]:
def run_knn(k, X_train, X_test, y_train, y_test):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

for k in [1,2,3,5,10,25,50, 100]:
    start = time.time()
    accuracy = run_knn(k)
    end = time.time()

    print(f"Accuracy for k={k} KNN is {accuracy*100:.2f}% runtime: {(end-start):.0f}s")

In [10]:
def run_nn(X_train, X_test, y_train, y_test):
    mlp = MLPClassifier(hidden_layer_sizes=(128,128), max_iter=1000, random_state=RANDOM_SEED)
    mlp.fit(X_train, y_train)
    y_pred = mlp.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy


run_nn(X_train, X_test, y_train, y_test)

0.19516044187269857

In [14]:
def run_svm(kernel):
    svm = SVC(kernel=kernel)
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy


for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
    start = time.time()
    accuracy = run_svm(kernel)
    end = time.time()

    print(f"Accuracy for kernel={kernel} SVM is {accuracy*100:.2f}% runtime: {(end-start):.0f}s")

Accuracy for kernel=linear SVM is 34.74% runtime: 688s
