# Imports

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import cv2
import numpy as np
from PIL import Image, ImageOps
import hashlib
from collections import Counter

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report

ModuleNotFoundError: No module named 'cv2'

# Data set loader + Augmentation

In [21]:
def load_dataset(root_folder, img_size=(128,128), min_size=32, target_count=500):
    X, y = [], []
    seen_hashes = set()

    aug_gen = ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=False,
        fill_mode='nearest'
    )

    classes = [d for d in os.listdir(root_folder) if os.path.isdir(os.path.join(root_folder, d))]
    print("Classes found:", classes)

    for class_name in classes:
        class_path = os.path.join(root_folder, class_name)
        images_list = os.listdir(class_path)
        class_imgs = []

        for img_name in images_list:
            img_path = os.path.join(class_path, img_name)
            if os.path.getsize(img_path) == 0: continue

            pil_img = Image.open(img_path)
            pil_img = ImageOps.exif_transpose(pil_img)
            pil_img = pil_img.convert("RGB")

            img = np.array(pil_img)
            if img.shape[0] < min_size or img.shape[1] < min_size: continue
            img_hash = hashlib.sha1(img.tobytes()).hexdigest()

            if img_hash in seen_hashes: continue
            seen_hashes.add(img_hash)

            img = cv2.resize(img, img_size)
            img = img.astype(np.float32) / 255.0
            class_imgs.append(img)

        while len(class_imgs) < target_count:
            idx = np.random.randint(0, len(class_imgs))
            aug_img = aug_gen.random_transform(class_imgs[idx])
            class_imgs.append(aug_img)

        X.extend(class_imgs[:target_count])
        y.extend([class_name]*target_count)
        print(f"Class '{class_name}': {len(class_imgs[:target_count])} images loaded")

    print("\n counts per class:", Counter(y))
    return np.array(X), np.array(y)

In [None]:
X, y = load_dataset("C:/Users/roqai/Downloads/dataset")

Classes found: ['plastic', 'metal', 'trash', 'paper', 'cardboard', 'glass']
Class 'plastic': 500 images loaded
Class 'metal': 500 images loaded
Class 'trash': 500 images loaded
Class 'paper': 500 images loaded
Class 'cardboard': 500 images loaded
Class 'glass': 500 images loaded

Final counts per class: Counter({'plastic': 500, 'metal': 500, 'trash': 500, 'paper': 500, 'cardboard': 500, 'glass': 500})


# Unknown class

In [22]:
num_unknown = 500
unknown_imgs = []
unknown_labels = []

In [23]:
for _ in range(num_unknown//2):
    noise_img = np.random.rand(128,128,3).astype(np.float32)
    unknown_imgs.append(noise_img)
    unknown_labels.append("Unknown")

In [24]:
for _ in range(num_unknown//2):
    idx = np.random.randint(0, len(X))
    img = X[idx]
    blur_img = cv2.GaussianBlur(img, (7,7), 0)
    unknown_imgs.append(blur_img)
    unknown_labels.append("Unknown")

X = np.concatenate([X, np.array(unknown_imgs)], axis=0)
y = np.concatenate([y, np.array(unknown_labels)], axis=0)


In [25]:
print("Counts per class:", Counter(y))
print("X shape:", X.shape)

Counts per class: Counter({np.str_('Unknown'): 1000, np.str_('plastic'): 500, np.str_('metal'): 500, np.str_('trash'): 500, np.str_('paper'): 500, np.str_('cardboard'): 500, np.str_('glass'): 500})
X shape: (4000, 128, 128, 3)


# Feature Extraction using MobileNetV2

In [26]:
X_imgs = preprocess_input(X * 255.0)

base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=(128,128,3))
feature_extractor = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D()
])

In [27]:
X_cnn = feature_extractor.predict(X_imgs, batch_size=32, verbose=1)
print("CNN feature shape:", X_cnn.shape)

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 411ms/step
CNN feature shape: (4000, 1280)


# Train validation split and standard scaler


In [12]:
X_train, X_val, y_train, y_val = train_test_split(
    X_cnn, y, test_size=0.2, stratify=y, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Unknown Class Handle

In [30]:
def predict_with_rejection(model, X, threshold=0.6):
    probs = model.predict_proba(X)
    max_probs = np.max(probs, axis=1)
    pred_labels = model.classes_[np.argmax(probs, axis=1)]
    pred_labels[max_probs < threshold] = "Unknown"
    return pred_labels

# Model Train

### SVM

In [29]:
svm = SVC(kernel="rbf", C=10, gamma="scale", probability=True)
svm.fit(X_train, y_train)


In [31]:
y_pred_svm = predict_with_rejection(svm, X_val, threshold=0.6)

In [32]:
print("SVM validation accuracy:", svm.score(X_val, y_val))


SVM validation accuracy: 0.9342857142857143


In [33]:
print(classification_report(y_val, y_pred_svm))

              precision    recall  f1-score   support

     Unknown       0.69      0.99      0.81       100
   cardboard       0.98      0.96      0.97       100
       glass       0.96      0.86      0.91       100
       metal       0.91      0.86      0.89       100
       paper       0.93      0.86      0.90       100
     plastic       0.92      0.86      0.89       100
       trash       0.99      0.89      0.94       100

    accuracy                           0.90       700
   macro avg       0.91      0.90      0.90       700
weighted avg       0.91      0.90      0.90       700



### KNN

In [34]:
knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
knn.fit(X_train, y_train)

In [35]:
y_pred_knn = predict_with_rejection(knn, X_val, threshold=0.6)


In [36]:
print("KNN validation accuracy:", knn.score(X_val, y_val))

KNN validation accuracy: 0.8442857142857143


In [37]:
print(classification_report(y_val, y_pred_knn))

              precision    recall  f1-score   support

     Unknown       0.42      0.81      0.55       100
   cardboard       0.95      0.83      0.89       100
       glass       0.89      0.82      0.85       100
       metal       0.88      0.60      0.71       100
       paper       0.91      0.67      0.77       100
     plastic       0.91      0.77      0.83       100
       trash       0.92      0.94      0.93       100

    accuracy                           0.78       700
   macro avg       0.84      0.78      0.79       700
weighted avg       0.84      0.78      0.79       700



# Save

In [1]:
import joblib

joblib.dump(svm, "/content/drive/MyDrive/trained_svm_model.pkl")

joblib.dump(knn, "/content/drive/MyDrive/trained_knn_model.pkl")

joblib.dump(scaler, "/content/drive/MyDrive/trained_scaler.pkl")

NameError: name 'svm' is not defined