In [1]:
from collections import Counter
from pathlib import Path
import os, glob

root = Path("data/newdata")
counts = {d.name: len([p for p in glob.glob(str(d/'*')) if p.lower().endswith(('.jpg','.jpeg','.png','.bmp'))])
          for d in sorted([p for p in root.iterdir() if p.is_dir()], key=lambda x: x.name)}
counts



{'glass': 195, 'metal': 417, 'other': 834, 'paper': 485, 'plastic': 2096}

In [None]:
from pathlib import Path
import glob, random, json, numpy as np, cv2
from collections import Counter
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from joblib import dump
import matplotlib.pyplot as plt


DATA_DIR   = Path("data/newdata")  # class folders: glass/metal/other/paper/plastic
MODELS_DIR = Path("models");  MODELS_DIR.mkdir(exist_ok=True)
REPORTS_DIR= Path("reports"); REPORTS_DIR.mkdir(exist_ok=True)

IMG_SIZE   = (160,160)
HOG_PARAMS = dict(orientations=9, pixels_per_cell=(8,8),
                  cells_per_block=(3,3), block_norm="L2-Hys",
                  transform_sqrt=True, feature_vector=True)


MIN_PER_CLASS = 140   # target per class after balancing (lower = faster)
MAX_PER_CLASS = 300   

random.seed(42); np.random.seed(42)

def list_images(root):
    per = {}
    for d in sorted([p for p in root.iterdir() if p.is_dir()], key=lambda x:x.name):
        files = []
        for ext in ("*.jpg","*.jpeg","*.png","*.bmp","*.JPG","*.PNG","*.JPEG","*.BMP"):
            files.extend(glob.glob(str(d / ext)))
        per[d.name] = files
    return per

def preprocess(bgr):
    bgr = cv2.GaussianBlur(bgr, (3,3), 0)
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    L,A,B = cv2.split(lab)
    L = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(L)
    return cv2.cvtColor(cv2.merge([L,A,B]), cv2.COLOR_LAB2BGR)

def hog_color(bgr):
    img = cv2.resize(bgr, IMG_SIZE, interpolation=cv2.INTER_AREA)
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    feats = []
    for ch in cv2.split(rgb):
        feats.extend(hog(ch, **HOG_PARAMS))
    return np.asarray(feats, np.float32)

def hog_edge(bgr):
    img = cv2.resize(bgr, IMG_SIZE, interpolation=cv2.INTER_AREA)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ed = cv2.Canny(gray, 80, 160)
    return hog(ed, **HOG_PARAMS).astype(np.float32)

def hsv_hist(bgr, bins=(16, 16, 16)):
    img = cv2.resize(bgr, IMG_SIZE, interpolation=cv2.INTER_AREA)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv],[0,1,2],None,bins,[0,180, 0,256, 0,256])
    hist = cv2.normalize(hist, None).flatten().astype(np.float32)
    return hist

def light_aug(bgr):
    if random.random() < 0.5:
        bgr = cv2.flip(bgr, 1)
    if random.random() < 0.5:
        alpha = 1.0 + (random.random()*0.25 - 0.125)
        beta  = int(random.random()*20 - 10)
        bgr = cv2.convertScaleAbs(bgr, alpha=alpha, beta=beta)
    return bgr

per = list_images(DATA_DIR)
print("Raw:", {k: len(v) for k,v in per.items()})

X, y = [], []
for cls, files in per.items():
    files = files.copy(); random.shuffle(files)
    # undersample
    files = files[:MAX_PER_CLASS]
    # oversample up to MIN_PER_CLASS
    while len(files) < MIN_PER_CLASS and files:
        files.append(random.choice(files))
    for fp in files:
        img = cv2.imread(fp)
        if img is None: continue
        img = preprocess(img)
        if files.count(fp) > 1:
            img = light_aug(img)
        f = np.concatenate([hog_color(img), hog_edge(img), hsv_hist(img)], axis=0)
        X.append(f); y.append(cls)

X = np.stack(X).astype(np.float32); y = np.array(y)
print("Balanced (FAST):", X.shape, Counter(y))

le = LabelEncoder(); y_enc = le.fit_transform(y)
Xtr, Xte, ytr, yte = train_test_split(X, y_enc, test_size=0.2, random_state=42, stratify=y_enc)
scaler = StandardScaler(with_mean=True, with_std=True)
Xtr_s = scaler.fit_transform(Xtr); Xte_s = scaler.transform(Xte)

svm = SVC(kernel="rbf", C=5, gamma="scale", probability=True,
          class_weight="balanced", random_state=42)
svm.fit(Xtr_s, ytr)

print(classification_report(yte, svm.predict(Xte_s), target_names=le.classes_, zero_division=0))
cm = confusion_matrix(yte, svm.predict(Xte_s))
plt.figure(figsize=(6,6)); plt.imshow(cm, cmap="Blues"); plt.title("Confusion Matrix (FAST)")
plt.xticks(range(len(le.classes_)), le.classes_, rotation=45, ha="right")
plt.yticks(range(len(le.classes_)), le.classes_)
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, cm[i, j], ha="center", va="center", color="w")
plt.tight_layout(); plt.savefig(REPORTS_DIR/"cm_hog_svm_stronger_FAST.png", dpi=150); plt.close()

bundle = {
    "scaler": scaler, "svm": svm, "label_encoder": le,
    "img_size": IMG_SIZE, "hog_params": HOG_PARAMS,
    "classes": list(le.classes_), "feature_mode": "color_edge_hist"
}
dump(bundle, MODELS_DIR/"recycle_hog_svm.joblib")
with open(MODELS_DIR/"model_meta.json", "w") as f:
    json.dump({"classes": list(le.classes_), "img_size": IMG_SIZE, "feature_mode": "color_edge_hist"}, f, indent=2)

print("Saved ->", MODELS_DIR / "recycle_hog_svm.joblib")


In [5]:
from pathlib import Path
import glob, cv2
from src.classifier import HogSvmClassifier

clf = HogSvmClassifier("models/recycle_hog_svm.joblib")
print("Loaded:", clf.classes)
print("feature_mode:", clf.feature_mode)
print("img_size:", clf.img_size)
print("expected n_features:", getattr(clf.scaler, "n_features_in_", None))

for cls in ["glass","paper","plastic","metal","other"]:
    cands = []
    for ext in ("*.jpg","*.jpeg","*.png","*.bmp","*.JPG","*.PNG"):
        cands.extend(glob.glob(str(Path("data/newdata")/cls/ext)))
    if not cands:
        print("No images for", cls)
        continue
    img = cv2.imread(cands[0])
    assert img is not None, f"Failed to load image for {cls}"
    preds = clf.topk(img, k=3)
    print(cls, "->", preds)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Loaded: ['glass' 'metal' 'other' 'paper' 'plastic']
feature_mode: color_edge_hist
img_size: (160, 160)
expected n_features: 109072
glass -> [('glass', 0.9389533269981982), ('other', 0.018539975971327758), ('paper', 0.016490179222848077)]
paper -> [('other', 0.5914534255259039), ('plastic', 0.19984699629314015), ('paper', 0.10484693730931591)]
plastic -> [('other', 0.7052492698031848), ('plastic', 0.12519324048584926), ('glass', 0.07457886019014824)]
metal -> [('metal', 0.9395071844647885), ('plastic', 0.02823487849412246), ('paper', 0.019023285535574266)]
other -> [('glass', 0.2772578947394055), ('other', 0.2717839482235699), ('plastic', 0.2594807199931873)]


In [6]:
from pathlib import Path
import cv2
from src.classifier import HogSvmClassifier


clf = HogSvmClassifier("models/recycle_hog_svm.joblib")
print("Loaded:", clf.classes, "| feature_mode:", clf.feature_mode, "| img_size:", clf.img_size)


root = Path("data/newdata/plastic")
cands = []
for ext in ("*.jpg","*.jpeg","*.png","*.bmp","*.JPG","*.PNG"):
    cands.extend(root.glob(ext))

if not cands:
    raise FileNotFoundError(f"No image files found in {root}. Put at least one image there.")

img_path = cands[0]        
print("Testing with:", img_path)


img = cv2.imread(str(img_path))
if img is None or img.size == 0:
    raise ValueError(f"cv2.imread failed for {img_path}")

print("Image shape:", img.shape)


print("Top-3:", clf.topk(img, k=3))


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Loaded: ['glass' 'metal' 'other' 'paper' 'plastic'] | feature_mode: color_edge_hist | img_size: (160, 160)
Testing with: data\newdata\plastic\1001_2987_plastic.jpg
Image shape: (91, 57, 3)
Top-3: [('other', 0.7052492698031848), ('plastic', 0.12519324048584926), ('glass', 0.07457886019014824)]


In [7]:
from pathlib import Path
import cv2
from src.classifier import HogSvmClassifier


clf = HogSvmClassifier("models/recycle_hog_svm.joblib")
print("Loaded:", clf.classes, "| feature_mode:", clf.feature_mode, "| img_size:", clf.img_size)


folder = Path("data/newdata/plastic")          # try "paper", "glass", etc.
cands = []
for ext in ("*.jpg","*.jpeg","*.png","*.bmp","*.JPG","*.PNG"):
    cands.extend(folder.glob(ext))

if not cands:
    raise FileNotFoundError(f"No image files found under {folder}")

img_path = cands[0]
print("Testing with:", img_path)


img = cv2.imread(str(img_path))
if img is None or img.size == 0:
    raise ValueError(f"cv2.imread failed for {img_path}")

print("Image shape:", img.shape)


print("Top-3:", clf.topk(img, k=3))


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Loaded: ['glass' 'metal' 'other' 'paper' 'plastic'] | feature_mode: color_edge_hist | img_size: (160, 160)
Testing with: data\newdata\plastic\1001_2987_plastic.jpg
Image shape: (91, 57, 3)
Top-3: [('other', 0.7052492698031848), ('plastic', 0.12519324048584926), ('glass', 0.07457886019014824)]


In [1]:

from pathlib import Path
import glob, random, json
import numpy as np, cv2
from collections import Counter
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from joblib import dump

DATA_DIR   = Path("data/newdata")  # glass/metal/other/paper/plastic
MODELS_DIR = Path("models");  MODELS_DIR.mkdir(exist_ok=True)
REPORTS_DIR= Path("reports"); REPORTS_DIR.mkdir(exist_ok=True)

IMG_SIZE   = (160,160)
HOG_PARAMS = dict(orientations=9, pixels_per_cell=(8,8),
                  cells_per_block=(3,3), block_norm="L2-Hys",
                  transform_sqrt=True, feature_vector=True)

TARGET_PER_CLASS = 220
MAX_OTHER        = 250

random.seed(42); np.random.seed(42)

def list_images(root):
    per = {}
    for d in sorted([p for p in root.iterdir() if p.is_dir()], key=lambda x:x.name):
        files = []
        for ext in ("*.jpg","*.jpeg","*.png","*.bmp","*.JPG","*.PNG","*.JPEG","*.BMP"):
            files.extend(glob.glob(str(d / ext)))
        per[d.name] = files
    return per

def preprocess(bgr):
    bgr = cv2.GaussianBlur(bgr, (3,3), 0)
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    L,A,B = cv2.split(lab)
    L = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(L)
    return cv2.cvtColor(cv2.merge([L,A,B]), cv2.COLOR_LAB2BGR)

def hog_color(bgr):
    img = cv2.resize(bgr, IMG_SIZE, interpolation=cv2.INTER_AREA)
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    feats = []
    for ch in cv2.split(rgb):
        feats.extend(hog(ch, **HOG_PARAMS))
    return np.asarray(feats, np.float32)

def hog_edge(bgr):
    img = cv2.resize(bgr, IMG_SIZE, interpolation=cv2.INTER_AREA)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ed = cv2.Canny(gray, 80, 160)
    return hog(ed, **HOG_PARAMS).astype(np.float32)

def hsv_hist(bgr, bins=(16, 16, 16)):
    img = cv2.resize(bgr, IMG_SIZE, interpolation=cv2.INTER_AREA)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv],[0,1,2],None,bins,[0,180, 0,256, 0,256])
    hist = cv2.normalize(hist, None).flatten().astype(np.float32)
    return hist

def light_aug(bgr):
    if random.random() < 0.5: bgr = cv2.flip(bgr, 1)
    if random.random() < 0.5:
        alpha = 1.0 + (random.random()*0.25 - 0.125)
        beta  = int(random.random()*20 - 10)
        bgr = cv2.convertScaleAbs(bgr, alpha=alpha, beta=beta)
    return bgr

per = list_images(DATA_DIR)
print("Raw:", {k: len(v) for k,v in per.items()})
if not per:
    raise SystemExit("No class folders in data/newdata")

X, y = [], []
for cls, files in per.items():
    files = files.copy(); random.shuffle(files)
    cap = MAX_OTHER if cls.lower()=="other" else TARGET_PER_CLASS
    files = files[:cap]
    while len(files) < TARGET_PER_CLASS and files:
        files.append(random.choice(files))
    for fp in files:
        img = cv2.imread(fp)
        if img is None: continue
        img = preprocess(img)
        if files.count(fp) > 1: img = light_aug(img)
        f = np.concatenate([hog_color(img), hog_edge(img), hsv_hist(img)], axis=0)
        X.append(f); y.append(cls)

X = np.stack(X).astype(np.float32); y = np.array(y)
print("Balanced (FAST):", X.shape, Counter(y))


le = LabelEncoder(); y_enc = le.fit_transform(y)
Xtr, Xte, ytr, yte = train_test_split(X, y_enc, test_size=0.2, random_state=42, stratify=y_enc)
scaler = StandardScaler(with_mean=True, with_std=True)
Xtr_s = scaler.fit_transform(Xtr); Xte_s = scaler.transform(Xte)

svm = SVC(kernel="rbf", C=5, gamma="scale", probability=True,
          class_weight="balanced", random_state=42)
svm.fit(Xtr_s, ytr)

report = classification_report(yte, svm.predict(Xte_s), target_names=le.classes_, zero_division=0)
print(report)
cm = confusion_matrix(yte, svm.predict(Xte_s))
np.save(REPORTS_DIR/"cm.npy", cm)
with open(REPORTS_DIR/"classification_report.txt","w", encoding="utf-8") as f:
    f.write(report)
print("Saved text report and cm.npy in", REPORTS_DIR)

bundle = {
    "scaler": scaler, "svm": svm, "label_encoder": le,
    "img_size": IMG_SIZE, "hog_params": HOG_PARAMS,
    "classes": list(le.classes_), "feature_mode": "color_edge_hist",
    "calib_weights": [1.05 if c in ("plastic","paper") else (0.82 if c=="other" else 1.0)
                      for c in le.classes_]
}
dump(bundle, MODELS_DIR/"recycle_hog_svm.joblib")
with open(MODELS_DIR/"model_meta.json", "w", encoding="utf-8") as f:
    json.dump({"classes": list(le.classes_), "img_size": IMG_SIZE,
               "feature_mode": "color_edge_hist",
               "calib_weights": bundle["calib_weights"]}, f, indent=2)
print("Saved ->", MODELS_DIR / "recycle_hog_svm.joblib")


Raw: {'glass': 390, 'metal': 834, 'other': 1668, 'paper': 970, 'plastic': 4192}
Balanced (FAST): (1130, 109072) Counter({'other': 250, 'glass': 220, 'metal': 220, 'paper': 220, 'plastic': 220})
              precision    recall  f1-score   support

       glass       0.59      0.59      0.59        44
       metal       0.54      0.43      0.48        44
       other       0.44      0.72      0.55        50
       paper       0.50      0.34      0.41        44
     plastic       0.37      0.30      0.33        44

    accuracy                           0.48       226
   macro avg       0.49      0.48      0.47       226
weighted avg       0.49      0.48      0.47       226

Saved text report and cm.npy in reports
Saved -> models\recycle_hog_svm.joblib
