In [6]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, WeightedRandomSampler
import numpy as np
from sklearn.model_selection import train_test_split
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from pathlib import Path

# allow jpg + png
def is_img(p):
    """Return True when ``p`` names a file with a supported image extension.

    ``p`` may be a string or a path-like object; it is converted with ``str``
    and compared case-insensitively against .png, .jpg, .jpeg, .bmp and .webp.
    """
    name = str(p).lower()
    return any(name.endswith(ext) for ext in (".png", ".jpg", ".jpeg", ".bmp", ".webp"))

# Locate the directory that holds one sub-folder per class. The kagglehub
# download nests it under images/images, so candidates are tried from the
# most specific location to the least specific one.
base_path = Path("C:/Users/hoang/.cache/kagglehub/datasets/alistairking/recyclable-and-household-waste-classification/versions/1")
candidates = [
    base_path.joinpath("images", "images"),
    base_path.joinpath("images"),
    base_path,
]

# The first candidate that exists and contains at least one sub-directory wins;
# the for/else raises only when no candidate qualified (loop ended without break).
root = None
for c in candidates:
    if not c.exists():
        continue
    if any(d.is_dir() for d in c.iterdir()):
        root = c
        break
else:
    raise FileNotFoundError(f"Could not find image folder in: {base_path}")

print(f"Using dataset root: {root}")

# Transform-free index of the whole dataset: used for class names and labels only.
full = datasets.ImageFolder(root=str(root), transform=None, is_valid_file=is_img)
print(f"Found {len(full.classes)} classes: {full.classes[:5]}...")

# Integer class label of every sample, in the same order as full.samples.
targets = [label for _path, label in full.samples]

# 80/20 split over sample indices, stratified on the class labels so both
# parts keep the same class proportions; the seed is fixed for repeatability.
tr_idx, va_idx = train_test_split(
    np.arange(len(targets)),
    test_size=0.2,
    random_state=56,
    stratify=targets,
)

# Training pipeline: light augmentation, then ImageNet mean/std normalisation.
train_tfms = transforms.Compose([
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
])

# Validation pipeline: deterministic resize + centre crop, same normalisation.
val_tfms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
])

# Each split gets its own ImageFolder so it can carry its own transform;
# Subset then restricts that folder to the indices chosen above.
train_ds = Subset(
    datasets.ImageFolder(root=str(root), transform=train_tfms, is_valid_file=is_img),
    tr_idx,
)
val_ds = Subset(
    datasets.ImageFolder(root=str(root), transform=val_tfms, is_valid_file=is_img),
    va_idx,
)

# Class balancing: every training sample is weighted by the inverse of its
# class frequency in the training split (counts are floored at 1 so an
# absent class cannot cause a division by zero).
counts = np.bincount(np.asarray(targets)[tr_idx])
class_w = 1.0 / np.maximum(counts, 1)
weights = class_w[np.asarray(targets)[tr_idx]]
sampler = WeightedRandomSampler(
    weights=weights,
    num_samples=len(tr_idx),
    replacement=True,
)

# The sampler replaces shuffling for training; validation keeps a fixed order.
train_dl = DataLoader(
    dataset=train_ds,
    batch_size=64,
    sampler=sampler,
    num_workers=4,
    pin_memory=True,
)
val_dl = DataLoader(
    dataset=val_ds,
    batch_size=128,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)
num_classes = len(full.classes)


Using dataset root: C:\Users\hoang\.cache\kagglehub\datasets\alistairking\recyclable-and-household-waste-classification\versions\1\images\images
Found 30 classes: ['aerosol_cans', 'aluminum_food_cans', 'aluminum_soda_cans', 'cardboard_boxes', 'cardboard_packaging']...


In [7]:
# Verify dataset is loaded correctly: report the class count, the total number
# of indexed samples and the size of each split, then confirm that no sample
# index appears in both the training and the validation split.
print(f"✓ Number of classes: {num_classes}")
print(f"✓ Total samples: {len(full.samples)}")
print(f"✓ Train samples: {len(tr_idx)}")
print(f"✓ Val samples: {len(va_idx)}")
# Index-level check only; identical files stored under different paths are
# not detected here.
print(f"✓ No overlap: {set(tr_idx).isdisjoint(set(va_idx))}")

# Quick check for duplicates (sample first 100 files)
from hashlib import md5
def quick_hash_check(indices, n=100):
    """Return the set of MD5 hex digests of the first ``n`` files in ``indices``.

    ``indices`` is a sliceable sequence of positions into ``full.samples``;
    only ``indices[:n]`` is read, so this is a spot check, not a full scan.
    Files with identical bytes collapse to a single digest in the result.
    """
    def _digest(sample_idx):
        # full.samples entries are (path, label) pairs; only the path is needed.
        file_path = full.samples[sample_idx][0]
        with open(file_path, 'rb') as fh:
            return md5(fh.read()).hexdigest()

    return {_digest(idx) for idx in indices[:n]}

# Spot check for byte-identical files shared by the two splits: hash the
# first 100 files of each split and intersect the digest sets.
train_hashes = quick_hash_check(tr_idx.tolist())
val_hashes = quick_hash_check(va_idx.tolist())
duplicates = train_hashes.intersection(val_hashes)
print(f"✓ Duplicate images in sample (should be 0): {len(duplicates)}")

# A single detected class means the dataset root points one level too high or too low.
if num_classes == 1:
    print("⚠️  WARNING: Only 1 class detected - check dataset path!")
if duplicates:
    print(f"⚠️  WARNING: Found {len(duplicates)} duplicate images between train/val!")


✓ Number of classes: 30
✓ Total samples: 15000
✓ Train samples: 12000
✓ Val samples: 3000
✓ No overlap: True
✓ Duplicate images in sample (should be 0): 0


In [8]:
import timm, torch
from torch import nn

def make_model(name, num_classes):
    """Create a pretrained timm model with a ``num_classes``-way classifier.

    ``name`` is the timm architecture identifier. The model is requested with
    ``pretrained=True``, ``drop_rate=0.2`` and ``drop_path_rate=0.1``.
    """
    regularisation = {"drop_rate": 0.2, "drop_path_rate": 0.1}
    return timm.create_model(name, pretrained=True, num_classes=num_classes, **regularisation)

# Two MobileNetV3 variants sharing the same head size, to be trained and compared.
m_small = make_model(name="mobilenetv3_small_100", num_classes=num_classes)
m_large = make_model(name="mobilenetv3_large_100", num_classes=num_classes)


In [9]:
from torch.amp import autocast, GradScaler

def train_model(model, train_dl, val_dl, epochs=15, lr=5e-4, wd=0.05, device="cuda"):
    model.to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    warmup = torch.optim.lr_scheduler.LinearLR(opt, start_factor=0.1, total_iters=3)
    cosine = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs-3)
    sched = torch.optim.lr_scheduler.SequentialLR(opt, [warmup, cosine], milestones=[3])
    crit = nn.CrossEntropyLoss(label_smoothing=0.1)
    scaler = GradScaler('cuda', enabled=(device.startswith("cuda")))
    best = {"f1": -1, "state": None}

    for ep in range(epochs):
        model.train()
        for x,y in train_dl:
            x,y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
            opt.zero_grad(set_to_none=True)
            with autocast('cuda', enabled=(device.startswith("cuda"))):
                logits = model(x)
                loss = crit(logits, y)
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
        sched.step()

        # eval
        model.eval()
        preds, gts = [], []
        with torch.no_grad():
            for x,y in val_dl:
                x = x.to(device, non_blocking=True)
                logits = model(x)
                preds.append(logits.argmax(1).cpu())
                gts.append(y)
        import numpy as np
        from sklearn.metrics import f1_score, accuracy_score
        p = torch.cat(preds).numpy(); g = torch.cat(gts).numpy()
        f1 = f1_score(g, p, average="macro"); acc = accuracy_score(g, p)
        if f1 > best["f1"]:
            best = {"f1": f1, "state": model.state_dict()}
        print(f"ep {ep+1}: acc {acc:.4f}  macroF1 {f1:.4f}")
    model.load_state_dict(best["state"])
    return model


In [10]:
# Environment probe: print whether CUDA is usable and, if so, the name of GPU 0
# (otherwise the literal "CPU").
import torch; print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")


True
NVIDIA GeForce RTX 4070 Laptop GPU


In [11]:
# Train both variants with the same loaders and the same 20-epoch budget.
# train_model returns the model object it was given, so `small`/`large`
# refer to the same modules as `m_small`/`m_large`.
device = "cuda" if torch.cuda.is_available() else "cpu"
small = train_model(model=m_small, train_dl=train_dl, val_dl=val_dl, epochs=20, device=device)
large = train_model(model=m_large, train_dl=train_dl, val_dl=val_dl, epochs=20, device=device)


ep 1: acc 0.5730  macroF1 0.5655
ep 2: acc 0.7160  macroF1 0.7138




ep 3: acc 0.7180  macroF1 0.7179
ep 4: acc 0.7127  macroF1 0.7080
ep 5: acc 0.7433  macroF1 0.7450
ep 6: acc 0.7690  macroF1 0.7690
ep 7: acc 0.7093  macroF1 0.7050
ep 8: acc 0.7857  macroF1 0.7859
ep 9: acc 0.7993  macroF1 0.8001
ep 10: acc 0.8080  macroF1 0.8107
ep 11: acc 0.8357  macroF1 0.8357
ep 12: acc 0.8457  macroF1 0.8439
ep 13: acc 0.8400  macroF1 0.8398
ep 14: acc 0.8587  macroF1 0.8582
ep 15: acc 0.8580  macroF1 0.8580
ep 16: acc 0.8607  macroF1 0.8599
ep 17: acc 0.8673  macroF1 0.8667
ep 18: acc 0.8627  macroF1 0.8631
ep 19: acc 0.8600  macroF1 0.8602
ep 20: acc 0.8623  macroF1 0.8625
ep 1: acc 0.5967  macroF1 0.5952
ep 2: acc 0.7850  macroF1 0.7834




ep 3: acc 0.8363  macroF1 0.8367
ep 4: acc 0.8373  macroF1 0.8369
ep 5: acc 0.8597  macroF1 0.8586
ep 6: acc 0.8637  macroF1 0.8633
ep 7: acc 0.8597  macroF1 0.8592
ep 8: acc 0.8667  macroF1 0.8672
ep 9: acc 0.8707  macroF1 0.8694
ep 10: acc 0.8733  macroF1 0.8726
ep 11: acc 0.8843  macroF1 0.8835
ep 12: acc 0.8793  macroF1 0.8788
ep 13: acc 0.8783  macroF1 0.8778
ep 14: acc 0.8803  macroF1 0.8802
ep 15: acc 0.8820  macroF1 0.8812
ep 16: acc 0.8793  macroF1 0.8793
ep 17: acc 0.8840  macroF1 0.8835
ep 18: acc 0.8823  macroF1 0.8818
ep 19: acc 0.8810  macroF1 0.8804
ep 20: acc 0.8840  macroF1 0.8840


In [12]:
import time, torch
def ms_per_image(model, device="cuda", n_warmup=5, n_iters=50):
    """Measure the mean forward-pass latency for one 224x224 RGB image.

    The model is switched to eval mode and moved to ``device`` (both persist
    after the call). A random ``(1, 3, 224, 224)`` input is run ``n_warmup``
    times untimed, then ``n_iters`` times timed.

    Args:
        model: the ``nn.Module`` to benchmark.
        device: device string (``"cuda"``, ``"cuda:0"``, ``"cpu"``) or
            ``torch.device``.
        n_warmup: number of untimed warm-up forward passes.
        n_iters: number of timed forward passes; must be positive.

    Returns:
        Mean latency in milliseconds per forward pass, as a float.

    Raises:
        ValueError: if ``n_iters`` is not positive.
    """
    if n_iters <= 0:
        raise ValueError("n_iters must be a positive integer")

    model.eval().to(device)
    # Compare the device *type* so "cuda:0" and torch.device objects are also
    # synchronised; CUDA kernels are launched asynchronously.
    on_cuda = torch.device(device).type == "cuda"
    x = torch.randn(1, 3, 224, 224, device=device)

    # Inference timing should not include autograd graph bookkeeping.
    with torch.no_grad():
        # warmup
        for _ in range(n_warmup):
            model(x)
        if on_cuda:
            torch.cuda.synchronize()

        # perf_counter is monotonic and high-resolution, unlike time.time().
        t0 = time.perf_counter()
        for _ in range(n_iters):
            model(x)
        if on_cuda:
            torch.cuda.synchronize()
        elapsed = time.perf_counter() - t0

    return 1000 * elapsed / n_iters

# Report single-image forward latency for both trained models on `device`.
print("Small ms/img:", ms_per_image(model=small, device=device))
print("Large ms/img:", ms_per_image(model=large, device=device))


Small ms/img: 11.293988227844238
Large ms/img: 12.904634475708008


In [13]:
# Move the model to CPU and eval mode *before* serialising. Saving while it is
# still on the training device stores device-specific (e.g. CUDA) tensors in
# the checkpoint, which then cannot be loaded on a CPU-only machine without
# passing map_location to torch.load.
small.eval().cpu()

# Checkpoint bundle: architecture name, class names (index order) and weights.
torch.save({"model":"mobilenetv3_small_100","classes":full.classes,"state_dict":small.state_dict()}, "mobilenetv3_small.pt")

# ONNX for CPU apps: traced with a single 1x3x224x224 dummy input.
dummy = torch.randn(1,3,224,224)
torch.onnx.export(small, dummy, "mobilenetv3_small.onnx", input_names=["input"], output_names=["logits"], opset_version=17)
