<a href="https://colab.research.google.com/github/OsaVS/cnn-realwaste/blob/main/State_of_Art.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
from torchvision import models, datasets, transforms
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import os
from pathlib import Path
from sklearn.metrics import confusion_matrix, classification_report
import time

# Reproducibility: fix the seeds of the NumPy and torch global RNGs.
np.random.seed(42)
torch.manual_seed(42)

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Using device: {}".format(device))
if device.type == 'cuda':
    print("GPU: {}".format(torch.cuda.get_device_name(0)))

Using device: cuda
GPU: Tesla T4


In [None]:
# Install the Kaggle API client into the kernel's environment; it is used
# further down to download the dataset.
%pip install kaggle



In [None]:
from google.colab import files

# Prompt for kaggle.json. files.upload() returns a dict mapping each filename
# to the raw file bytes; binding it to a name (instead of leaving the call as
# the cell's last expression) stops the notebook from echoing the API key into
# the saved cell output.
kaggle_upload = files.upload()
print(f"Uploaded: {list(kaggle_upload)}")

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"osandasamaratunge","key":"<REDACTED - rotate this Kaggle API key>"}'}

In [None]:
# Create the per-user Kaggle config directory (-p: no error if it already exists).
!mkdir -p ~/.kaggle

In [None]:
# Move the uploaded credentials file into the Kaggle config directory.
!mv kaggle.json ~/.kaggle/

In [None]:
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
import kaggle

# Download and unzip the RealWaste dataset into the working directory.
# The download is skipped when the extracted folder is already present, so
# re-running the notebook does not fetch the archive again. Delete the folder
# to force a fresh download (e.g. after an interrupted extraction).
if os.path.isdir('RealWaste'):
    print("RealWaste/ already present - skipping download")
else:
    kaggle.api.dataset_download_files('rtti237/realwaste-dataset', unzip=True)

Dataset URL: https://www.kaggle.com/datasets/rtti237/realwaste-dataset


In [None]:
# List the working directory to confirm the dataset folder was extracted.
%ls

[0m[01;34mRealWaste[0m/  [01;34msample_data[0m/


In [None]:
# Root folder of the extracted dataset; WasteDataset looks for one
# sub-folder per class name underneath it.
DATA_DIR = './RealWaste'
print("Updated DATA_DIR: {}".format(DATA_DIR))

Updated DATA_DIR: ./RealWaste


In [None]:
# Class labels in index order (a sample's label is its position in this list).
# The trailing comment on each entry is the number of images in that class.
CLASS_NAMES = [
    'Cardboard',            # 461
    'Food Organics',        # 411
    'Glass',                # 420
    'Metal',                # 790
    'Miscellaneous Trash',  # 495
    'Paper',                # 500
    'Plastic',              # 921
    'Textile Trash',        # 318
    'Vegetation'            # 436
]

# Dataset / training hyper-parameters
NUM_CLASSES = len(CLASS_NAMES)  # 9
IMAGE_SIZE = 224                # side length (pixels) of the square network input
BATCH_SIZE = 32
NUM_EPOCHS = 50

In [None]:
class WasteDataset(Dataset):
    """Image-folder dataset for the waste-material classes.

    ``root_dir`` is expected to contain one sub-directory per class name.
    Files are indexed once in ``__init__`` (sorted per class, so indices are
    stable between instances) and decoded lazily in ``__getitem__``.

    Args:
        root_dir: Directory holding the per-class sub-directories.
        transform: Optional callable applied to each decoded PIL image.
        class_names: Optional sequence of sub-directory names; a sample's
            label is the position of its class in this sequence. Defaults to
            the module-level ``CLASS_NAMES``.

    Attributes:
        images: List of image paths (as ``str``).
        labels: List of integer class indices, parallel to ``images``.
            Reading this list gives every label without decoding any image.

    Raises:
        RuntimeError: If no image file is found under ``root_dir``.
    """

    # File suffixes (compared lower-cased) that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, transform=None, class_names=None):
        self.root_dir = Path(root_dir)
        self.transform = transform
        self.class_names = list(CLASS_NAMES if class_names is None else class_names)
        self.images = []
        self.labels = []

        for class_idx, class_name in enumerate(self.class_names):
            class_dir = self.root_dir / class_name
            if not class_dir.is_dir():
                print(f"⚠️ Warning: Directory not found: {class_dir}")
                continue

            # Sort so the sample order does not depend on filesystem
            # enumeration order.
            for img_path in sorted(class_dir.glob('*.*')):
                # is_file() keeps a directory whose name happens to end in
                # ".jpg" etc. from being indexed as an image.
                if img_path.suffix.lower() in self.IMAGE_EXTENSIONS and img_path.is_file():
                    self.images.append(str(img_path))
                    self.labels.append(class_idx)

        print(f"Loaded {len(self.images)} images from {len(self.class_names)} classes")

        if not self.images:
            raise RuntimeError(f"No images found in {root_dir}")

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is decoded as RGB and passed through ``self.transform`` when
        one is set. If the file cannot be read, the error is printed and a
        black 224x224 RGB image is substituted (best-effort: the sample keeps
        its original label).
        """
        img_path = self.images[idx]
        label = self.labels[idx]

        try:
            # The context manager closes the underlying file handle as soon as
            # the pixels have been decoded; convert() returns an independent
            # in-memory image.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
            # Return black image if error
            image = Image.new('RGB', (224, 224), (0, 0, 0))

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
def calculate_mean_std(dataset_path, image_size=224, sample_size=None,
                       batch_size=32, num_workers=4):
    """Estimate the per-channel mean and standard deviation of the dataset.

    Every image is resized to ``image_size x image_size`` and converted to a
    tensor, so all images contribute the same number of pixels and per-image
    statistics can be averaged with equal weight.

    Args:
        dataset_path: Root directory passed to ``WasteDataset``.
        image_size: Side length the images are resized to before measuring.
        sample_size: If set and smaller than the dataset, only this many
            images are used, drawn without replacement via
            ``np.random.choice`` (i.e. from NumPy's global RNG).
        batch_size: Batch size of the temporary DataLoader.
        num_workers: Worker processes of the temporary DataLoader.

    Returns:
        ``(mean, std)`` - two lists of three floats in R, G, B order.
    """
    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor()
    ])

    stats_dataset = WasteDataset(root_dir=dataset_path, transform=transform)

    if sample_size and sample_size < len(stats_dataset):
        indices = np.random.choice(len(stats_dataset), sample_size, replace=False)
        stats_dataset = torch.utils.data.Subset(stats_dataset, indices)

    loader = DataLoader(stats_dataset, batch_size=batch_size, shuffle=False,
                        num_workers=num_workers)

    channels_sum = torch.zeros(3)
    channels_squared_sum = torch.zeros(3)
    num_images = 0

    for images, _ in loader:
        batch_count = images.size(0)
        # Per-channel batch mean, re-weighted by the batch size so that a
        # smaller final batch does not get over-weighted.
        channels_sum += torch.mean(images, dim=[0, 2, 3]) * batch_count
        channels_squared_sum += torch.mean(images ** 2, dim=[0, 2, 3]) * batch_count
        num_images += batch_count

    mean = channels_sum / num_images
    # Var[x] = E[x^2] - E[x]^2. Rounding can push this marginally below zero
    # for near-constant channels, which would turn sqrt() into NaN.
    variance = (channels_squared_sum / num_images - mean ** 2).clamp(min=0.0)
    std = torch.sqrt(variance)

    print(f"Dataset Mean (R, G, B): [{mean[0]:.4f}, {mean[1]:.4f}, {mean[2]:.4f}]")
    print(f"Dataset Std (R, G, B): [{std[0]:.4f}, {std[1]:.4f}, {std[2]:.4f}]")

    return mean.tolist(), std.tolist()

dataset_mean, dataset_std = calculate_mean_std(DATA_DIR, IMAGE_SIZE, sample_size=1000)

Loaded 4752 images from 9 classes




Dataset Mean (R, G, B): [0.5959, 0.6181, 0.6327]
Dataset Std (R, G, B): [0.1614, 0.1624, 0.1879]


ImageNet normalization is used.

In [None]:
# The backbone is an ImageNet-pretrained ResNet50 and only its new head is
# optimised, so inputs must be normalised with the statistics the pretrained
# weights were trained on, not with statistics measured on this dataset.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: geometric + colour augmentation on the PIL image, then
# tensor conversion and normalisation. RandomErasing operates on tensors, so
# it has to come after ToTensor/Normalize.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMAGE_SIZE, scale=(0.7,1.0), ratio=(0.9,1.1)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.15),
    transforms.RandomRotation(15),
    transforms.ColorJitter(0.2,0.2,0.2,0.02),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    transforms.RandomErasing(p=0.25, scale=(0.02,0.2), ratio=(0.3,3.3))
])

# Evaluation pipeline: deterministic resize (15% larger than the input size)
# followed by a centre crop; no augmentation.
val_test_transform = transforms.Compose([
    transforms.Resize(int(IMAGE_SIZE*1.15)),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])


In [None]:
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np
from torch.utils.data import Subset

print("="*70)
print("LOADING AND SPLITTING DATASET")
print("="*70)

# STEP 1: split ratios
train_ratio = 0.70
val_ratio = 0.15
test_ratio = 0.15

print(f"\nSplit ratios:")
print(f"  Training:   {train_ratio*100:.0f}%")
print(f"  Validation: {val_ratio*100:.0f}%")
print(f"  Testing:    {test_ratio*100:.0f}%")

# STEP 2: index the full dataset (no transform - only paths/labels are needed here)
print(f"\nLoading dataset from: {DATA_DIR}")
full_dataset = WasteDataset(root_dir=DATA_DIR, transform=None)

# STEP 3: collect all labels.
# The dataset already stores one label per image in `.labels`; reading that
# list avoids opening and decoding every image just to discard the pixels.
print("\nExtracting labels...")
labels = np.array(full_dataset.labels)

print(f"\nLabel distribution in full dataset:")
unique, counts = np.unique(labels, return_counts=True)
for label_idx, count in zip(unique, counts):
    print(f"  {CLASS_NAMES[label_idx]:20s}: {count:4d} images ({count/len(labels)*100:.1f}%)")

# STEP 4: first stratified split - training vs. (validation + test)
print(f"\n1️⃣ Splitting: Train vs (Val + Test)...")
sss1 = StratifiedShuffleSplit(
    n_splits=1,
    test_size=(val_ratio + test_ratio),
    random_state=42
)
train_idx, temp_idx = next(sss1.split(np.arange(len(labels)), labels))

print(f"  Train: {len(train_idx)} images")
print(f"  Temp:  {len(temp_idx)} images")

# STEP 5: second stratified split - validation vs. test, inside the held-out part
print(f"\n2️⃣ Splitting: Val vs Test...")
temp_labels = labels[temp_idx]
relative_test_size = test_ratio / (val_ratio + test_ratio)

sss2 = StratifiedShuffleSplit(
    n_splits=1,
    test_size=relative_test_size,
    random_state=42
)
val_idx_rel, test_idx_rel = next(sss2.split(np.arange(len(temp_idx)), temp_labels))

# The second split returns positions inside temp_idx; map them back to
# positions in the full dataset.
val_idx = temp_idx[val_idx_rel]
test_idx = temp_idx[test_idx_rel]

print(f"  Val:  {len(val_idx)} images")
print(f"  Test: {len(test_idx)} images")

# Invariants: the three splits cover the dataset exactly once, with no leakage.
assert len(train_idx) + len(val_idx) + len(test_idx) == len(labels)
assert not (set(train_idx) & set(val_idx)), "train/val overlap"
assert not (set(train_idx) & set(test_idx)), "train/test overlap"
assert not (set(val_idx) & set(test_idx)), "val/test overlap"

# STEP 6: verify the split is stratified
print("\n" + "="*70)
print("VERIFYING STRATIFIED SPLIT")
print("="*70)

def print_split_distribution(indices, split_name):
    """Print, for one split, how many samples each class has and its share.

    Args:
        indices: Positions into the module-level ``labels`` array that make
            up the split.
        split_name: Heading printed above the table.
    """
    subset_labels = labels[indices]
    subset_size = len(subset_labels)
    print(f"\n{split_name} distribution:")

    class_ids, class_counts = np.unique(subset_labels, return_counts=True)
    for position in range(len(class_ids)):
        label_idx = class_ids[position]
        count = class_counts[position]
        percentage = count / subset_size * 100
        print(f"  {CLASS_NAMES[label_idx]:20s}: {count:4d} ({percentage:5.1f}%)")

# Report the class balance of each split.
for split_indices, split_title in (
    (train_idx, "TRAINING"),
    (val_idx, "VALIDATION"),
    (test_idx, "TESTING"),
):
    print_split_distribution(split_indices, split_title)

# ---- STEP 7: datasets --------------------------------------------------
print(f"\n{'=' * 70}")
print("CREATING DATASETS WITH TRANSFORMS")
print("=" * 70)

# One full dataset instance per split so each can carry its own transform;
# the split itself is applied by wrapping each instance in a Subset.
train_dataset_full = WasteDataset(DATA_DIR, train_transform)
val_dataset_full = WasteDataset(DATA_DIR, val_test_transform)
test_dataset_full = WasteDataset(DATA_DIR, val_test_transform)

train_dataset = Subset(train_dataset_full, train_idx)
val_dataset = Subset(val_dataset_full, val_idx)
test_dataset = Subset(test_dataset_full, test_idx)

print("\n✅ Datasets created:")
print("  Training:   {:4d} images with augmentation".format(len(train_dataset)))
print("  Validation: {:4d} images (no augmentation)".format(len(val_dataset)))
print("  Testing:    {:4d} images (no augmentation)".format(len(test_dataset)))

# ---- STEP 8: data loaders ----------------------------------------------
print(f"\n{'=' * 70}")
print("CREATING DATA LOADERS")
print("=" * 70)

# Options shared by all three loaders; only the training loader shuffles.
shared_loader_options = dict(
    batch_size=BATCH_SIZE,
    num_workers=2,
    pin_memory=True,
    drop_last=False,
)
train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **shared_loader_options)

print("\n✅ Data loaders created:")
print("  Training:   {:4d} batches (batch_size={})".format(len(train_loader), BATCH_SIZE))
print("  Validation: {:4d} batches (batch_size={})".format(len(val_loader), BATCH_SIZE))
print("  Testing:    {:4d} batches (batch_size={})".format(len(test_loader), BATCH_SIZE))

# ---- STEP 9: sanity check ----------------------------------------------
print(f"\n{'=' * 70}")
print("SANITY CHECK")
print("=" * 70)

# Pull a single training batch to confirm the pipeline runs end to end.
images, batch_labels = next(iter(train_loader))
print("\n✅ Successfully loaded a batch:")
print("  Image shape: {}".format(images.shape))
print("  Label shape: {}".format(batch_labels.shape))
print("  Image range: [{:.3f}, {:.3f}]".format(images.min(), images.max()))
print("  Labels in batch: {}...".format(batch_labels.tolist()[:10]))

print(f"\n{'=' * 70}")
print("✅ DATASET LOADING COMPLETE!")
print("=" * 70)

LOADING AND SPLITTING DATASET

Split ratios:
  Training:   70%
  Validation: 15%
  Testing:    15%

Loading dataset from: ./RealWaste
Loaded 4752 images from 9 classes

Extracting labels...

Label distribution in full dataset:
  Cardboard           :  461 images (9.7%)
  Food Organics       :  411 images (8.6%)
  Glass               :  420 images (8.8%)
  Metal               :  790 images (16.6%)
  Miscellaneous Trash :  495 images (10.4%)
  Paper               :  500 images (10.5%)
  Plastic             :  921 images (19.4%)
  Textile Trash       :  318 images (6.7%)
  Vegetation          :  436 images (9.2%)

1️⃣ Splitting: Train vs (Val + Test)...
  Train: 3326 images
  Temp:  1426 images

2️⃣ Splitting: Val vs Test...
  Val:  713 images
  Test: 713 images

VERIFYING STRATIFIED SPLIT

TRAINING distribution:
  Cardboard           :  323 (  9.7%)
  Food Organics       :  288 (  8.7%)
  Glass               :  294 (  8.8%)
  Metal               :  553 ( 16.6%)
  Miscellaneous Trash :  3

Load pretrained ResNet50

In [None]:
# Load ResNet50 with ImageNet weights. `weights=` replaces the deprecated
# `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag selected.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# freeze earlier layers (optional for transfer learning)
# for params in model.parameters():
#   params.requires_grad = False

# Replace the 1000-way ImageNet classifier with a head sized for this dataset.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, NUM_CLASSES)

# Move the model to the GPU if available
model = model.to(device)




In [None]:
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    Call the instance once per epoch with the validation loss. An epoch counts
    as an improvement when the loss is at most ``best_loss - min_delta``; the
    model's weights are then snapshotted to CPU memory. After ``patience``
    consecutive epochs without improvement, ``early_stop`` becomes ``True``.

    Args:
        patience: Number of consecutive non-improving epochs tolerated.
        min_delta: Minimum decrease in loss that counts as an improvement.

    Attributes:
        counter: Consecutive non-improving epochs seen so far.
        best_loss: Lowest loss recorded (``None`` before the first finite loss).
        early_stop: Set to ``True`` once ``patience`` is exhausted.
        best_model: CPU copy of the ``state_dict`` at the best epoch, or ``None``.
    """

    def __init__(self, patience=10, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.best_model = None

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and update the stop flag."""
        import math  # local import keeps this cell self-contained

        if math.isnan(val_loss):
            # Every comparison against NaN is False, so without this guard a
            # NaN loss would be taken as an improvement, overwrite best_loss,
            # and make all later epochs look like improvements as well.
            improved = False
        elif self.best_loss is None:
            improved = True
        else:
            improved = not (val_loss > self.best_loss - self.min_delta)

        if improved:
            self.best_loss = val_loss
            self.save_checkpoint(model)
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, model):
        """Snapshot the model's current weights to CPU memory."""
        self.best_model = {k: v.cpu().clone() for k, v in model.state_dict().items()}

    def restore_best(self, model):
        """Load the best snapshot back into ``model``.

        Returns:
            ``True`` if a snapshot existed and was loaded, ``False`` otherwise.
        """
        if self.best_model is None:
            return False
        model.load_state_dict(self.best_model)
        return True

early_stopping = EarlyStopping(patience=10)
best_val_acc = 0.0

print(f"✅ Early stopping enabled (patience={early_stopping.patience})")

✅ Early stopping enabled (patience=10)


In [None]:
criterion = nn.CrossEntropyLoss()

# Only the replaced classification head is handed to the optimiser.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# The training loop calls scheduler.step() once per epoch and stores
# scheduler.state_dict() in the checkpoint, so a scheduler has to exist
# before that cell runs.
# NOTE(review): the original schedule is not recorded in this notebook. The
# saved log shows a constant LR of 0.001 for the first 13 epochs; this step
# schedule is consistent with that, but confirm the intended values.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# Replaces the patience=10 instance created in the EarlyStopping cell.
early_stopping = EarlyStopping(patience=15)

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to train; switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss taking ``(outputs, labels)`` and returning a scalar
            that is averaged over the batch.
        optimizer: Optimiser holding the parameters to update.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)`` - mean loss per sample and accuracy in
        percent, both over the samples actually processed.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # Clip exactly the parameters the optimiser will update. Clipping over
    # model.parameters() would fold the gradients of layers the optimiser does
    # not own into the global norm, shrinking the update of the trained
    # parameters for no reason.
    clipped_params = [p for group in optimizer.param_groups for p in group['params']]

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(clipped_params, max_norm=1.0)

        optimizer.step()

        batch_count = labels.size(0)
        # loss is a batch mean; re-weight by batch size so a smaller final
        # batch is not over-weighted in the epoch average.
        running_loss += loss.item() * batch_count
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_count

    if total == 0:
        raise ValueError("dataloader yielded no samples")

    # Divide by the samples seen rather than len(dataloader.dataset): the two
    # differ when the loader drops the last batch or uses a sampler.
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc


def validate_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without computing gradients.

    Puts the model in eval mode and returns ``(epoch_loss, epoch_acc)``: the
    summed per-sample loss divided by ``len(dataloader.dataset)``, and the
    accuracy in percent over the samples seen.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_images, batch_targets in dataloader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_targets)

            # batch_loss is a batch mean; scale it back to a per-batch sum.
            loss_sum += batch_loss.item() * batch_images.size(0)
            predictions = logits.argmax(dim=1)
            n_seen += batch_targets.size(0)
            n_correct += int((predictions == batch_targets).sum())

    mean_loss = loss_sum / len(dataloader.dataset)
    accuracy_pct = 100 * n_correct / n_seen
    return mean_loss, accuracy_pct


# Main training run: NUM_EPOCHS epochs of train/validate with per-epoch LR
# scheduling, best-by-validation-accuracy checkpointing and early stopping on
# validation loss.
# NOTE(review): `scheduler` must already exist in the kernel namespace
# (created next to `optimizer`); this cell only steps it and checkpoints it.
print("\n" + "="*70)
print("STARTING TRAINING - RESNET50 TRANSFER LEARNING")
print("="*70)

CHECKPOINT_PATH = 'best_wastenet_deep.pth'

# (validation-accuracy threshold in percent, message printed when crossed)
MILESTONES = (
    (80, "   🎯 Milestone: 80% accuracy reached!"),
    (85, "   🎉 Milestone: 85% accuracy reached!"),
    (90, "   🏆 EXCELLENT: 90% TARGET ACHIEVED!"),
)

history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': [],
    'lr': []
}

best_val_acc = 0.0
best_epoch = 0
start_time = time.time()

for epoch in range(NUM_EPOCHS):
    epoch_start = time.time()

    # Train
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

    # Validate
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)

    # Update scheduler; the LR logged below is the one the next epoch will use
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']

    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['lr'].append(current_lr)

    # Print progress
    epoch_time = time.time() - epoch_start
    print(f"Epoch [{epoch+1:3d}/{NUM_EPOCHS}] ({epoch_time:4.1f}s) | "
          f"LR: {current_lr:.6f} | "
          f"Train: {train_acc:5.2f}% | "
          f"Val: {val_acc:5.2f}%", end='')

    # Save best model (selected by validation accuracy)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch + 1
        torch.save({
            'epoch': epoch,  # 0-based, unlike best_epoch
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'val_acc': val_acc,
            'val_loss': val_loss,
            'train_acc': train_acc,
            'history': history
        }, CHECKPOINT_PATH)
        print(" ✨ BEST!", end='')

    print()

    # Milestone messages: report every threshold crossed this epoch. Each
    # threshold is tested independently so a jump over several thresholds
    # reports all of them, and the first epoch is compared against 0%.
    previous_val_acc = history['val_acc'][-2] if len(history['val_acc']) > 1 else 0.0
    for threshold, message in MILESTONES:
        if previous_val_acc < threshold <= val_acc:
            print(message)

    # Early stopping check (monitors validation loss, not accuracy)
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print(f"\n⏹️  Early stopping triggered at epoch {epoch+1}")
        break

total_time = time.time() - start_time

print("\n" + "="*70)
print("✅ TRAINING COMPLETE")
print("="*70)
print(f"⏱️  Total training time: {total_time/60:.1f} minutes ({total_time/3600:.2f} hours)")
print(f"🏆 Best validation accuracy: {best_val_acc:.2f}%")
print(f"📍 Best epoch: {best_epoch}")
print(f"💾 Model saved as: {CHECKPOINT_PATH}")
print("="*70)


STARTING TRAINING - CUSTOM WASTENET-DEEP
Epoch [  1/100] (49.3s) | LR: 0.001000 | Train: 67.65% | Val: 69.99% ✨ BEST!
Epoch [  2/100] (49.2s) | LR: 0.001000 | Train: 70.99% | Val: 73.49% ✨ BEST!
Epoch [  3/100] (47.9s) | LR: 0.001000 | Train: 73.21% | Val: 74.47% ✨ BEST!
Epoch [  4/100] (47.3s) | LR: 0.001000 | Train: 73.24% | Val: 73.35%
Epoch [  5/100] (48.8s) | LR: 0.001000 | Train: 74.44% | Val: 75.74% ✨ BEST!
Epoch [  6/100] (48.1s) | LR: 0.001000 | Train: 75.14% | Val: 77.00% ✨ BEST!
Epoch [  7/100] (48.2s) | LR: 0.001000 | Train: 75.50% | Val: 75.74%
Epoch [  8/100] (47.9s) | LR: 0.001000 | Train: 75.05% | Val: 77.98% ✨ BEST!
Epoch [  9/100] (48.9s) | LR: 0.001000 | Train: 77.09% | Val: 76.72%
Epoch [ 10/100] (48.4s) | LR: 0.001000 | Train: 77.06% | Val: 73.77%
Epoch [ 11/100] (47.6s) | LR: 0.001000 | Train: 76.49% | Val: 76.44%
Epoch [ 12/100] (48.5s) | LR: 0.001000 | Train: 77.30% | Val: 76.58%
Epoch [ 13/100] (48.8s) | LR: 0.001000 | Train: 77.93% | Val: 76.44%
Epoch [ 14/10

KeyboardInterrupt: 

In [None]:
# Plain 20-epoch train/validate loop (no gradient clipping, LR scheduling,
# checkpointing or early stopping). Accuracies here are fractions in [0, 1].
# The batch variables are deliberately not called `labels`: at notebook top
# level that name would overwrite the global label array that
# print_split_distribution() reads.
for epoch in range(20):
    model.train()
    running_loss, running_corrects = 0.0, 0

    for batch_images, batch_targets in train_loader:  # from the existing split
        batch_images, batch_targets = batch_images.to(device), batch_targets.to(device)

        optimizer.zero_grad()
        outputs = model(batch_images)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()

        preds = outputs.argmax(dim=1)
        # loss is a batch mean; re-weight by batch size for the epoch average.
        running_loss += loss.item() * batch_images.size(0)
        running_corrects += (preds == batch_targets).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects / len(train_dataset)

    # Validation phase
    model.eval()
    val_running_loss, val_running_corrects = 0.0, 0
    with torch.no_grad():
        for batch_images, batch_targets in val_loader:
            batch_images, batch_targets = batch_images.to(device), batch_targets.to(device)
            outputs = model(batch_images)
            loss = criterion(outputs, batch_targets)

            preds = outputs.argmax(dim=1)
            val_running_loss += loss.item() * batch_images.size(0)
            val_running_corrects += (preds == batch_targets).sum().item()

    val_epoch_loss = val_running_loss / len(val_dataset)
    val_epoch_acc = val_running_corrects / len(val_dataset)

    print(f"Epoch {epoch+1}: Train Loss={epoch_loss:.4f} Acc={epoch_acc:.4f}, Val Loss={val_epoch_loss:.4f} Acc={val_epoch_acc:.4f}")

KeyboardInterrupt: 