In [7]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchvision import datasets, models, transforms
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torchvision
from pathlib import Path
import time
import copy

In [8]:
# Display the installed PyTorch version string as this cell's output.
torch.__version__

'2.9.0+cu128'

In [9]:
# Dataset roots, given relative to the notebook's working directory.
_datasets_root = Path("data") / "datasets"

# Root that holds the train/ val/ test/ sub-folders read by the data cells below.
input_path = _datasets_root / "trashnet_01"
# Root of the second image collection, loaded below as the "test_01" split.
input_path2 = _datasets_root / "self-collected-01"

In [10]:
import random

# One seed shared by every random number generator the notebook touches.
seed = 16

# Seed Python's RNG, NumPy's global RNG, and PyTorch's CPU and CUDA RNGs.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [11]:
# Run on the GPU when a CUDA runtime is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [12]:
from collections import Counter

def compute_class_weights_from_imagefolder(train_root, num_classes=None):
    """Compute inverse-frequency class weights for an ImageFolder directory.

    Each class ``i`` gets the raw weight ``total / count_i``; the weights are
    then rescaled so that they sum to ``num_classes``.

    Args:
        train_root: Directory laid out for ``torchvision.datasets.ImageFolder``.
        num_classes: Number of class indices (``0 .. num_classes - 1``) to
            produce weights for. When ``None`` it is taken from the folder.

    Returns:
        A ``(weights, classes, counts)`` tuple: a 1-D float tensor of length
        ``num_classes``, the class-name list reported by ``ImageFolder``, and a
        ``Counter`` mapping class index to number of images.

    Raises:
        ValueError: If any requested class index has no images. A weight cannot
            be derived for it, and the original code failed here with a bare
            ZeroDivisionError.
    """
    # Only the label list is needed, so no transform is attached: nothing is decoded.
    ds = datasets.ImageFolder(train_root)
    if num_classes is None:
        num_classes = len(ds.classes)

    counts = Counter(ds.targets)
    total = sum(counts.values())

    empty = [i for i in range(num_classes) if counts.get(i, 0) == 0]
    if empty:
        raise ValueError(
            f"No images found for class indices {empty} under {train_root}; "
            f"cannot compute inverse-frequency weights for num_classes={num_classes}."
        )

    weights = torch.tensor([total / counts[i] for i in range(num_classes)], dtype=torch.float)
    # Rescale so the weights sum to the number of classes.
    weights = weights / weights.sum() * num_classes
    return weights, ds.classes, counts

# The class count is read from the training folder layout.
tmp_ds = datasets.ImageFolder(input_path / "train")
num_classes = len(tmp_ds.classes)

# Inverse-frequency weights, moved to the training device and fed to the loss.
class_weights, class_names, train_counts = compute_class_weights_from_imagefolder(
    input_path / "train", num_classes
)
class_weights = class_weights.to(device)
criterion_weighted = nn.CrossEntropyLoss(weight=class_weights)

for _label, _value in (
    ("Classes:", class_names),
    ("Train counts:", train_counts),
    ("Class weights:", class_weights),
):
    print(_label, _value)

Classes: ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
Train counts: Counter({3: 475, 1: 400, 4: 385, 2: 328, 0: 322, 5: 109})
Class weights: tensor([0.8270, 0.6657, 0.8119, 0.5606, 0.6917, 2.4431], device='cuda:0')


In [13]:
# Baseline data pipeline: random crop + horizontal flip for training, a plain
# resize for every evaluation split.

# Per-channel normalisation constants; the augmentation cells below reuse `normalize`.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

data_transforms = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ]),
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize
    ]),
}

# split name -> (image root, key into data_transforms); only "train" is augmented.
# NOTE(review): "test_01" takes its class indices from the folders under
# input_path2 — confirm they match the training classes before comparing metrics.
image_datasets = {
    split: datasets.ImageFolder(root, data_transforms[tf_key])
    for split, root, tf_key in (
        ("train", input_path / "train", "train"),
        ("val", input_path / "val", "val"),
        ("test", input_path / "test", "val"),
        ("test_01", input_path2, "val"),
    )
}

# Only the training loader shuffles.
dataloaders = {
    split: torch.utils.data.DataLoader(ds, batch_size=32, shuffle=(split == "train"), num_workers=4)
    for split, ds in image_datasets.items()
}

# Sample counts for every split. The original dict omitted "test", unlike the
# geo/photo/mix size dicts; deriving it from image_datasets keeps them aligned.
dataset_size = {split: len(ds) for split, ds in image_datasets.items()}

num_classes = len(image_datasets["train"].classes)

In [14]:
# Geometric augmentation: the baseline crop/flip plus a small rotation and a
# mild affine jitter (translate / scale / shear). Evaluation splits are only resized.

data_transforms_geo = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05), scale=(0.9, 1.1), shear=5),
        transforms.ToTensor(),
        normalize,
    ]),
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]),
}

# split name -> (image root, key into data_transforms_geo)
image_datasets_geo = {
    split: datasets.ImageFolder(root, data_transforms_geo[tf_key])
    for split, root, tf_key in (
        ("train", input_path / "train", "train"),
        ("val", input_path / "val", "val"),
        ("test", input_path / "test", "val"),
        ("test_01", input_path2, "val"),
    )
}

# Only the training loader shuffles.
dataloaders_geo = {
    split: torch.utils.data.DataLoader(ds, batch_size=32, shuffle=(split == "train"), num_workers=4)
    for split, ds in image_datasets_geo.items()
}

dataset_size_geo = {split: len(ds) for split, ds in image_datasets_geo.items()}

num_classes_geo = len(image_datasets_geo["train"].classes)


In [15]:
# Photometric augmentation: the baseline crop/flip plus colour jitter and an
# occasional grayscale conversion. Evaluation splits are only resized.

data_transforms_photo = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1),
        transforms.RandomGrayscale(p=0.1),
        transforms.ToTensor(),
        normalize,
    ]),
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]),
}

# split name -> (image root, key into data_transforms_photo)
image_datasets_photo = {
    split: datasets.ImageFolder(root, data_transforms_photo[tf_key])
    for split, root, tf_key in (
        ("train", input_path / "train", "train"),
        ("val", input_path / "val", "val"),
        ("test", input_path / "test", "val"),
        ("test_01", input_path2, "val"),
    )
}

# Only the training loader shuffles.
dataloaders_photo = {
    split: torch.utils.data.DataLoader(ds, batch_size=32, shuffle=(split == "train"), num_workers=4)
    for split, ds in image_datasets_photo.items()
}

dataset_size_photo = {split: len(ds) for split, ds in image_datasets_photo.items()}

num_classes_photo = len(image_datasets_photo["train"].classes)


In [None]:
# Mixed augmentation: geometric + colour jitter + Gaussian blur on the image,
# followed by random erasing on the normalised tensor. Evaluation splits are
# only resized.

data_transforms_mix = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05), scale=(0.9, 1.1), shear=5),
        transforms.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        normalize,
        # Placed after ToTensor/normalize, so it operates on the tensor.
        transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value='random'),
    ]),
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]),
}

# split name -> (image root, key into data_transforms_mix)
image_datasets_mix = {
    split: datasets.ImageFolder(root, data_transforms_mix[tf_key])
    for split, root, tf_key in (
        ("train", input_path / "train", "train"),
        ("val", input_path / "val", "val"),
        ("test", input_path / "test", "val"),
        ("test_01", input_path2, "val"),
    )
}

# Only the training loader shuffles.
dataloaders_mix = {
    split: torch.utils.data.DataLoader(ds, batch_size=32, shuffle=(split == "train"), num_workers=4)
    for split, ds in image_datasets_mix.items()
}

dataset_size_mix = {split: len(ds) for split, ds in image_datasets_mix.items()}

num_classes_mix = len(image_datasets_mix["train"].classes)


In [17]:
#MixUp function

def apply_mixup(inputs, labels, alpha=0.4):
    """Blend each sample of a batch with a randomly chosen partner (MixUp).

    Args:
        inputs: Batch tensor, indexed along dimension 0.
        labels: Label tensor, indexed along dimension 0 in step with ``inputs``.
        alpha: Both shape parameters of the Beta distribution the mixing
            coefficient is drawn from. ``alpha <= 0`` disables mixing.

    Returns:
        ``(mixed_inputs, labels_a, labels_b, lam)`` where
        ``mixed_inputs = lam * inputs + (1 - lam) * inputs[perm]``,
        ``labels_a`` is ``labels``, ``labels_b`` is ``labels[perm]`` and ``lam``
        is a Python ``float``. With mixing disabled the inputs and labels are
        returned unchanged and ``lam`` is ``1.0``.
    """
    if alpha <= 0:
        return inputs, labels, labels, 1.0

    # Convert NumPy's float64 scalar to a plain float so both return paths yield
    # the same type and the tensor arithmetic below (and the caller's
    # ``lam * loss``) uses ordinary Python-scalar semantics.
    lam = float(np.random.beta(alpha, alpha))

    # Random pairing of samples, created on the same device as the batch.
    index = torch.randperm(inputs.size(0), device=inputs.device)

    mixed_inputs = lam * inputs + (1 - lam) * inputs[index]
    return mixed_inputs, labels, labels[index], lam

In [18]:
def train_model(model, dataloaders, dataset_size, device, criterion, optimizer, num_epochs = 25, use_mixup=False, mixup_prob=0.5, mixup_alpha=0.4):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a "train" pass followed by a "val" pass. After training,
    the weights from the epoch with the highest validation accuracy are
    restored into the model.

    Args:
        model: Network to optimise; updated in place.
        dataloaders: Mapping with "train" and "val" iterables that yield
            ``(inputs, labels)`` batches.
        dataset_size: Mapping with "train" and "val" sample counts, used as the
            denominators of the per-epoch loss and accuracy.
        device: Device each batch is moved to.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimiser stepped once per training batch.
        num_epochs: Number of train+val epochs.
        use_mixup: Enables MixUp on training batches.
        mixup_prob: Per-batch probability of applying MixUp when enabled.
        mixup_alpha: ``alpha`` forwarded to ``apply_mixup``.

    Returns:
        ``(model, history)`` where ``history`` maps "train_loss", "train_acc",
        "val_loss" and "val_acc" to lists holding one value per epoch.
    """
    start_time = time.time()

    top_state = copy.deepcopy(model.state_dict())
    top_val_acc = 0.0

    history = {key: [] for key in ("train_loss", "train_acc", "val_loss", "val_acc")}

    for epoch_idx in range(num_epochs):
        print(f"Epoch {epoch_idx+1}/{num_epochs}")
        print("-" * 10)

        for phase in ("train", "val"):
            is_training = phase == "train"
            if is_training:
                model.train()
            else:
                model.eval()

            loss_sum = 0.0
            correct_sum = 0.0

            for batch_x, batch_y in dataloaders[phase]:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only during the training pass.
                with torch.set_grad_enabled(is_training):
                    # Short-circuits, so the NumPy RNG is consulted only for
                    # training batches when MixUp is enabled.
                    mix_this_batch = is_training and use_mixup and (np.random.rand() < mixup_prob)

                    if mix_this_batch:
                        blended, targets_a, targets_b, lam = apply_mixup(batch_x, batch_y, alpha=mixup_alpha)

                        logits = model(blended)
                        preds = torch.max(logits, 1)[1]

                        loss = lam * criterion(logits, targets_a) + (1 - lam) * criterion(logits, targets_b)

                        # A hit against either partner label counts in
                        # proportion to that partner's mixing weight.
                        hits_a = torch.sum(preds == targets_a).item()
                        hits_b = torch.sum(preds == targets_b).item()
                        correct_sum += (float(lam) * hits_a + (1 - float(lam)) * hits_b)
                    else:
                        logits = model(batch_x)
                        preds = torch.max(logits, 1)[1]

                        loss = criterion(logits, batch_y)
                        correct_sum += torch.sum(preds == batch_y).item()

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by batch size so the epoch value is per-sample.
                loss_sum += loss.item() * batch_x.size(0)

            epoch_loss = loss_sum / dataset_size[phase]
            epoch_acc = correct_sum / dataset_size[phase]

            print(f"{phase} Loss :{epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            history[f"{phase}_loss"].append(epoch_loss)
            history[f"{phase}_acc"].append(epoch_acc)

            # Snapshot the weights whenever validation accuracy improves.
            if not is_training and epoch_acc > top_val_acc:
                top_val_acc = epoch_acc
                top_state = copy.deepcopy(model.state_dict())

        print()

    elapsed = time.time() - start_time
    print(f"Training complete in {elapsed // 60:.0f}m {elapsed % 60:.0f}s")
    print(f"Best val Acc: {top_val_acc:.4f}")

    model.load_state_dict(top_state)
    return model, history

In [19]:
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support

def evaluate_model(model, dataloader, device, class_names):
    """Print accuracy, macro precision/recall/F1 and a per-class report.

    Args:
        model: Classifier whose output is argmax-ed over dimension 1.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the inputs are moved to before the forward pass.
        class_names: Sequence of class names, where ``class_names[i]`` names
            label index ``i``.

    Returns:
        None. All results are printed.
    """
    model.eval()
    preds_list, labels_list = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            preds_list.extend(preds.cpu().tolist())
            labels_list.extend(labels.cpu().tolist())

    # Fix the label set to every known class. Without this,
    # classification_report raises ValueError when the evaluated split neither
    # contains nor predicts some class, because target_names then has more
    # entries than the labels found in the data. Results are unchanged whenever
    # every class does appear in the data.
    label_ids = list(range(len(class_names)))

    acc = accuracy_score(labels_list, preds_list)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels_list, preds_list, labels=label_ids, average='macro', zero_division=0
    )

    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-score:  {f1:.4f}")
    print("\nPer-class breakdown:")
    print(classification_report(
        labels_list, preds_list, labels=label_ids, target_names=class_names, zero_division=0
    ))

In [21]:
# ResNet-50 baseline: pretrained backbone, new classification head, weighted
# loss, baseline data pipeline.

resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# Swap the final fully connected layer for one with num_classes outputs.
resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)
resnet = resnet.to(device)

# Every parameter of the network is optimised, not only the new head.
optimizer = optim.SGD(resnet.parameters(), lr=0.001)

resnet, resnet_history = train_model(
    resnet, dataloaders, dataset_size, device, criterion_weighted, optimizer,
    num_epochs=25, use_mixup=False, mixup_prob=0.5, mixup_alpha=0.4,
)


Epoch 1/25
----------
train Loss :1.7937 Acc: 0.1446
val Loss :1.7645 Acc: 0.1952

Epoch 2/25
----------
train Loss :1.7462 Acc: 0.2704
val Loss :1.7145 Acc: 0.3466

Epoch 3/25
----------
train Loss :1.7054 Acc: 0.3938
val Loss :1.6590 Acc: 0.4502

Epoch 4/25
----------
train Loss :1.6549 Acc: 0.5067
val Loss :1.6124 Acc: 0.5259

Epoch 5/25
----------
train Loss :1.6108 Acc: 0.5577
val Loss :1.5549 Acc: 0.5976

Epoch 6/25
----------
train Loss :1.5666 Acc: 0.5844
val Loss :1.4945 Acc: 0.6534

Epoch 7/25
----------
train Loss :1.5175 Acc: 0.6176
val Loss :1.4437 Acc: 0.6773

Epoch 8/25
----------
train Loss :1.4701 Acc: 0.6241
val Loss :1.3876 Acc: 0.6813

Epoch 9/25
----------
train Loss :1.4128 Acc: 0.6528
val Loss :1.3269 Acc: 0.7211

Epoch 10/25
----------
train Loss :1.3689 Acc: 0.6582
val Loss :1.2550 Acc: 0.7331

Epoch 11/25
----------
train Loss :1.3216 Acc: 0.6474
val Loss :1.2577 Acc: 0.7410

Epoch 12/25
----------
train Loss :1.2607 Acc: 0.6880
val Loss :1.1700 Acc: 0.7570

E

In [24]:
# Report the baseline ResNet-50 on both held-out splits.
for _heading, _split in (
    ("Res-net test metrics", "test"),
    ("Res-Net test_01 metrics", "test_01"),
):
    print(_heading)
    evaluate_model(resnet, dataloaders[_split], device, image_datasets["train"].classes)

Res-net test metrics
Accuracy:  0.7821
Precision: 0.7506
Recall:    0.7623
F1-score:  0.7524

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.92      0.83      0.87        41
       glass       0.76      0.76      0.76        51
       metal       0.70      0.63      0.67        41
       paper       0.90      0.88      0.89        60
     plastic       0.76      0.80      0.78        49
       trash       0.45      0.67      0.54        15

    accuracy                           0.78       257
   macro avg       0.75      0.76      0.75       257
weighted avg       0.79      0.78      0.79       257

Res-Net test_01 metrics
Accuracy:  0.5000
Precision: 0.5708
Recall:    0.4957
F1-score:  0.4904

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.67      0.39      0.49        31
       glass       0.85      0.64      0.73        44
       metal       0.59      0.38      0.46        45
       

In [22]:
# ResNet-50 trained with the photometric augmentation pipeline.

resnet_aug_1 = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# Swap the final fully connected layer for one with num_classes outputs.
resnet_aug_1.fc = nn.Linear(resnet_aug_1.fc.in_features, num_classes)
resnet_aug_1 = resnet_aug_1.to(device)

# Every parameter of the network is optimised, not only the new head.
optimizer = optim.SGD(resnet_aug_1.parameters(), lr=0.001)

resnet_aug_1, resnet_history_aug_1 = train_model(
    resnet_aug_1, dataloaders_photo, dataset_size_photo, device, criterion_weighted, optimizer,
    num_epochs=25, use_mixup=False, mixup_prob=0.5, mixup_alpha=0.4,
)


Epoch 1/25
----------
train Loss :1.7624 Acc: 0.2888
val Loss :1.7141 Acc: 0.4183

Epoch 2/25
----------
train Loss :1.7275 Acc: 0.3809
val Loss :1.6665 Acc: 0.5060

Epoch 3/25
----------
train Loss :1.6840 Acc: 0.4651
val Loss :1.6227 Acc: 0.5976

Epoch 4/25
----------
train Loss :1.6401 Acc: 0.5374
val Loss :1.5746 Acc: 0.6335

Epoch 5/25
----------
train Loss :1.6068 Acc: 0.5627
val Loss :1.5240 Acc: 0.7052

Epoch 6/25
----------
train Loss :1.5667 Acc: 0.5914
val Loss :1.4995 Acc: 0.7171

Epoch 7/25
----------
train Loss :1.5226 Acc: 0.6092
val Loss :1.4243 Acc: 0.7450

Epoch 8/25
----------
train Loss :1.4782 Acc: 0.6275
val Loss :1.3623 Acc: 0.7490

Epoch 9/25
----------
train Loss :1.4424 Acc: 0.6216
val Loss :1.3289 Acc: 0.7251

Epoch 10/25
----------
train Loss :1.3977 Acc: 0.6369
val Loss :1.2785 Acc: 0.7610

Epoch 11/25
----------
train Loss :1.3550 Acc: 0.6394
val Loss :1.2231 Acc: 0.7450

Epoch 12/25
----------
train Loss :1.3066 Acc: 0.6622
val Loss :1.1645 Acc: 0.7729

E

In [26]:
# Report the photometric-augmentation ResNet-50 on both held-out splits.
for _heading, _split in (
    ("Res-net test metrics", "test"),
    ("Res-Net test_01 metrics", "test_01"),
):
    print(_heading)
    evaluate_model(resnet_aug_1, dataloaders_photo[_split], device, image_datasets_photo["train"].classes)

Res-net test metrics
Accuracy:  0.7665
Precision: 0.7404
Recall:    0.7385
F1-score:  0.7299

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.94      0.76      0.84        41
       glass       0.73      0.84      0.78        51
       metal       0.68      0.63      0.66        41
       paper       0.87      0.88      0.88        60
     plastic       0.88      0.71      0.79        49
       trash       0.35      0.60      0.44        15

    accuracy                           0.77       257
   macro avg       0.74      0.74      0.73       257
weighted avg       0.79      0.77      0.77       257

Res-Net test_01 metrics
Accuracy:  0.5039
Precision: 0.5215
Recall:    0.5122
F1-score:  0.5032

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.50      0.65      0.56        31
       glass       0.70      0.68      0.69        44
       metal       0.61      0.31      0.41        45
       

In [None]:
# ResNet-50 trained with the geometric augmentation pipeline.

resnet_aug_2 = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# Swap the final fully connected layer for one with num_classes outputs.
resnet_aug_2.fc = nn.Linear(resnet_aug_2.fc.in_features, num_classes)
resnet_aug_2 = resnet_aug_2.to(device)

# Every parameter of the network is optimised, not only the new head.
optimizer = optim.SGD(resnet_aug_2.parameters(), lr=0.001)

resnet_aug_2, resnet_history_aug_2 = train_model(
    resnet_aug_2, dataloaders_geo, dataset_size_geo, device, criterion_weighted, optimizer,
    num_epochs=25, use_mixup=False, mixup_prob=0.5, mixup_alpha=0.4,
)


Epoch 1/25
----------
train Loss :1.7687 Acc: 0.2364
val Loss :1.7330 Acc: 0.3307

Epoch 2/25
----------
train Loss :1.7448 Acc: 0.2941
val Loss :1.7035 Acc: 0.3904

Epoch 3/25
----------
train Loss :1.7173 Acc: 0.3789
val Loss :1.6710 Acc: 0.4861

Epoch 4/25
----------
train Loss :1.6844 Acc: 0.4638
val Loss :1.6158 Acc: 0.5976

Epoch 5/25
----------
train Loss :1.6569 Acc: 0.4786
val Loss :1.5826 Acc: 0.6295

Epoch 6/25
----------
train Loss :1.6345 Acc: 0.5125
val Loss :1.5504 Acc: 0.6574

Epoch 7/25
----------
train Loss :1.5843 Acc: 0.5536
val Loss :1.4939 Acc: 0.7052

Epoch 8/25
----------
train Loss :1.5665 Acc: 0.5399
val Loss :1.4707 Acc: 0.7131

Epoch 9/25
----------
train Loss :1.5434 Acc: 0.5343
val Loss :1.4391 Acc: 0.7012

Epoch 10/25
----------
train Loss :1.4953 Acc: 0.5722
val Loss :1.4140 Acc: 0.7131

Epoch 11/25
----------
train Loss :1.4756 Acc: 0.5737
val Loss :1.3758 Acc: 0.7251

Epoch 12/25
----------
train Loss :1.4449 Acc: 0.5681
val Loss :1.3173 Acc: 0.7450

E

In [27]:
# Report the geometric-augmentation ResNet-50 on both held-out splits.
for _heading, _split in (
    ("Res-net test metrics", "test"),
    ("Res-Net test_01 metrics", "test_01"),
):
    print(_heading)
    evaluate_model(resnet_aug_2, dataloaders_geo[_split], device, image_datasets_geo["train"].classes)

Res-net test metrics
Accuracy:  0.7276
Precision: 0.7162
Recall:    0.7079
F1-score:  0.7006

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.90      0.66      0.76        41
       glass       0.71      0.80      0.75        51
       metal       0.57      0.76      0.65        41
       paper       0.83      0.82      0.82        60
     plastic       0.86      0.61      0.71        49
       trash       0.43      0.60      0.50        15

    accuracy                           0.73       257
   macro avg       0.72      0.71      0.70       257
weighted avg       0.76      0.73      0.73       257

Res-Net test_01 metrics
Accuracy:  0.5827
Precision: 0.6410
Recall:    0.5871
F1-score:  0.5886

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.95      0.65      0.77        31
       glass       0.62      0.75      0.68        44
       metal       0.49      0.62      0.55        45
       

In [28]:
# ResNet-50 trained with the mixed augmentation pipeline.
# NOTE(review): this cell rebinds resnet_aug_2 / resnet_history_aug_2, which the
# geometric-augmentation cell also assigns. After it runs, the geo model and its
# history are no longer reachable, and re-running the geo evaluation cell would
# score this model instead. Consider distinct names (e.g. resnet_aug_3) here and
# in the evaluation cell that follows.

resnet_aug_2 = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# Swap the final fully connected layer for one with num_classes outputs.
resnet_aug_2.fc = nn.Linear(resnet_aug_2.fc.in_features, num_classes)
resnet_aug_2 = resnet_aug_2.to(device)

# Every parameter of the network is optimised, not only the new head.
optimizer = optim.SGD(resnet_aug_2.parameters(), lr=0.001)

resnet_aug_2, resnet_history_aug_2 = train_model(
    resnet_aug_2, dataloaders_mix, dataset_size_mix, device, criterion_weighted, optimizer,
    num_epochs=25, use_mixup=False, mixup_prob=0.5, mixup_alpha=0.4,
)


Epoch 1/25
----------
train Loss :1.7857 Acc: 0.1877
val Loss :1.7574 Acc: 0.2948

Epoch 2/25
----------
train Loss :1.7661 Acc: 0.2452
val Loss :1.7322 Acc: 0.3625

Epoch 3/25
----------
train Loss :1.7461 Acc: 0.3284
val Loss :1.7053 Acc: 0.4143

Epoch 4/25
----------
train Loss :1.7300 Acc: 0.3784
val Loss :1.6772 Acc: 0.4980

Epoch 5/25
----------
train Loss :1.7057 Acc: 0.4185
val Loss :1.6590 Acc: 0.5418

Epoch 6/25
----------
train Loss :1.6879 Acc: 0.4532
val Loss :1.6206 Acc: 0.5817

Epoch 7/25
----------
train Loss :1.6645 Acc: 0.4993
val Loss :1.5856 Acc: 0.6335

Epoch 8/25
----------
train Loss :1.6404 Acc: 0.4968
val Loss :1.5562 Acc: 0.6534

Epoch 9/25
----------
train Loss :1.6237 Acc: 0.5106
val Loss :1.5323 Acc: 0.6653

Epoch 10/25
----------
train Loss :1.5884 Acc: 0.5448
val Loss :1.4889 Acc: 0.6972

Epoch 11/25
----------
train Loss :1.5579 Acc: 0.5661
val Loss :1.4621 Acc: 0.6972

Epoch 12/25
----------
train Loss :1.5298 Acc: 0.5656
val Loss :1.4247 Acc: 0.7131

E

In [29]:
# Report the mixed-augmentation ResNet-50 (currently bound to resnet_aug_2)
# on both held-out splits.
for _heading, _split in (
    ("Res-net test metrics", "test"),
    ("Res-Net test_01 metrics", "test_01"),
):
    print(_heading)
    evaluate_model(resnet_aug_2, dataloaders_mix[_split], device, image_datasets_mix["train"].classes)

Res-net test metrics
Accuracy:  0.7121
Precision: 0.6996
Recall:    0.7012
F1-score:  0.6915

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.84      0.78      0.81        41
       glass       0.77      0.65      0.70        51
       metal       0.51      0.76      0.61        41
       paper       0.87      0.75      0.80        60
     plastic       0.79      0.67      0.73        49
       trash       0.43      0.60      0.50        15

    accuracy                           0.71       257
   macro avg       0.70      0.70      0.69       257
weighted avg       0.74      0.71      0.72       257

Res-Net test_01 metrics
Accuracy:  0.5039
Precision: 0.5443
Recall:    0.5183
F1-score:  0.5135

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.63      0.77      0.70        31
       glass       0.68      0.52      0.59        44
       metal       0.33      0.56      0.41        45
       

In [18]:
# Drop the notebook's reference to the baseline ResNet-50, then ask PyTorch's
# CUDA caching allocator to release memory it holds but is not using.
# NOTE(review): resnet_aug_1 and resnet_aug_2 are not deleted anywhere in the
# notebook, so their memory stays allocated.
del resnet
torch.cuda.empty_cache()

In [None]:
# EfficientNet-B0: pretrained backbone, new classification head, baseline data pipeline.
# NOTE(review): this run uses an unweighted CrossEntropyLoss, whereas the
# ResNet-50 runs use criterion_weighted — confirm the difference is intended
# before comparing architectures.

efficient_net = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)

# classifier[1] is the final linear layer; replace it with a num_classes-way head.
in_features = efficient_net.classifier[1].in_features
efficient_net.classifier[1] = nn.Linear(in_features, num_classes)

efficient_net = efficient_net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(efficient_net.parameters(), lr=0.001)

efficient_net, efficient_net_history = train_model(
    efficient_net, dataloaders, dataset_size, device, criterion, optimizer,
    num_epochs=25,
)

In [None]:
# Report EfficientNet-B0 on both held-out splits.
for _heading, _split in (
    ("Efficient_net test metrics", "test"),
    ("Efficient_net test_01 metrics", "test_01"),
):
    print(_heading)
    evaluate_model(efficient_net, dataloaders[_split], device, image_datasets["train"].classes)


In [None]:
# Drop the notebook's reference to the EfficientNet model, then ask PyTorch's
# CUDA caching allocator to release memory it holds but is not using.
del efficient_net
torch.cuda.empty_cache()

In [None]:
# DenseNet-121: pretrained backbone, new classification head, baseline data pipeline.
# NOTE(review): this run uses an unweighted CrossEntropyLoss, whereas the
# ResNet-50 runs use criterion_weighted — confirm the difference is intended
# before comparing architectures.

densenet = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)
# Replace the classifier layer with a num_classes-way head.
densenet.classifier = nn.Linear(densenet.classifier.in_features, num_classes)
densenet = densenet.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(densenet.parameters(), lr=0.001)

densenet, densenet_history = train_model(
    densenet, dataloaders, dataset_size, device, criterion, optimizer,
    num_epochs=25,
)

In [12]:
# Report DenseNet-121 on both held-out splits.
for _heading, _split in (
    ("densenet test metrics", "test"),
    ("densenet test_01 metrics", "test_01"),
):
    print(_heading)
    evaluate_model(densenet, dataloaders[_split], device, image_datasets["train"].classes)

densenet test metrics
Accuracy:  0.9066
Precision: 0.9172
Recall:    0.8697
F1-score:  0.8849

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.95      0.98      0.96        41
       glass       0.85      0.86      0.85        51
       metal       0.84      0.88      0.86        41
       paper       0.97      0.98      0.98        60
     plastic       0.90      0.92      0.91        49
       trash       1.00      0.60      0.75        15

    accuracy                           0.91       257
   macro avg       0.92      0.87      0.88       257
weighted avg       0.91      0.91      0.90       257

densenet test_01 metrics
Accuracy:  0.4270
Precision: 0.8017
Recall:    0.4294
F1-score:  0.4185

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.24      1.00      0.39        15
       glass       0.82      0.64      0.72        14
       metal       1.00      0.27      0.42        15
     

In [None]:
# Drop the notebook's reference to the DenseNet model, then ask PyTorch's
# CUDA caching allocator to release memory it holds but is not using.
del densenet
torch.cuda.empty_cache()


In [15]:
# ConvNeXt-Tiny: pretrained backbone, new classification head, baseline data pipeline.
# NOTE: despite the `convnext_base` variable names, the checkpoint loaded here
# is ConvNeXt-Tiny (models.convnext_tiny with ConvNeXt_Tiny_Weights).
# NOTE(review): this run uses an unweighted CrossEntropyLoss, whereas the
# ResNet-50 runs use criterion_weighted — confirm the difference is intended
# before comparing architectures.
convnext_base = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights.IMAGENET1K_V1)

# classifier[2] is the final linear layer; replace it with a num_classes-way head.
in_features = convnext_base.classifier[2].in_features
convnext_base.classifier[2] = nn.Linear(in_features, num_classes)
convnext_base = convnext_base.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(convnext_base.parameters(), lr=0.001)

convnext_base, convnext_base_history = train_model(
    convnext_base, dataloaders, dataset_size, device, criterion, optimizer,
    num_epochs=25,
)

1.6%

Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /home/sera/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth


100.0%


Epoch 1/25
----------
train Loss :1.5678 Acc: 0.4255
val Loss :1.3140 Acc: 0.5817

Epoch 2/25
----------
train Loss :1.1305 Acc: 0.7083
val Loss :0.9192 Acc: 0.7849

Epoch 3/25
----------
train Loss :0.8742 Acc: 0.7940
val Loss :0.7776 Acc: 0.8247

Epoch 4/25
----------
train Loss :0.7217 Acc: 0.8143
val Loss :0.6549 Acc: 0.8327

Epoch 5/25
----------
train Loss :0.6075 Acc: 0.8455
val Loss :0.5063 Acc: 0.8805

Epoch 6/25
----------
train Loss :0.5324 Acc: 0.8623
val Loss :1.1671 Acc: 0.4940

Epoch 7/25
----------
train Loss :0.5099 Acc: 0.8613
val Loss :0.4446 Acc: 0.8805

Epoch 8/25
----------
train Loss :0.4330 Acc: 0.8920
val Loss :0.4732 Acc: 0.8845

Epoch 9/25
----------
train Loss :0.3919 Acc: 0.9000
val Loss :0.3862 Acc: 0.8964

Epoch 10/25
----------
train Loss :0.3516 Acc: 0.9099
val Loss :0.5459 Acc: 0.8406

Epoch 11/25
----------
train Loss :0.3484 Acc: 0.9104
val Loss :0.2957 Acc: 0.9323

Epoch 12/25
----------
train Loss :0.2991 Acc: 0.9292
val Loss :0.5274 Acc: 0.8247

E

In [16]:
# Report the ConvNeXt model on both held-out splits.
for _heading, _split in (
    ("ConvNext test metrics", "test"),
    ("convnext_base test_01 metrics", "test_01"),
):
    print(_heading)
    evaluate_model(convnext_base, dataloaders[_split], device, image_datasets["train"].classes)

ConvNext test metrics
Accuracy:  0.9183
Precision: 0.9298
Recall:    0.8988
F1-score:  0.9091

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.95      0.98      0.96        41
       glass       0.95      0.82      0.88        51
       metal       0.87      0.95      0.91        41
       paper       0.97      0.95      0.96        60
     plastic       0.84      0.96      0.90        49
       trash       1.00      0.73      0.85        15

    accuracy                           0.92       257
   macro avg       0.93      0.90      0.91       257
weighted avg       0.92      0.92      0.92       257

convnext_base test_01 metrics
Accuracy:  0.3708
Precision: 0.5576
Recall:    0.3698
F1-score:  0.3517

Per-class breakdown:
              precision    recall  f1-score   support

   cardboard       0.23      1.00      0.38        15
       glass       1.00      0.29      0.44        14
       metal       0.83      0.33      0.48        15


In [None]:
# Drop the notebook's reference to the ConvNeXt model, then ask PyTorch's
# CUDA caching allocator to release memory it holds but is not using.
del convnext_base
torch.cuda.empty_cache()