In [None]:
%env CUDA_LAUNCH_BLOCKING=1
import torchvision
import torch
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms

import timm
from datasets import load_dataset, DatasetDict
from torch.utils.data import DataLoader
from torchvision import transforms

from torch import optim
import sklearn.metrics as metrics
from sklearn.metrics import classification_report

Carrega o dataset e o divide em: treino, teste e validação

In [None]:
def _load_cifar100_with_val(val_fraction=0.1, seed=42):
    """Load CIFAR-100 and carve a validation subset out of its "train" split.

    Returns:
        A tuple `(parts, splits)` where `parts` is the raw result of
        `train_test_split` and `splits` is a DatasetDict with the keys
        "train", "val" and "test" (the original "test" split is kept as is).
    """
    raw = load_dataset("cifar100")
    parts = raw["train"].train_test_split(test_size=val_fraction, seed=seed)
    splits = DatasetDict({"train": parts["train"], "val": parts["test"], "test": raw["test"]})
    return parts, splits


# Two independent copies built with the same seed (hence the same partition):
# each copy later gets its own on-the-fly transform installed.
split, dataset = _load_cifar100_with_val()
split2, dataset2 = _load_cifar100_with_val()

Aplica as transformações e aumentações (data augmentation) no dataset

In [None]:
# Per-channel normalisation statistics (these are the commonly used ImageNet values).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Image pipeline: resize to 224x224, random horizontal flip, convert to tensor,
# then normalise channel-wise.
_pipeline_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
transform_224 = transforms.Compose(_pipeline_steps)

In [None]:
def transform_batch(example_batch, image_transform=None):
    """Turn a batch of PIL images into tensors and expose the labels under short names.

    Args:
        example_batch: dict of columns as passed by `Dataset.set_transform`; it must
            contain "img", "fine_label" and "coarse_label".
        image_transform: callable applied to every RGB image. When omitted, the
            training pipeline `transform_224` (which includes the random flip) is used.

    Returns:
        The same dict, with "pixel_values" (stacked image tensor), "labelsc"
        (copy of "fine_label") and "labelssc" (copy of "coarse_label") added and
        the "img" column removed.
    """
    if image_transform is None:
        image_transform = transform_224
    imgs = [image_transform(img.convert("RGB")) for img in example_batch["img"]]
    example_batch["pixel_values"] = torch.stack(imgs)
    example_batch["labelsc"] = example_batch["fine_label"]
    example_batch["labelssc"] = example_batch["coarse_label"]
    del example_batch["img"]
    return example_batch


# Validation and test images must be preprocessed deterministically, otherwise the
# reported metrics change from run to run. Reuse every step of transform_224 except
# the random horizontal flip so both pipelines stay in sync.
eval_transform_224 = transforms.Compose(
    [step for step in transform_224.transforms
     if not isinstance(step, transforms.RandomHorizontalFlip)]
)


def transform_batch_eval(example_batch):
    """Same as `transform_batch`, but without random augmentation (for val/test)."""
    return transform_batch(example_batch, image_transform=eval_transform_224)


dataset["train"].set_transform(transform_batch)
dataset["val"].set_transform(transform_batch_eval)
dataset["test"].set_transform(transform_batch_eval)


In [None]:
# Collapses the 20 coarse labels into 10 groups: labels 2k and 2k+1 share group k.
map_20_to_10 = {
    0:0, 1:0,
    2:1, 3:1,
    4:2, 5:2,
    6:3, 7:3,
    8:4, 9:4,
    10:5, 11:5,
    12:6, 13:6,
    14:7, 15:7,
    16:8, 17:8,
    18:9, 19:9,
}

# Lookup table derived from the dict: position i holds the group of coarse label i.
_COARSE_LOOKUP = torch.tensor(
    [map_20_to_10[label] for label in range(len(map_20_to_10))], dtype=torch.long
)


def map_coarse_tensor(tensor20):
    """Map coarse labels (0..19) to their 10-group ids using `map_20_to_10`.

    Args:
        tensor20: tensor (or anything `torch.as_tensor` accepts) of coarse labels.
            Any shape is supported, including 0-d and empty tensors; the input may
            live on any device.

    Returns:
        A CPU tensor of dtype `torch.long` with the same shape as the input.

    Raises:
        KeyError: if any label is outside the keys of `map_20_to_10`.
    """
    labels = torch.as_tensor(tensor20).detach().cpu().long()
    if labels.numel() > 0:
        lowest, highest = int(labels.min()), int(labels.max())
        # Validate explicitly so a negative label is rejected, not wrapped around.
        if lowest < 0 or highest >= _COARSE_LOOKUP.numel():
            raise KeyError(
                "coarse label out of range [0, %d): min=%d max=%d"
                % (_COARSE_LOOKUP.numel(), lowest, highest)
            )
    # index_select always returns a fresh tensor, so callers can never mutate the table.
    return _COARSE_LOOKUP.index_select(0, labels.reshape(-1)).reshape(labels.shape)

In [None]:
def transform_batch2(example_batch, image_transform=None):
    """Like the single-task transform, but with coarse labels collapsed to 10 groups.

    Args:
        example_batch: dict of columns as passed by `Dataset.set_transform`; it must
            contain "img", "fine_label" and "coarse_label".
        image_transform: callable applied to every RGB image. When omitted, the
            training pipeline `transform_224` (which includes the random flip) is used.

    Returns:
        The same dict, with "pixel_values" (stacked image tensor), "labelsc"
        (copy of "fine_label") and "labelssc" (coarse labels passed through
        `map_coarse_tensor`) added and the "img" column removed.
    """
    if image_transform is None:
        image_transform = transform_224
    imgs = [image_transform(img.convert("RGB")) for img in example_batch["img"]]
    example_batch["pixel_values"] = torch.stack(imgs)
    example_batch["labelsc"] = example_batch["fine_label"]
    example_batch["labelssc"] = map_coarse_tensor(torch.tensor(example_batch["coarse_label"], dtype=torch.long))
    del example_batch["img"]
    return example_batch


# Validation and test images must be preprocessed deterministically, otherwise the
# reported metrics change from run to run. Reuse every step of transform_224 except
# the random horizontal flip so both pipelines stay in sync.
eval_transform_224 = transforms.Compose(
    [step for step in transform_224.transforms
     if not isinstance(step, transforms.RandomHorizontalFlip)]
)


def transform_batch2_eval(example_batch):
    """Same as `transform_batch2`, but without random augmentation (for val/test)."""
    return transform_batch2(example_batch, image_transform=eval_transform_224)


dataset2["train"].set_transform(transform_batch2)
dataset2["val"].set_transform(transform_batch2_eval)
dataset2["test"].set_transform(transform_batch2_eval)

In [None]:
# Settings shared by every loader.
_LOADER_OPTS = dict(batch_size=128, num_workers=2)


def _make_loaders(splits):
    """Build the (train, val, test) loaders for one DatasetDict; only "train" is shuffled."""
    return tuple(
        DataLoader(splits[name], shuffle=(name == "train"), **_LOADER_OPTS)
        for name in ("train", "val", "test")
    )


train_loader, val_loader, test_loader = _make_loaders(dataset)
train_loader2, val_loader2, test_loader2 = _make_loaders(dataset2)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

modelo 100 classes

In [None]:
# Pretrained MobileNetV3-Small with a 100-way classifier, placed on the target device.
model = timm.create_model('mobilenetv3_small_100', pretrained=True, num_classes=100).to(device)

# Objective and optimiser.
criterion = nn.CrossEntropyLoss()
opt = optim.AdamW(model.parameters(), lr=0.01)

# Bookkeeping used by the training cell: per-epoch loss histories,
# early-stopping patience and epoch counters.
loss_train, loss_eval = [], []
patience_time = 15
epochs = 100
epoch = 0

Treino

In [None]:
stop = False
lowest_loss_eval = 10000
last_best_result = 0

# Train the fine-label (100-class) model. The loop ends when the validation loss has
# not improved for more than `patience_time` epochs, or when the `epochs` budget
# defined in the setup cell is exhausted, whichever happens first.
while not stop and epoch < epochs:
    # ---- training pass ----
    model.train()
    lloss = []
    for batch in train_loader:
        images = batch["pixel_values"].to(device)
        labels = batch["labelsc"].long().to(device)
        opt.zero_grad()  # always start the step from clean gradients
        pred = model(images)
        closs = criterion(pred, labels)
        closs.backward()
        opt.step()
        lloss.append(closs.item())
    # Store plain Python floats so the checkpoint does not embed NumPy scalar objects.
    loss_train.append(float(np.mean(lloss)))

    # ---- validation pass ----
    model.eval()
    lloss = []
    lres = []    # predicted class ids for the whole validation split
    lbtrue = []  # matching ground-truth class ids
    with torch.no_grad():
        for batch in val_loader:
            images = batch["pixel_values"].to(device)
            labels = batch["labelsc"].long().to(device)

            pred = model(images)
            closs = criterion(pred, labels)
            lloss.append(closs.item())
            res = pred.argmax(dim=1).tolist()
            lres += res
            lbtrue += labels.tolist()  # plain ints, same element type as lres
    avg_loss_eval = float(np.mean(lloss))
    loss_eval.append(avg_loss_eval)

    # ---- checkpointing and early stopping ----
    if avg_loss_eval < lowest_loss_eval:
        lowest_loss_eval = avg_loss_eval
        last_best_result = 0
        print("Best model found! saving...")
        actual_state = {'optim':opt.state_dict(),'model':model.state_dict(),'epoch':epoch,'loss_train':loss_train,'loss_eval':loss_eval}
        torch.save(actual_state,'best_modelc.pth')
    # Counts epochs since the best one (the best epoch itself is reported as 1).
    last_best_result += 1
    if last_best_result > patience_time:
        stop = True
    print("epoch %d loss_train %4.3f loss_eval %4.3f last_best %d"%(epoch,loss_train[-1],loss_eval[-1],last_best_result))
    epoch += 1

classification report

In [None]:
# Evaluate the checkpoint with the lowest validation loss, not whatever weights the
# last training epoch left in memory (early stopping runs past the best epoch).
# weights_only=False: the file was written by this notebook and also holds the
# optimizer state and the loss history (which may contain NumPy scalars).
# Never load checkpoints from an untrusted source this way.
best_state = torch.load('best_modelc.pth', map_location=device, weights_only=False)
model.load_state_dict(best_state['model'])
model.eval()  # make inference mode explicit: no dropout, frozen batch-norm statistics

all_preds = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        images = batch["pixel_values"].to(device)

        outputs = model(images)
        preds = outputs.argmax(dim=1)

        all_preds.extend(preds.tolist())
        # Labels are only needed on the host, so they never go to the device.
        all_labels.extend(batch["labelsc"].tolist())


print(classification_report(all_labels, all_preds))

modelo 20 super-classes

In [None]:
# Pretrained MobileNetV3-Small with a 20-way classifier, placed on the target device.
model = timm.create_model('mobilenetv3_small_100', pretrained=True, num_classes=20).to(device)

# Objective and optimiser.
criterion = nn.CrossEntropyLoss()
opt = optim.AdamW(model.parameters(), lr=0.01)

# Bookkeeping used by the training cell: per-epoch loss histories,
# early-stopping patience and epoch counters.
loss_train, loss_eval = [], []
patience_time = 15
epochs = 100
epoch = 0

Treino

In [None]:
stop = False
lowest_loss_eval = 10000
last_best_result = 0

# Train the coarse-label (20-class) model. The loop ends when the validation loss has
# not improved for more than `patience_time` epochs, or when the `epochs` budget
# defined in the setup cell is exhausted, whichever happens first.
while not stop and epoch < epochs:
    # ---- training pass ----
    model.train()
    lloss = []
    for batch in train_loader:
        images = batch["pixel_values"].to(device)
        labels = batch["labelssc"].long().to(device)
        opt.zero_grad()  # always start the step from clean gradients
        pred = model(images)
        closs = criterion(pred, labels)
        closs.backward()
        opt.step()
        lloss.append(closs.item())
    # Store plain Python floats so the checkpoint does not embed NumPy scalar objects.
    loss_train.append(float(np.mean(lloss)))

    # ---- validation pass ----
    model.eval()
    lloss = []
    lres = []    # predicted class ids for the whole validation split
    lbtrue = []  # matching ground-truth class ids
    with torch.no_grad():
        for batch in val_loader:
            images = batch["pixel_values"].to(device)
            labels = batch["labelssc"].long().to(device)

            pred = model(images)
            closs = criterion(pred, labels)
            lloss.append(closs.item())
            res = pred.argmax(dim=1).tolist()
            lres += res
            lbtrue += labels.tolist()  # plain ints, same element type as lres
    avg_loss_eval = float(np.mean(lloss))
    loss_eval.append(avg_loss_eval)

    # ---- checkpointing and early stopping ----
    if avg_loss_eval < lowest_loss_eval:
        lowest_loss_eval = avg_loss_eval
        last_best_result = 0
        print("Best model found! saving...")
        actual_state = {'optim':opt.state_dict(),'model':model.state_dict(),'epoch':epoch,'loss_train':loss_train,'loss_eval':loss_eval}
        torch.save(actual_state,'best_modelsc.pth')
    # Counts epochs since the best one (the best epoch itself is reported as 1).
    last_best_result += 1
    if last_best_result > patience_time:
        stop = True
    print("epoch %d loss_train %4.3f loss_eval %4.3f last_best %d"%(epoch,loss_train[-1],loss_eval[-1],last_best_result))
    epoch += 1

classification report

In [None]:
# Evaluate the checkpoint with the lowest validation loss, not whatever weights the
# last training epoch left in memory (early stopping runs past the best epoch).
# weights_only=False: the file was written by this notebook and also holds the
# optimizer state and the loss history (which may contain NumPy scalars).
# Never load checkpoints from an untrusted source this way.
best_state = torch.load('best_modelsc.pth', map_location=device, weights_only=False)
model.load_state_dict(best_state['model'])
model.eval()  # make inference mode explicit: no dropout, frozen batch-norm statistics

all_preds = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        images = batch["pixel_values"].to(device)

        outputs = model(images)
        preds = outputs.argmax(dim=1)

        all_preds.extend(preds.tolist())
        # Labels are only needed on the host, so they never go to the device.
        all_labels.extend(batch["labelssc"].tolist())


print(classification_report(all_labels, all_preds))

modelo multihead

In [None]:
class MultiHeadMobileNetV3(nn.Module):
    """MobileNetV3-Small backbone shared by two linear classification heads.

    Args:
        pretrained: whether timm should load pretrained weights into the backbone.
        num_classes_fine: number of outputs of the fine-label head.
        num_classes_coarse: number of outputs of the coarse-label head.
    """

    def __init__(self, pretrained=True, num_classes_fine=100, num_classes_coarse=10):
        super().__init__()

        # Honour the caller's `pretrained` choice (it used to be ignored).
        # num_classes=0 asks timm for the network without its classifier layer.
        self.backbone = timm.create_model(
            "mobilenetv3_small_100", pretrained=pretrained, num_classes=0
        )

        # Width of the feature vector both heads consume.
        # NOTE(review): assumed to match the backbone output for this timm version - confirm.
        in_features = 1024
        self.head_fine = nn.Linear(in_features, num_classes_fine)
        self.head_coarse = nn.Linear(in_features, num_classes_coarse)

    def forward(self, x):
        """Return `(fine_logits, coarse_logits)` computed from the shared backbone features."""
        feats = self.backbone(x)
        out_fine = self.head_fine(feats)
        out_coarse = self.head_coarse(feats)
        return out_fine, out_coarse

In [None]:
# Two-head network (fine + coarse outputs) placed on the target device.
model = MultiHeadMobileNetV3().to(device)

# Objective and optimiser.
criterion = nn.CrossEntropyLoss()
opt = optim.AdamW(model.parameters(), lr=0.01)

# Bookkeeping used by the training cell: per-epoch loss histories,
# early-stopping patience and epoch counters.
loss_train, loss_eval = [], []
patience_time = 15
epochs = 100
epoch = 0

Treino

In [None]:
stop = False
lowest_loss_eval = 10000
last_best_result = 0

# Relative weight of each head in the combined objective.
FINE_LOSS_WEIGHT = 0.7
COARSE_LOSS_WEIGHT = 0.3

# Train the two-head model on the combined loss. The loop ends when the validation
# loss has not improved for more than `patience_time` epochs, or when the `epochs`
# budget defined in the setup cell is exhausted, whichever happens first.
while not stop and epoch < epochs:
    # ---- training pass ----
    model.train()
    lloss = []
    for batch in train_loader2:
        images = batch["pixel_values"].to(device)
        labels_fine = batch["labelsc"].long().to(device)
        labels_coarse = batch["labelssc"].long().to(device)
        opt.zero_grad()  # always start the step from clean gradients
        pred_fine, pred_coarse = model(images)
        loss_fine = criterion(pred_fine, labels_fine)
        loss_coarse = criterion(pred_coarse, labels_coarse)
        loss = FINE_LOSS_WEIGHT * loss_fine + COARSE_LOSS_WEIGHT * loss_coarse
        loss.backward()
        opt.step()
        lloss.append(loss.item())
    # Store plain Python floats so the checkpoint does not embed NumPy scalar objects.
    loss_train.append(float(np.mean(lloss)))

    # ---- validation pass ----
    model.eval()
    lloss = []
    lresf = []    # fine predictions
    lresc = []    # coarse predictions
    lbtruef = []  # fine ground truth
    lbtruec = []  # coarse ground truth
    with torch.no_grad():
        for batch in val_loader2:
            images = batch["pixel_values"].to(device)
            labels_fine = batch["labelsc"].long().to(device)
            labels_coarse = batch["labelssc"].long().to(device)
            # Sanity checks: report labels that fall outside the expected class ranges.
            if labels_fine.min() < 0 or labels_fine.max() >= 100:
                print("Label fine fora do range:", labels_fine.min().item(), labels_fine.max().item())
            if labels_coarse.min() < 0 or labels_coarse.max() >= 10:
                print("Label coarse fora do range:", labels_coarse.min().item(), labels_coarse.max().item())
            pred_fine, pred_coarse = model(images)
            loss_fine = criterion(pred_fine, labels_fine)
            loss_coarse = criterion(pred_coarse, labels_coarse)
            loss = FINE_LOSS_WEIGHT * loss_fine + COARSE_LOSS_WEIGHT * loss_coarse
            lloss.append(loss.item())
            res = pred_fine.argmax(dim=1).tolist()
            lresf += res
            res = pred_coarse.argmax(dim=1).tolist()
            lresc += res
            lbtruef += labels_fine.cpu().tolist()
            lbtruec += labels_coarse.cpu().tolist()
    avg_loss_eval = float(np.mean(lloss))
    loss_eval.append(avg_loss_eval)

    # ---- checkpointing and early stopping ----
    if avg_loss_eval < lowest_loss_eval:
        lowest_loss_eval = avg_loss_eval
        last_best_result = 0
        print("Best model found! saving...")
        actual_state = {'optim':opt.state_dict(),'model':model.state_dict(),'epoch':epoch,'loss_train':loss_train,'loss_eval':loss_eval}
        torch.save(actual_state,'best_modelmh.pth')
    # Counts epochs since the best one (the best epoch itself is reported as 1).
    last_best_result += 1
    if last_best_result > patience_time:
        stop = True
    print("epoch %d loss_train %4.3f loss_eval %4.3f last_best %d"%(epoch,loss_train[-1],loss_eval[-1],last_best_result))
    epoch += 1

classification report

In [None]:
# Evaluate the checkpoint with the lowest validation loss, not whatever weights the
# last training epoch left in memory (early stopping runs past the best epoch).
# weights_only=False: the file was written by this notebook and also holds the
# optimizer state and the loss history (which may contain NumPy scalars).
# Never load checkpoints from an untrusted source this way.
best_state = torch.load('best_modelmh.pth', map_location=device, weights_only=False)
model.load_state_dict(best_state['model'])
model.eval()  # make inference mode explicit: no dropout, frozen batch-norm statistics

all_preds_fine = []
all_labels_fine = []
all_preds_coarse = []
all_labels_coarse = []
with torch.no_grad():
    for batch in test_loader2:
        images = batch["pixel_values"].to(device)

        out_fine, out_coarse = model(images)

        all_preds_fine.extend(out_fine.argmax(dim=1).tolist())
        all_preds_coarse.extend(out_coarse.argmax(dim=1).tolist())
        # Labels are only needed on the host, so they never go to the device.
        all_labels_fine.extend(batch["labelsc"].tolist())
        all_labels_coarse.extend(batch["labelssc"].tolist())


# One report per head: fine labels first, then coarse groups.
print(classification_report(all_labels_fine, all_preds_fine))
print(classification_report(all_labels_coarse, all_preds_coarse))