In [None]:
#https://github.com/Ugenteraan/ResNet-50-CBAM-PyTorch
import torch
import torch.nn as nn
from torchvision.models import resnet50
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import time
import os

In [3]:
'''Convolutional Block Attention Module (CBAM)
'''

import torch
import torch.nn as nn
from torch.nn.modules import pooling
from torch.nn.modules.flatten import Flatten



class Channel_Attention(nn.Module):
    '''Channel attention branch of CBAM.

    Every channel of the input is summarised by global spatial pooling, each
    pooled descriptor is passed through one shared two-layer MLP, the MLP
    outputs are summed, squashed with a sigmoid and used to rescale the input
    channel by channel.
    '''

    # Pooling descriptors that ``forward`` knows how to compute.
    SUPPORTED_POOL_TYPES = ('avg', 'max')

    def __init__(self, channel_in, reduction_ratio=16, pool_types=('avg', 'max')):
        '''Param init and architecture building.

        Args:
            channel_in: number of channels of the feature maps given to ``forward``.
            reduction_ratio: the hidden layer of the shared MLP has
                ``channel_in // reduction_ratio`` units.
            pool_types: iterable naming the pooled descriptors to combine; any
                non-empty selection of ``'avg'`` and ``'max'``.

        Raises:
            ValueError: if ``pool_types`` is empty or names an unsupported pooling.
        '''

        super(Channel_Attention, self).__init__()

        # Stored as a tuple so a caller's list (or a shared default) is never aliased.
        pool_types = tuple(pool_types)
        unsupported = [p for p in pool_types if p not in self.SUPPORTED_POOL_TYPES]
        if not pool_types or unsupported:
            raise ValueError(
                "pool_types must be a non-empty selection of "
                f"{self.SUPPORTED_POOL_TYPES}, got {pool_types!r}"
            )
        self.pool_types = pool_types

        # The leading Flatten is a no-op for the (batch, channel) input used in
        # ``forward``; it is kept so the Linear layers stay at indices 1 and 3
        # and previously saved state_dicts keep loading.
        self.shared_mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=channel_in, out_features=channel_in//reduction_ratio),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=channel_in//reduction_ratio, out_features=channel_in)
        )


    def forward(self, x):
        '''Rescale ``x`` with per-channel attention weights.

        Args:
            x: 4-D tensor unpacked as (batch, channel, height, width).

        Returns:
            Tensor with the same shape as ``x``: ``x`` multiplied by a
            (batch, channel, 1, 1) sigmoid gate.
        '''
        b, c, _, _ = x.size()

        out = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                # global average over the spatial axes
                pooled = torch.mean(x, dim=(2, 3), keepdim=True)
            else:
                # global maximum, reducing one spatial axis at a time
                pooled, _ = torch.max(x, dim=2, keepdim=True)
                pooled, _ = torch.max(pooled, dim=3, keepdim=True)

            # flatten to (batch, channel) and run through the shared MLP
            branch = self.shared_mlp(pooled.view(b, c))
            out = branch if out is None else out + branch

        scale = torch.sigmoid(out).view(b, c, 1, 1)

        return x * scale


class ChannelPool(nn.Module):
    '''Collapse the channel axis (dim 1) of a feature map into two descriptor maps.

    Output channel 0 holds the maximum taken across the input channels and
    output channel 1 holds the mean taken across the input channels.
    '''
    def forward(self, x):
        '''Return the channel-wise max and mean of ``x`` concatenated along dim 1.
        '''
        channel_max = x.max(dim=1, keepdim=True).values
        channel_mean = x.mean(dim=1, keepdim=True)
        return torch.cat([channel_max, channel_mean], dim=1)


class Spatial_Attention(nn.Module):
    '''Spatial attention branch of CBAM.

    The input is compressed along the channel axis by ``ChannelPool``, a single
    convolution turns the two resulting maps into one attention map, and the
    sigmoid of that map rescales the input.
    '''

    def __init__(self, kernel_size=7):
        '''Spatial Attention Architecture.

        Args:
            kernel_size: side length of the square convolution kernel. It must
                be a positive odd integer: the padding ``(kernel_size-1)//2``
                only keeps the attention map the same size as the input for
                odd kernels.

        Raises:
            ValueError: if ``kernel_size`` is not a positive odd integer.
        '''

        super(Spatial_Attention, self).__init__()

        # With an even kernel the attention map comes out one pixel smaller than
        # the input, so the multiplication in ``forward`` would fail or broadcast.
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError(f"kernel_size must be a positive odd integer, got {kernel_size!r}")

        self.compress = ChannelPool()
        self.spatial_attention = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=kernel_size, stride=1, dilation=1, padding=(kernel_size-1)//2, bias=False)


    def forward(self, x):
        '''Forward Propagation.

        Returns ``x`` multiplied by the sigmoid of the single-channel attention map.
        '''
        x_compress = self.compress(x)
        x_output = self.spatial_attention(x_compress)
        scaled = torch.sigmoid(x_output)
        return x * scaled


class CBAM(nn.Module):
    '''CBAM architecture: channel attention, optionally followed by spatial attention.
    '''
    def __init__(self, channel_in, reduction_ratio=16, pool_types=('avg', 'max'), spatial=True, kernel_size=7):
        '''Param init and arch build.

        Args:
            channel_in: number of channels of the feature maps given to ``forward``.
            reduction_ratio: forwarded to ``Channel_Attention``.
            pool_types: forwarded to ``Channel_Attention``.
            spatial: when true, a ``Spatial_Attention`` stage is applied after
                the channel attention.
            kernel_size: kernel size forwarded to ``Spatial_Attention``; only
                used when ``spatial`` is true.
        '''
        super(CBAM, self).__init__()
        self.spatial = spatial

        self.channel_attention = Channel_Attention(channel_in=channel_in, reduction_ratio=reduction_ratio, pool_types=pool_types)

        if self.spatial:
            self.spatial_attention = Spatial_Attention(kernel_size=kernel_size)


    def forward(self, x):
        '''Forward Propagation.

        Applies channel attention to ``x`` and, when enabled, spatial attention
        to the result.
        '''
        x_out = self.channel_attention(x)
        if self.spatial:
            x_out = self.spatial_attention(x_out)

        return x_out

In [4]:
from torchvision.models.resnet import Bottleneck

class ResNet50_CBAM(nn.Module):
    '''ResNet-50 backbone with CBAM applied to the outputs of ``layer3`` and ``layer4``.
    '''

    def __init__(self, num_classes, weights="IMAGENET1K_V1", num_frozen_params=100):
        '''Build the backbone, the two CBAM blocks and the new classifier head.

        Args:
            num_classes: number of output units of the replacement ``fc`` layer.
            weights: value passed to ``resnet50(weights=...)``. Pass ``None`` to
                skip loading pretrained weights, e.g. when a saved state_dict
                is loaded right afterwards.
            num_frozen_params: how many of the backbone's leading parameter
                tensors (in ``parameters()`` order) get ``requires_grad=False``.
                Use 0 to leave the whole backbone trainable.
        '''
        super().__init__()

        # Load the base model
        self.base = resnet50(weights=weights)

        # Freeze the first parameter tensors (optional, can be unfrozen later)
        for param in list(self.base.parameters())[:num_frozen_params]:
            param.requires_grad = False

        # Width of the features entering the original classifier (2048 for ResNet-50)
        feature_dim = self.base.fc.in_features

        # Add CBAM after layers 3 and 4
        self.cbam3 = CBAM(channel_in=1024)
        self.cbam4 = CBAM(channel_in=feature_dim)

        # Replace the final fully connected layer
        self.base.fc = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        '''Run the network.

        Returns:
            A tuple ``(x_conv, logits)``: ``x_conv`` is the ``layer4`` feature
            map after CBAM, ``logits`` is the output of the ``fc`` layer applied
            to the pooled and flattened ``x_conv``.
        '''
        # Original stem and stages up to layer3
        x = self.base.conv1(x)
        x = self.base.bn1(x)
        x = self.base.relu(x)
        x = self.base.maxpool(x)

        x = self.base.layer1(x)
        x = self.base.layer2(x)
        x = self.base.layer3(x)
        x = self.cbam3(x)  # CBAM here
        x = self.base.layer4(x)
        x_conv = self.cbam4(x)  # keep the attended features so they can be returned

        x = self.base.avgpool(x_conv)
        x = torch.flatten(x, 1)
        x = self.base.fc(x)
        return x_conv,x

In [5]:
# Seed PyTorch's random number generator so that weight initialisation and data
# shuffling start from the same state on every Restart & Run All.
# NOTE(review): this alone may not make GPU runs bit-identical.
seed = 42
torch.manual_seed(seed)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Root folder of the images; the dataset cell reads its "train" and "test" subfolders.
data_dir = '/kaggle/input/cub-200-2011/images'

# Training hyper-parameters.
num_classes = 200
batch_size = 16
num_epochs = 15
learning_rate = 1e-5

In [6]:
# Side length of the square crops produced by both pipelines.
crop_size = 448

# Per-channel mean / std used for normalisation (the standard ImageNet statistics).
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training: random crop, flip and colour jitter, then tensor conversion and normalisation.
train_transforms = transforms.Compose(
    [
        transforms.RandomResizedCrop(crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]
)

# Validation: deterministic resize and centre crop, then the same tensor conversion and normalisation.
val_transforms = transforms.Compose(
    [
        transforms.Resize(512),
        transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]
)

In [7]:
# One image folder per split, each paired with its own transform pipeline.
train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transforms)
val_dataset = datasets.ImageFolder(f"{data_dir}/test", transform=val_transforms)

# Options shared by both loaders; only the shuffling differs between the splits.
loader_options = dict(batch_size=batch_size, num_workers=4)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [8]:
# Build the network from the configured class count (rather than repeating the
# literal) and move it to the selected device.
model = ResNet50_CBAM(num_classes=num_classes)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 184MB/s] 


In [9]:
import torch.optim as optim
# Classification loss computed on the logits returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter, using the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Scales the learning rate by 0.3 when the value passed to scheduler.step()
# stops decreasing (patience of 3).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.3,
    patience=3,
)

In [10]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10,
                dataloaders=None, device=None,
                save_path="best_resnet50_cub_fullfinetune.pth"):
    '''Train ``model`` and validate it after every epoch.

    Each epoch runs a 'train' phase followed by a 'val' phase and prints the
    loss and accuracy of both. After the 'val' phase the scheduler is stepped
    with the validation loss, and whenever the validation accuracy exceeds the
    best value seen so far the model's state_dict is written to ``save_path``.

    Args:
        model: module whose forward returns a pair; the second element is used
            as the logits.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: scheduler called as ``scheduler.step(val_loss)``.
        num_epochs: number of train/val epochs to run.
        dataloaders: optional mapping with 'train' and 'val' data loaders. When
            omitted, the notebook-level ``train_loader`` and ``val_loader`` are used.
        device: optional device the batches are moved to. When omitted, the
            device of the model's first parameter is used (CPU if the model has
            no parameters).
        save_path: file that receives the best state_dict.

    Returns:
        None. Progress is reported through ``print``.
    '''
    if dataloaders is None:
        # Backward-compatible default: the loaders built earlier in the notebook.
        dataloaders = {'train': train_loader, 'val': val_loader}

    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f"\nÉpoca {epoch+1}/{num_epochs}")
        print("-" * 30)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) enables training behaviour, train(False) is eval()
            model.train(is_train)
            dataloader = dataloaders[phase]

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Gradients only exist (and need clearing) in the train phase.
                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    # The attended feature map is not needed for training.
                    _, outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # The criterion's loss is weighted by the batch size so that
                # dividing by the dataset size below gives a per-sample value.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            num_samples = len(dataloader.dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if not is_train:
                scheduler.step(epoch_loss)

                # Save the best model so far
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), save_path)

    print(f"\nTreinamento concluído! Melhor acurácia de validação: {best_acc:.4f}")

In [11]:
# Run the train/validation loop for the configured number of epochs.
train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=num_epochs,
)


Época 1/15
------------------------------
train Loss: 5.2325 Acc: 0.0228
val Loss: 4.9626 Acc: 0.1129

Época 2/15
------------------------------
train Loss: 4.8208 Acc: 0.1042
val Loss: 4.2306 Acc: 0.1988

Época 3/15
------------------------------
train Loss: 4.3376 Acc: 0.1651
val Loss: 3.6891 Acc: 0.2881

Época 4/15
------------------------------
train Loss: 3.9192 Acc: 0.2387
val Loss: 3.1690 Acc: 0.3749

Época 5/15
------------------------------
train Loss: 3.5738 Acc: 0.3157
val Loss: 2.7790 Acc: 0.4516

Época 6/15
------------------------------
train Loss: 3.2864 Acc: 0.3753
val Loss: 2.5399 Acc: 0.5118

Época 7/15
------------------------------
train Loss: 3.0547 Acc: 0.4279
val Loss: 2.2453 Acc: 0.5569

Época 8/15
------------------------------
train Loss: 2.8518 Acc: 0.4615
val Loss: 2.0415 Acc: 0.5830

Época 9/15
------------------------------
train Loss: 2.6683 Acc: 0.4935
val Loss: 1.8543 Acc: 0.6209

Época 10/15
------------------------------
train Loss: 2.5139 Acc: 0.515