In [3]:
import sys
import os

# Add parent directory to path so we can import DatasetLoader
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import torch
from torchvision import transforms
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import shutil
from torchvision import models
from DatasetLoader.cub_v2 import cub200
from torch.nn import init

# Training

In [4]:
# CONSTANTS
# Default hyper-parameters; the run cell copies them into the `options` dict
# that is handed to NetworkManager.
DEFAULT_BATCH_SIZE   = 64
DEFAULT_BASE_LR      = 0.001
DEFAULT_EPOCHS       = 95
DEFAULT_MOMENTUM     = 0.9
DEFAULT_WEIGHT_DECAY = 1e-4
DEFAULT_GPU_ID       = 0    # index used to build the 'cuda:<id>' device string
DEFAULT_IMG_SIZE     = 448  # side of the square crop fed to the network

# ResNet depths handled by ResNet._model_choice.
MODEL_CHOICES        = [50, 101, 152]


# Multiplier applied to 512 to size the input of the replacement classifier (see ResNet.__init__).
EXPANSION = 4
# Directory under which NetworkManager.train() writes the best weights.
MODEL_SAVE_PATH = './model_save'
# Root folder passed to the cub200 dataset class.
DATASET_ROOT = '../CUB/DATASET/CUB_200_2011' 


In [5]:
def weight_init_kaiming(m):
    """Initialise the weights of a layer with Kaiming (He) normal initialisation.

    Intended to be used with ``module.apply(weight_init_kaiming)``. The layer
    type is recognised from its class name: names containing ``'Conv'`` are
    initialised with ``a=0, mode='fan_in'``; names containing ``'Linear'`` use
    the defaults of ``init.kaiming_normal_``. Every other module is left
    untouched, and biases are never modified.

    Args:
        m (nn.Module): the module visited by ``apply``.
    """
    class_name = m.__class__.__name__
    # Containers such as a user-defined ``ConvBlock`` match the name test but
    # own no ``weight``; skip them instead of raising AttributeError.
    weight = getattr(m, 'weight', None)
    if weight is None:
        return
    if class_name.find('Conv') != -1:
        init.kaiming_normal_(weight.data, a=0, mode='fan_in')
    elif class_name.find('Linear') != -1:
        init.kaiming_normal_(weight.data)

In [6]:

class ResNet(nn.Module):
    """torchvision ResNet backbone with a fresh classification head.

    The backbone's average pooling is replaced by ``AdaptiveAvgPool2d((1, 1))``
    and its ``fc`` layer by a new ``nn.Linear`` with ``n_class`` outputs whose
    weights are initialised by ``weight_init_kaiming``.

    Args:
        pre_trained (bool): forwarded as ``pretrained=`` to the torchvision
            model constructor.
        n_class (int): number of output classes.
        model_choice (int): ResNet depth; one of 50, 101 or 152.
        img_size (int | None): expected height/width of the input images.
            ``forward`` asserts on it; pass ``None`` to accept any spatial
            size. Defaults to 448, the value that used to be hard-coded.

    Raises:
        ValueError: if ``model_choice`` is not a supported depth.
    """

    def __init__(self, pre_trained=True, n_class=200, model_choice=50, img_size=448):
        super(ResNet, self).__init__()
        self.n_class = n_class
        self.img_size = img_size
        self.base_model = self._model_choice(pre_trained, model_choice)
        self.base_model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Read the feature width from the backbone instead of hard-coding it,
        # so the head always matches the layer it replaces.
        in_features = self.base_model.fc.in_features
        self.base_model.fc = nn.Linear(in_features, n_class)
        self.base_model.fc.apply(weight_init_kaiming)

    def forward(self, x):
        """Return the class logits, shape ``(N, n_class)``, for a batch ``x``."""
        N = x.size(0)
        if self.img_size is not None:
            expected = (N, 3, self.img_size, self.img_size)
            assert x.size() == expected, \
                'expected input of shape {}, got {}'.format(expected, tuple(x.size()))
        x = self.base_model(x)
        assert x.size() == (N, self.n_class), \
            'expected output of shape {}, got {}'.format((N, self.n_class), tuple(x.size()))
        return x

    def _model_choice(self, pre_trained, model_choice):
        """Build the torchvision ResNet of the requested depth."""
        builders = {50: 'resnet50', 101: 'resnet101', 152: 'resnet152'}
        if model_choice not in builders:
            # Previously this fell through and returned None, which only failed
            # later with an opaque AttributeError.
            raise ValueError(
                'Unsupported model_choice {!r}; expected one of {}'.format(
                    model_choice, sorted(builders))
            )
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=` - confirm against the installed version.
        return getattr(models, builders[model_choice])(pretrained=pre_trained)

In [7]:


from DatasetLoader.cub_v2 import cub200


class NetworkManager(object):
    """Builds the network, optimiser and CUB-200 data loaders, and runs training.

    Args:
        options (dict): hyper-parameters. Keys read by this class: 'device',
            'net_choice', 'model_choice', 'base_lr', 'momentum',
            'weight_decay', 'img_size', 'batch_size' and 'epochs'.
        path (dict): 'data' is the dataset root handed to ``cub200``;
            'model_save' is the directory under which the best weights are
            written.
    """

    def __init__(self, options, path):
        self.options = options
        self.path = path
        self.device = options['device']

        print('Starting to prepare network and data...')

        self.net = nn.DataParallel(self._net_choice(self.options['net_choice'])).to(self.device)
        print('Network is as follows:')
        print(self.net)
        self.criterion = nn.CrossEntropyLoss()
        self.solver = torch.optim.SGD(
            self.net.parameters(),
            lr=self.options['base_lr'],
            momentum=self.options['momentum'],
            weight_decay=self.options['weight_decay'],
        )
        # Multiply the learning rate by 0.1 every 30 scheduler steps (one step per epoch).
        self.schedule = torch.optim.lr_scheduler.StepLR(self.solver, step_size=30, gamma=0.1)

        train_transform_list = [
            transforms.RandomResizedCrop(self.options['img_size']),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ]
        # Evaluation: resize so that the centre crop keeps 87.5% of the shorter side.
        test_transforms_list = [
            transforms.Resize(int(self.options['img_size']/0.875)),
            transforms.CenterCrop(self.options['img_size']),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ]

        train_data = cub200(self.path['data'], train=True, transform=transforms.Compose(train_transform_list))
        test_data = cub200(self.path['data'], train=False, transform=transforms.Compose(test_transforms_list))

        self.train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=self.options['batch_size'], shuffle=True, num_workers=4, pin_memory=True
        )
        self.test_loader = torch.utils.data.DataLoader(
            test_data, batch_size=16, shuffle=False, num_workers=4, pin_memory=True
        )

    def train(self):
        """Train for ``options['epochs']`` epochs, evaluating after each one.

        The state dict of the best-scoring epoch is written to
        ``<model_save>/<net_choice>/<net_choice><model_choice>.pkl`` and the
        train/test accuracy curves are plotted at the end.
        """
        n_epochs = self.options['epochs']
        epochs = np.arange(1, n_epochs + 1)
        test_acc = list()
        train_acc = list()
        print('Training process starts:...')
        if torch.cuda.device_count() > 1:
            print('More than one GPU are used...')
        print('Epoch\tTrainLoss\tTrainAcc\tTestAcc')
        print('-'*50)
        best_acc = 0.0
        best_epoch = 0

        # torch.save does not create missing directories.
        save_dir = os.path.join(self.path['model_save'], self.options['net_choice'])
        os.makedirs(save_dir, exist_ok=True)
        best_model_file = os.path.join(
            save_dir, self.options['net_choice'] + str(self.options['model_choice']) + '.pkl'
        )

        for epoch in range(n_epochs):
            # Evaluation puts the network in eval mode, so training mode has to
            # be (re-)enabled at the start of every epoch, not just once.
            self.net.train(True)
            num_correct = 0
            num_total = 0
            train_loss_epoch = list()
            for imgs, labels in self.train_loader:
                self.solver.zero_grad()
                imgs = imgs.to(self.device)
                labels = labels.to(self.device)
                output = self.net(imgs)
                loss = self.criterion(output, labels)
                _, pred = torch.max(output, 1)
                num_correct += torch.sum(pred == labels).item()
                num_total += labels.size(0)
                train_loss_epoch.append(loss.item())
                loss.backward()
                self.solver.step()

            train_acc_epoch = num_correct * 100.0 / num_total if num_total else 0.0
            if train_loss_epoch:
                avg_train_loss_epoch = sum(train_loss_epoch) / len(train_loss_epoch)
            else:
                avg_train_loss_epoch = float('nan')
            test_acc_epoch = self._accuracy()
            test_acc.append(test_acc_epoch)
            train_acc.append(train_acc_epoch)
            self.schedule.step()
            if test_acc_epoch > best_acc:
                best_acc = test_acc_epoch
                best_epoch = epoch + 1
                # The leading '*' marks epochs that improved the best test accuracy.
                print('*', end='')
                torch.save(self.net.state_dict(), best_model_file)

            print('{}\t{:.4f}\t{:.2f}%\t{:.2f}%'.format(epoch+1, avg_train_loss_epoch, train_acc_epoch, test_acc_epoch))

        print('Best test accuracy: {:.2f}% (epoch {})'.format(best_acc, best_epoch))

        plt.figure()
        plt.plot(epochs, test_acc, color='r', label='Test Acc')
        plt.plot(epochs, train_acc, color='b', label='Train Acc')

        plt.xlabel('epochs')
        plt.ylabel('Acc')
        plt.legend()
        plt.title(self.options['net_choice']+str(self.options['model_choice']))

    def _accuracy(self):
        """Return the top-1 accuracy (in percent) on ``self.test_loader``.

        The network is evaluated in eval mode; whatever mode it was in before
        the call is restored afterwards. An empty loader yields ``0.0``.
        """
        was_training = self.net.training
        self.net.eval()
        num_total = 0
        num_acc = 0
        with torch.no_grad():
            for imgs, labels in self.test_loader:
                imgs = imgs.to(self.device)
                labels = labels.to(self.device)
                output = self.net(imgs)
                _, pred = torch.max(output, 1)
                num_acc += torch.sum(pred == labels).item()
                num_total += labels.size(0)
        self.net.train(was_training)
        if num_total == 0:
            return 0.0
        return num_acc * 100.0 / num_total

    @staticmethod
    def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
        """Save ``state`` to ``filename``; if ``is_best``, also copy it to 'model_best.pth.tar'."""
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename, 'model_best.pth.tar')

    def load_checkpoint(self, checkpoint_path):
        """Restore network (and, when present, optimiser) state from a file.

        Two layouts are accepted: a dict with a 'state_dict' entry (optionally
        'optimizer' and 'epoch'), or a bare state dict such as the one written
        by ``train()``. A missing file is reported and otherwise ignored.
        """
        if not os.path.isfile(checkpoint_path):
            print("=> no checkpoint found at '{}'".format(checkpoint_path))
            return

        print("=> loading checkpoint '{}'".format(checkpoint_path))
        # map_location lets weights saved on a GPU be restored on the current device.
        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
            self.net.load_state_dict(checkpoint['state_dict'])
            if 'optimizer' in checkpoint:
                self.solver.load_state_dict(checkpoint['optimizer'])
            epoch = checkpoint.get('epoch', 'unknown')
        else:
            self.net.load_state_dict(checkpoint)
            epoch = 'unknown'
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(checkpoint_path, epoch))

    def _net_choice(self, net_choice):
        """Instantiate the architecture named by ``net_choice`` (only 'ResNet' is available).

        Raises:
            ValueError: for any other name, instead of silently returning None.
        """
        if net_choice == 'ResNet':
            return ResNet(pre_trained=True, n_class=200, model_choice=self.options['model_choice'])
        raise ValueError("Unknown net_choice {!r}; expected 'ResNet'".format(net_choice))

    @staticmethod
    def adjust_learning_rate(optimizer, epoch, args):
        """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
        lr = args.lr * (0.1 ** (epoch // 30))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

In [8]:
# Hyper-parameters for this run, taken from the defaults in the constants cell.
options = {
    'net_choice': "ResNet",
    'model_choice': MODEL_CHOICES[0],
    'epochs': DEFAULT_EPOCHS,
    'batch_size': DEFAULT_BATCH_SIZE,
    'base_lr': DEFAULT_BASE_LR,
    'weight_decay': DEFAULT_WEIGHT_DECAY,
    'momentum': DEFAULT_MOMENTUM,
    'img_size': DEFAULT_IMG_SIZE,
    # Use the configured GPU when CUDA is available, otherwise the CPU.
    'device': torch.device(
        'cuda:' + str(DEFAULT_GPU_ID) if torch.cuda.is_available() else 'cpu'
    ),
}

# Dataset root and output directory for the saved weights.
path = {'data': DATASET_ROOT, 'model_save': MODEL_SAVE_PATH}

# Build the network and data loaders, then launch the training loop.
manager = NetworkManager(options, path)
manager.train()

Starting to prepare network and data...


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/gian/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
  0%|          | 0.00/97.8M [00:00<?, ?B/s]Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/gian/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:09<00:00, 10.3MB/s]



Network is as follows:
DataParallel(
  (module): ResNet(
    (base_model): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

AssertionError: 

# Explainability

In [None]:
!pip install grad-cam

^C
Note: you may need to restart the kernel to use updated packages.


Collecting grad-cam
  Downloading grad-cam-1.5.5.tar.gz (7.8 MB)
     ---------------------------------------- 0.0/7.8 MB ? eta -:--:--
     -- ------------------------------------- 0.5/7.8 MB 5.6 MB/s eta 0:00:02
     ---------- ----------------------------- 2.1/7.8 MB 6.9 MB/s eta 0:00:01
     ------------------ --------------------- 3.7/7.8 MB 7.3 MB/s eta 0:00:01
     -------------------------- ------------- 5.2/7.8 MB 7.2 MB/s eta 0:00:01
     --------------------------------- ------ 6.6/7.8 MB 6.9 MB/s eta 0:00:01
     ---------------------------------------- 7.8/7.8 MB 7.2 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting ttach (from grad-cam)
  Downloading ttach-0.0.3-py3-non


[notice] A new release of pip is available: 24.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import torch
import os
import numpy as np
from torchvision import transforms
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from torchvision.models import resnet50
import matplotlib.pyplot as plt

# ---- MODEL SETUP ----
# pre_trained=False: every weight is overwritten by the fine-tuned checkpoint
# loaded below, so downloading the ImageNet weights first would be wasted work.
model = ResNet(pre_trained=False, n_class=200, model_choice=50)

# NetworkManager.train() saves the state dict of the nn.DataParallel wrapper,
# so the keys carry a 'module.' prefix that must be stripped before loading
# them into the bare ResNet. (load_checkpoint is a NetworkManager method and
# is not available on the model itself.)
checkpoint_path = os.path.join(MODEL_SAVE_PATH, 'ResNet', 'ResNet50.pkl')
state_dict = torch.load(checkpoint_path, map_location='cpu')
if isinstance(state_dict, dict) and 'state_dict' in state_dict:
    state_dict = state_dict['state_dict']
state_dict = {
    (key[len('module.'):] if key.startswith('module.') else key): value
    for key, value in state_dict.items()
}
model.load_state_dict(state_dict)
model.eval()

# The torchvision backbone lives under `base_model`; Grad-CAM hooks the last
# block of its `layer4` stage.
target_layers = [model.base_model.layer4[-1]]

# ---- DATASET & DATALOADERS ----
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(DEFAULT_IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225))
])

test_transform = transforms.Compose([
    transforms.Resize(int(DEFAULT_IMG_SIZE / 0.875)),
    transforms.CenterCrop(DEFAULT_IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225))
])

# The transforms above are already Compose objects; wrapping them in a second
# Compose fails as soon as the transform is applied.
train_data = cub200(DATASET_ROOT, train=True, transform=train_transform)
test_data = cub200(DATASET_ROOT, train=False, transform=test_transform)

train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=DEFAULT_BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True
)

# ---- GET ONE IMAGE ----
images, labels = next(iter(train_loader))
input_tensor = images[0].unsqueeze(0)  # add the batch dimension
target_class = labels[0].item()

# ---- CONVERT IMAGE TO RGB (0-1) for visualization ----
# Undo the normalisation applied by the transform.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
rgb_img = images[0].permute(1, 2, 0).cpu().numpy()
rgb_img = std * rgb_img + mean
# show_cam_on_image documents a float32 image in the [0, 1] range.
rgb_img = np.clip(rgb_img, 0, 1).astype(np.float32)

# ---- GRAD-CAM ----
targets = [ClassifierOutputTarget(target_class)]

with GradCAM(model=model, target_layers=target_layers) as cam:
    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
    grayscale_cam = grayscale_cam[0, :]

# ---- VISUALIZATION ----
visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
plt.imshow(visualization)
plt.title(f"Grad-CAM (Class: {target_class})")
plt.axis('off')
plt.show()
