In [62]:
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
import os
import pandas as pd
from PIL import Image
import torch.optim as optim
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np 
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

In [63]:
# def initialize_weight_goog(m):
#     if isinstance(m, nn.Linear):
#         fan_out = m.weight.size(0)
#         fan_in = 0
#         init_range = 1.0 / math.sqrt(fan_in + fan_out)
#         m.weight.data.uniform_(-init_range, init_range)
#         m.bias.data.zero_(0)

In [64]:
def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

In [65]:
class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // 16, 1, bias=False),
                               nn.ReLU(),
                               nn.Conv2d(in_planes // 16, in_planes, 1, bias=False))
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)

In [66]:
class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)

In [67]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        self.ca = ChannelAttention(planes)
        self.sa = SpatialAttention()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out = self.ca(out) * out
        out = self.sa(out) * out

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

In [68]:
class ResNetWithSCN(nn.Module):

    def __init__(self, block, layers, num_heads, num_classes=7):
        self.inplanes = 64
        # self.embed_dim = embed_dim
        self.num_heads = num_heads
        super(ResNetWithSCN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # self.fc = nn.Linear(512 * block.expansion, num_classes)
        self.attention = nn.MultiheadAttention(256, num_heads)
        self.layer_norm = nn.LayerNorm(256)
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
        self.linear1 = nn.Linear(256,128)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)   
        self.classifier = nn.Linear(128, num_classes)
            # nn.Linear(256,128),
            # nn.ReLU(),
            # nn.Dropout(0.5),
            # nn.Linear(128, num_classes)
        
        self.alpha = nn.Sequential(
            # nn.Linear(256,128),
            # nn.ReLU(),
            # nn.Dropout(0.5),
            # nn.Linear(128, num_classes)
            nn.Linear(128,1),
            nn.Sigmoid()
        )

        for m in self.classifier.modules():
            if isinstance(m, nn.Linear):
                fan_out = m.weight.size(0)  # fan-out
                fan_in = m.weight.size(1) if m.weight.dim() > 1 else 0  # fan-in
                init_range = 1.0 / math.sqrt(fan_in + fan_out)
                m.weight.data.uniform_(-init_range, init_range)
                if m.bias is not None:
                    m.bias.data.zero_()
        for m in self.alpha.modules():
            if isinstance(m, nn.Linear):
                fan_out = m.weight.size(0)  # fan-out
                fan_in = m.weight.size(1) if m.weight.dim() > 1 else 0  # fan-in
                init_range = 1.0 / math.sqrt(fan_in + fan_out)
                m.weight.data.uniform_(-init_range, init_range)
                if m.bias is not None:
                    m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        # x = self.layer4(x)

        # x = self.avgpool(x)
        # x = x.view(x.size(0), -1)
        # x = self.fc(x)
        batch_size, channels, height, width = x.shape
        x = x.view(batch_size, channels, height*width).permute(0,2,1)

        query = x
        key = x
        value = x

        attn_output, _ = self.attention(query, key, value)
        x = self.layer_norm(attn_output + x)
        x = x.permute(0,2,1).view(batch_size, channels, height, width)
        # x = nn.functional.adaptive_avg_pool2d(x,1).view(batch_size, -1)
        x = self.global_avg_pool(x).view(batch_size, -1)
        x = self.linear1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        
        attention_weights = self.alpha(x)
        out = attention_weights * self.classifier(x)

        return attention_weights, out

In [69]:
def resnet18_cbam_with_SCN(pretrained=True, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    num_heads = 4
    model = ResNetWithSCN(BasicBlock, [2, 2, 2, 2], num_heads)
    # if pretrained:
    #     pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
    #     now_state_dict        = model.state_dict()
    #     now_state_dict.update(pretrained_state_dict)
    #     model.load_state_dict(now_state_dict, strict=False)
    
    # New Code Starts From Here
    if pretrained:
        checkpoint = torch.load(r"/kaggle/input/model-pretrained-weights/tot20epoch_4heads_epoch_13_loss_0.07219641906097725.pth")
        model.load_state_dict(checkpoint['model_state_dict'],strict=False)
    # Ends Here

    
    return model

In [70]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [71]:
# model = resnet18_cbam_with_SCN()
# model.to(device)

In [72]:
class CustomImageDataset(Dataset):
    def __init__(self, image_dir, csv_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform

        # Read the CSV file
        self.data_frame = pd.read_csv(csv_file)

        # Ensure the CSV file has columns 'filename' and 'class'
        assert 'image' in self.data_frame.columns
        assert 'label' in self.data_frame.columns
        
        self.labels = self.data_frame['label'].values - 1

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        # Get the filename and class label
        img_name = self.data_frame.iloc[idx, 0]  # Get the filename from the CSV
        class_label = self.labels[idx]  # Get the class label from the CSV

        # Construct the path to the image based on its class label
        class_folder = os.path.join(self.image_dir, str(class_label+1))  # Convert class label to string
        img_path = os.path.join(class_folder, img_name)

        # Load the image
        image = Image.open(img_path).convert('RGB')

        # Apply transformations
        if self.transform:
            image = self.transform(image)

        return image, class_label, idx

In [73]:
os.getcwd()

'/kaggle/working'

In [74]:
os.makedirs('models', exist_ok=True)

In [76]:
def run_training():

    # args = parse_args()
    # imagenet_pretrained = True
    # res18 = ResNetWithSCN(pretrained = imagenet_pretrained, drop_rate = args.drop_rate)
    model = resnet18_cbam_with_SCN()
    model.to(device)
    
    # if not imagenet_pretrained:
    #      for m in res18.modules():
    #         initialize_weight_goog(m)
            
    # if args.pretrained:
    #     print("Loading pretrained weights...", args.pretrained) 
    #     pretrained = torch.load(args.pretrained)
    #     pretrained_state_dict = pretrained['state_dict']
    #     model_state_dict = res18.state_dict()
    #     loaded_keys = 0
    #     total_keys = 0
    #     for key in pretrained_state_dict:
    #         if  ((key=='module.fc.weight')|(key=='module.fc.bias')):
    #             pass
    #         else:    
    #             model_state_dict[key] = pretrained_state_dict[key]
    #             total_keys+=1
    #             if key in model_state_dict:
    #                 loaded_keys+=1
    #     print("Loaded params num:", loaded_keys)
    #     print("Total params num:", total_keys)
    #     res18.load_state_dict(model_state_dict, strict = False)  
        
    # data_transforms = transforms.Compose([
    #     transforms.ToPILImage(),
    #     transforms.Resize((224, 224)),
    #     transforms.ToTensor(),
    #     transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                              std=[0.229, 0.224, 0.225]),
    #     transforms.RandomErasing(scale=(0.02,0.25))])

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    
    # train_dataset = RafDataSet(args.raf_path, phase = 'train', transform = data_transforms, basic_aug = True)  
    image_directory = r"/kaggle/input/raf-db-dataset/DATASET/train"  # Directory containing class subfolders
    csv_file_path = r"/kaggle/input/raf-db-dataset/train_labels.csv"
    train_dataset = CustomImageDataset(image_dir=image_directory, csv_file=csv_file_path, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=32, num_workers=4, pin_memory=True, shuffle=True)
    
    # print('Train set size:', train_dataset.__len__())
    # train_loader = torch.utils.data.DataLoader(train_dataset,
    #                                            batch_size = args.batch_size,
    #                                            num_workers = args.workers,
    #                                            shuffle = True,  
    #                                            pin_memory = True)
    test_image_directory = r"/kaggle/input/raf-db-dataset/DATASET/test"
    test_csv_file_path = r"/kaggle/input/raf-db-dataset/test_labels.csv"
    test_dataset = CustomImageDataset(test_image_directory, test_csv_file_path, transform)
    test_loader = DataLoader(test_dataset, batch_size=32,shuffle=False)
    # data_transforms_val = transforms.Compose([
    #     transforms.ToPILImage(),
    #     transforms.Resize((224, 224)),
    #     transforms.ToTensor(),
    #     transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                              std=[0.229, 0.224, 0.225])])                                           
    # val_dataset = RafDataSet(args.raf_path, phase = 'test', transform = data_transforms_val)    
    # print('Validation set size:', val_dataset.__len__())
    
    # val_loader = torch.utils.data.DataLoader(val_dataset,
    #                                            batch_size = args.batch_size,
    #                                            num_workers = args.workers,
    #                                            shuffle = False,  
    #                                            pin_memory = True)
    
    params = model.parameters()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    
    # if args.optimizer == 'adam':
    #     optimizer = torch.optim.Adam(params,weight_decay = 1e-4)
    # elif args.optimizer == 'sgd':
    #     optimizer = torch.optim.SGD(params, args.lr,
    #                                 momentum=args.momentum,
    #                                 weight_decay = 1e-4)
    # else:
    #     raise ValueError("Optimizer not supported.")
    
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.9)
    # res18 = res18.cuda()
    # criterion = torch.nn.CrossEntropyLoss()
    
    margin_1 = 0.15
    margin_2 = 0.20
    beta = 0.7
    
    for i in range(1, 20 + 1):
        running_loss = 0.0
        correct_sum = 0
        iter_cnt = 0
        model.train()
        # for batch_i, (imgs, targets, indexes) in enumerate(train_loader):
        for images, labels, indexes in tqdm(train_loader):
            batch_sz = images.size(0) 
            iter_cnt += 1
            tops = int(batch_sz* beta)
            optimizer.zero_grad()
            # imgs = imgs.cuda()
            images = images.to(device) 
            labels = labels.to(device)
            attention_weights, outputs = model(images)
            
            # Rank Regularization
            _, top_idx = torch.topk(attention_weights.squeeze(), tops)
            _, down_idx = torch.topk(attention_weights.squeeze(), batch_sz - tops, largest = False)

            high_group = attention_weights[top_idx]
            low_group = attention_weights[down_idx]
            high_mean = torch.mean(high_group)
            low_mean = torch.mean(low_group)
            # diff  = margin_1 - (high_mean - low_mean)
            diff  = low_mean - high_mean + margin_1

            if diff > 0:
                RR_loss = diff
            else:
                RR_loss = 0.0
            
            # targets = targets.cuda()
            loss = criterion(outputs, labels) + RR_loss 
            loss.backward()
            optimizer.step()
            
            running_loss += loss
            _, predicts = torch.max(outputs, 1)
            correct_num = torch.eq(predicts, labels).sum()
            correct_sum += correct_num

            # Relabel samples
            if i >= 10:
                sm = torch.softmax(outputs, dim = 1)
                Pmax, predicted_labels = torch.max(sm, 1) # predictions
                Pgt = torch.gather(sm, 1, labels.view(-1,1)).squeeze() # retrieve predicted probabilities of targets
                true_or_false = Pmax - Pgt > margin_2
                update_idx = true_or_false.nonzero().squeeze() # get samples' index in this mini-batch where (Pmax - Pgt > margin_2)
                label_idx = indexes[update_idx.cpu()] # get samples' index in train_loader
                relabels = predicted_labels[update_idx] # predictions where (Pmax - Pgt > margin_2)
                train_loader.dataset.labels[label_idx.cpu().numpy()] = relabels.cpu().numpy() # relabel samples in train_loader
                
        scheduler.step()
        acc = correct_sum.float() / float(train_dataset.__len__())
        running_loss = running_loss/iter_cnt
        print('[Epoch %d] Training accuracy: %.4f. Loss: %.3f' % (i, acc, running_loss))
        
        with torch.no_grad():
            running_loss = 0.0
            iter_cnt = 0
            bingo_cnt = 0
            sample_cnt = 0
            model.eval()
            # for batch_i, (imgs, targets, _) in enumerate(val_loader):
            for imgs, targets, _ in tqdm(test_loader):
                _, outputs = model(imgs.cuda())
                targets = targets.cuda()
                loss = criterion(outputs, targets)
                running_loss += loss
                iter_cnt+=1
                _, predicts = torch.max(outputs, 1)
                correct_num  = torch.eq(predicts,targets)
                bingo_cnt += correct_num.sum().cpu()
                sample_cnt += outputs.size(0)
                
            running_loss = running_loss/iter_cnt   
            acc = bingo_cnt.float()/float(sample_cnt)
            acc = np.around(acc.numpy(),4)
            print("[Epoch %d] Test accuracy:%.4f. Loss:%.3f" % (i, acc, running_loss))
           
            if acc > 0.76 :
                torch.save({'iter': i,
                            'model_state_dict': model.state_dict(),
                             'optimizer_state_dict': optimizer.state_dict(),},
                            os.path.join('models', "SCN1_epoch_"+str(i)+"_acc_"+str(acc)+".pth"))
                print('Model saved.')

In [77]:
run_training()

  checkpoint = torch.load(r"/kaggle/input/model-pretrained-weights/tot20epoch_4heads_epoch_13_loss_0.07219641906097725.pth")
100%|██████████| 384/384 [00:38<00:00,  9.97it/s]


[Epoch 1] Training accuracy: 0.9501. Loss: 0.194


100%|██████████| 96/96 [00:13<00:00,  6.92it/s]


[Epoch 1] Test accuracy:0.7419. Loss:1.062


100%|██████████| 384/384 [00:38<00:00,  9.96it/s]


[Epoch 2] Training accuracy: 0.9773. Loss: 0.078


100%|██████████| 96/96 [00:11<00:00,  8.47it/s]


[Epoch 2] Test accuracy:0.7096. Loss:1.174


100%|██████████| 384/384 [00:38<00:00,  9.97it/s]


[Epoch 3] Training accuracy: 0.9846. Loss: 0.053


100%|██████████| 96/96 [00:11<00:00,  8.59it/s]


[Epoch 3] Test accuracy:0.7262. Loss:1.381


100%|██████████| 384/384 [00:38<00:00,  9.98it/s]


[Epoch 4] Training accuracy: 0.9838. Loss: 0.054


100%|██████████| 96/96 [00:11<00:00,  8.32it/s]


[Epoch 4] Test accuracy:0.7484. Loss:1.156


100%|██████████| 384/384 [00:38<00:00,  9.95it/s]


[Epoch 5] Training accuracy: 0.9897. Loss: 0.036


100%|██████████| 96/96 [00:11<00:00,  8.45it/s]


[Epoch 5] Test accuracy:0.7073. Loss:1.502


100%|██████████| 384/384 [00:38<00:00,  9.97it/s]


[Epoch 6] Training accuracy: 0.9937. Loss: 0.020


100%|██████████| 96/96 [00:11<00:00,  8.03it/s]


[Epoch 6] Test accuracy:0.7040. Loss:1.695


100%|██████████| 384/384 [00:38<00:00,  9.97it/s]


[Epoch 7] Training accuracy: 0.9932. Loss: 0.025


100%|██████████| 96/96 [00:11<00:00,  8.21it/s]


[Epoch 7] Test accuracy:0.7467. Loss:1.245


100%|██████████| 384/384 [00:38<00:00,  9.97it/s]


[Epoch 8] Training accuracy: 0.9942. Loss: 0.023


100%|██████████| 96/96 [00:11<00:00,  8.55it/s]


[Epoch 8] Test accuracy:0.7184. Loss:1.656


100%|██████████| 384/384 [00:38<00:00,  9.97it/s]


[Epoch 9] Training accuracy: 0.9976. Loss: 0.008


100%|██████████| 96/96 [00:11<00:00,  8.62it/s]


[Epoch 9] Test accuracy:0.6864. Loss:2.000


100%|██████████| 384/384 [00:39<00:00,  9.77it/s]


[Epoch 10] Training accuracy: 0.9963. Loss: 0.013


100%|██████████| 96/96 [00:10<00:00,  8.76it/s]


[Epoch 10] Test accuracy:0.6457. Loss:2.090


  1%|          | 2/384 [00:00<01:44,  3.66it/s]


FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 2.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_30/151118697.py", line 28, in __getitem__
    image = Image.open(img_path).convert('RGB')
  File "/opt/conda/lib/python3.10/site-packages/PIL/Image.py", line 3431, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/raf-db-dataset/DATASET/train/4/train_00562_aligned.jpg'
