In [21]:
# import kagglehub
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import random
import torch.nn as nn
from torchvision.models import efficientnet_b5, EfficientNet_B5_Weights
from torchvision.models._api import WeightsEnum
from torch.hub import load_state_dict_from_url
import torch.optim as optim
import torch

In [22]:
path="../input/actual/train"

transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

allDataSet = datasets.ImageFolder(root=path, transform=transform)
totalSize = len(allDataSet)
reductionRatio = 1
reducedSize = int(totalSize * reductionRatio)
indices = list(range(totalSize))
random.shuffle(indices)
reduced_indices = indices[:reducedSize]
reducedDataset = Subset(allDataSet, reduced_indices)

print(f"Reduced Train: {len(reducedDataset)}")
dataloader = DataLoader(reducedDataset, batch_size=32, shuffle=True)


Reduced Train: 6525


In [23]:
path="../input/actual/eval"

transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

evalSet = datasets.ImageFolder(root=path, transform=transform)

print(f"Reduced Train: {len(evalSet)}")
valLoader = DataLoader(evalSet, batch_size=32, shuffle=True)


Reduced Train: 1395


In [24]:
# Number of classes in your dataset
num_classes = len(reducedDataset.dataset.classes) 
print(num_classes)

5


### Applying SE in the classifier

In [25]:
class SqueezeExcitationBlock(nn.Module):
    def __init__(self, channels, reduction_ratio=16):
        super(SqueezeExcitationBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.interaction = nn.Sequential(
            nn.Linear(channels, channels // reduction_ratio, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction_ratio, channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.interaction(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

def get_state_dict(self, *args, **kwargs):
    kwargs.pop("check_hash")
    return load_state_dict_from_url(self.url, *args, **kwargs)
WeightsEnum.get_state_dict = get_state_dict

model = efficientnet_b5(weights="DEFAULT")

num_classes = len(reducedDataset.dataset.classes)
print("Number of classes:", num_classes)

# Freeze everything
for param in model.parameters():
    param.requires_grad = False

# Add SE block + classifier
channels = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Unflatten(1, (channels, 1, 1)),          # Reshape [B, C] → [B, C, 1, 1]
    SqueezeExcitationBlock(channels),
    nn.Flatten(),                               # Back to [B, C]
    nn.Dropout(p=0.4),
    nn.Linear(channels, num_classes)
)


optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()

Number of classes: 5


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(48, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
            (1): BatchNorm2d(48, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormAct

In [26]:
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    acc = correct / total
    print(f"Epoch [{epoch+1}/{epochs}] Loss: {running_loss/total:.4f} Accuracy: {acc:.4f}")

Epoch [1/10] Loss: 1.3974 Accuracy: 0.4346
Epoch [2/10] Loss: 1.1948 Accuracy: 0.5126
Epoch [3/10] Loss: 1.1287 Accuracy: 0.5441
Epoch [4/10] Loss: 1.0783 Accuracy: 0.5660
Epoch [5/10] Loss: 1.0369 Accuracy: 0.5815
Epoch [6/10] Loss: 0.9939 Accuracy: 0.6051
Epoch [7/10] Loss: 0.9266 Accuracy: 0.6402
Epoch [8/10] Loss: 0.8868 Accuracy: 0.6605
Epoch [9/10] Loss: 0.8388 Accuracy: 0.6798
Epoch [10/10] Loss: 0.7707 Accuracy: 0.7130


In [27]:
torch.save(model.state_dict(), "../models/SEClassifier.pth")

### Creating a Custom class to incorporate SE after features and within classifier

In [28]:
def get_state_dict(self, *args, **kwargs):
    kwargs.pop("check_hash")
    return load_state_dict_from_url(self.url, *args, **kwargs)

class SqueezeExcitationBlock(nn.Module):
    def __init__(self, channels, reduction_ratio=16):
        super(SqueezeExcitationBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.interaction = nn.Sequential(
            nn.Linear(channels, channels // reduction_ratio, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction_ratio, channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.interaction(y).view(b, c, 1, 1)
        return x * y.expand_as(x)
class CustomENB5(nn.Module):
    def __init__(self,num_classes):
        super(CustomENB5, self).__init__()
        WeightsEnum.get_state_dict = get_state_dict

        self.base = efficientnet_b5(weights="DEFAULT")
        for param in self.base.parameters():
            param.requires_grad = False
        
        self.seAfterFeature = SqueezeExcitationBlock(
            channels=2048
        )

        self.base.classifier = nn.Sequential(
            nn.Unflatten(1, (2048, 1, 1)),         
            SqueezeExcitationBlock(2048),
            nn.Flatten(),                            
            nn.Dropout(p=0.4),
            nn.Linear(2048, num_classes)
        )

    def forward(self, x):
        x = self.base.features(x)
        x = self.seAfterFeature(x)
        x = self.base.avgpool(x)
        x = torch.flatten(x,1)
        x = self.base.classifier(x)
        return x        

In [29]:
model = CustomENB5(num_classes)

optimizer = optim.Adam(model.base.classifier.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()

CustomENB5(
  (base): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(48, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
              (1): BatchNorm2d(48, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (scale_a

In [30]:
epochs = 10
best_val_acc = 0.0
patience = 3
trigger_times = 0

for epoch in range(epochs):
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    acc = correct / total
    print(f"Epoch [{epoch+1}/{epochs}] Loss: {running_loss/total:.4f} Accuracy: {acc:.4f}")

    model.eval()
    eval_loss = 0.0
    eval_correct = 0
    eval_total = 0

    with torch.no_grad():
        for images, labels in valLoader:  # assumes valLoader is defined
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            eval_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            eval_correct += (predicted == labels).sum().item()
            eval_total += labels.size(0)

    val_loss = eval_loss / eval_total
    val_acc = eval_correct / eval_total
    print(f"Eval  Loss: {val_loss:.4f} | Accuracy: {val_acc:.4f}")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        trigger_times = 0
        torch.save(model.state_dict(), "../models/bestSEModified.pth")
    else:
        trigger_times += 1
        print(f"No improvement. Trigger {trigger_times}/{patience}")

        if trigger_times >= patience:
            print("Early stopping triggered.")
            break

Epoch [1/10] Loss: 1.4577 Accuracy: 0.4311
Eval  Loss: 1.3700 | Accuracy: 0.4667
Epoch [2/10] Loss: 1.2555 Accuracy: 0.5195
Eval  Loss: 1.2310 | Accuracy: 0.4989
Epoch [3/10] Loss: 1.1267 Accuracy: 0.5625
Eval  Loss: 1.1891 | Accuracy: 0.5176
Epoch [4/10] Loss: 1.0430 Accuracy: 0.6052
Eval  Loss: 1.1648 | Accuracy: 0.5254
Epoch [5/10] Loss: 0.9655 Accuracy: 0.6440
Eval  Loss: 1.1533 | Accuracy: 0.5262
Epoch [6/10] Loss: 0.8832 Accuracy: 0.6849
Eval  Loss: 1.1472 | Accuracy: 0.5290
Epoch [7/10] Loss: 0.7891 Accuracy: 0.7401
Eval  Loss: 1.1421 | Accuracy: 0.5376
Epoch [8/10] Loss: 0.6856 Accuracy: 0.7940
Eval  Loss: 1.1438 | Accuracy: 0.5491
Epoch [9/10] Loss: 0.5793 Accuracy: 0.8501
Eval  Loss: 1.1515 | Accuracy: 0.5341
No improvement. Trigger 1/3
Epoch [10/10] Loss: 0.4777 Accuracy: 0.8992
Eval  Loss: 1.1601 | Accuracy: 0.5369
No improvement. Trigger 2/3
