In [2]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader,random_split
from torchvision import transforms, datasets, models
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch.nn.functional as F

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

mean = [0.4914, 0.4822, 0.4465] 
std = [0.2470, 0.2435, 0.2616] 
batch_size = 40
n_epochs = 100

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to 224x224
    # Random augmentations
    # Randomly rotate images by 40 degrees
    transforms.RandomRotation(40),
    transforms.RandomHorizontalFlip(),  # Random horizontal flip
    transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=0.05),  # Random color jitter
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=mean, std=std)  # Normalize with mean and std
])

path='train'
all_train = datasets.ImageFolder(root = path, transform = train_transform)
train_size = int(0.9 * len(all_train))
validation_size = len(all_train) - train_size
train_dataset, validation_dataset = random_split(all_train , [train_size, validation_size])
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=3
)
val_loader = DataLoader(
    validation_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=3
)

In [4]:
image_size = (3, 224, 224)  # Example: 3 channels, 32x32 pixels
num_classes = 100

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from math import ceil

# Inverted Residual Block with Squeeze-and-Excitation
class MBConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, expand_ratio, stride, kernel_size, reduction_ratio=4):
        super(MBConvBlock, self).__init__()
        self.stride = stride
        self.expand_ratio = expand_ratio
        hidden_dim = in_channels * expand_ratio
        
        # Expansion phase
        if expand_ratio != 1:
            self.expand_conv = nn.Conv2d(in_channels, hidden_dim, kernel_size=1, bias=False)
            self.bn0 = nn.BatchNorm2d(hidden_dim)
        else:
            self.expand_conv = None
        
        # Depthwise convolution
        self.depthwise_conv = nn.Conv2d(hidden_dim if expand_ratio != 1 else in_channels, hidden_dim, 
                                        kernel_size=kernel_size, stride=stride, 
                                        padding=kernel_size // 2, groups=hidden_dim, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden_dim)
        
        # Squeeze and Excitation block
        self.se_avgpool = nn.AdaptiveAvgPool2d(1)
        self.se_fc1 = nn.Conv2d(hidden_dim, hidden_dim // reduction_ratio, kernel_size=1)
        self.se_fc2 = nn.Conv2d(hidden_dim // reduction_ratio, hidden_dim, kernel_size=1)
        
        # Output phase
        self.project_conv = nn.Conv2d(hidden_dim, out_channels, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.use_residual = (in_channels == out_channels and stride == 1)
    
    def forward(self, x):
        identity = x
        
        if self.expand_conv:
            out = F.relu6(self.bn0(self.expand_conv(x)))
        else:
            out = x
        
        # Depthwise convolution
        out = F.relu6(self.bn1(self.depthwise_conv(out)))
        
        # Squeeze and Excitation
        se = self.se_avgpool(out)
        se = F.relu(self.se_fc1(se))
        se = torch.sigmoid(self.se_fc2(se))
        out = out * se
        
        # Output
        out = self.bn2(self.project_conv(out))
        
        if self.use_residual:
            out = out + identity
        
        return out

# EfficientNet Main Architecture
class EfficientNet(nn.Module):
    def __init__(self, width_coefficient, depth_coefficient, dropout_rate=0.2, num_classes=100):
        super(EfficientNet, self).__init__()
        
        # Base settings for EfficientNet-B0 with reduced coefficients
        base_channels = 16  # Reduced base channels
        base_layers = [
            # (expand_ratio, out_channels, num_blocks, stride, kernel_size)
            (1, 16, 1, 1, 3),   # Stage 1
            (6, 24, 2, 2, 3),   # Stage 2
            (6, 40, 2, 2, 5),   # Stage 3
            (6, 80, 3, 2, 3),   # Stage 4
            (6, 112, 3, 1, 5),  # Stage 5
            (6, 192, 4, 2, 5),  # Stage 6
            (6, 320, 1, 1, 3)   # Stage 7
        ]
        
        # Stem
        out_channels = ceil(base_channels * width_coefficient)
        self.stem_conv = nn.Conv2d(3, out_channels, kernel_size=3, stride=2, padding=1, bias=False)
        self.stem_bn = nn.BatchNorm2d(out_channels)
        
        # Build blocks
        self.blocks = nn.ModuleList([])
        in_channels = out_channels
        for expand_ratio, out_channels, num_blocks, stride, kernel_size in base_layers:
            out_channels = ceil(out_channels * width_coefficient)
            num_blocks = ceil(num_blocks * depth_coefficient)
            for i in range(num_blocks):
                block_stride = stride if i == 0 else 1
                self.blocks.append(MBConvBlock(in_channels, out_channels, expand_ratio, block_stride, kernel_size))
                in_channels = out_channels
        
        # Head
        final_channels = ceil(1024 * width_coefficient)  # Reduced head channels
        self.head_conv = nn.Conv2d(in_channels, final_channels, kernel_size=1, bias=False)
        self.head_bn = nn.BatchNorm2d(final_channels)
        
        # Pooling and classification
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(final_channels, num_classes)
    
    def forward(self, x):
        # Stem
        x = F.relu6(self.stem_bn(self.stem_conv(x)))
        
        # Blocks
        for block in self.blocks:
            x = block(x)
        
        # Head
        x = F.relu6(self.head_bn(self.head_conv(x)))
        
        # Pooling and classification
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)
        
        return x

def efficientnet_b0(num_classes=100):
    # Reduced width and depth coefficients to reduce parameters
    return EfficientNet(width_coefficient=0.95, depth_coefficient=0.95, num_classes=num_classes)

In [6]:
model = efficientnet_b0(num_classes=100)
total_params = sum(p.numel() for p in model.parameters())
print("# parameters:", total_params)

# parameters: 6513208


In [7]:
model = efficientnet_b0(num_classes=100).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=1e-6)

# Early stopping class
class EarlyStopper:
    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

# Train function
def train(model, train_loader, optimizer, loss_fn):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)
        
    epoch_loss = running_loss / len(train_loader.dataset)
    accuracy = correct / total
    return epoch_loss, accuracy

# Validation function
@torch.no_grad()
def validate(model, val_loader, loss_fn):
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(val_loader):
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        val_loss += loss.item() * images.size(0)
        
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)
        
    avg_loss = val_loss / len(val_loader.dataset)
    accuracy = correct / total
    return avg_loss, accuracy

# Training loop
train_loss_list = []
valid_loss_list = []
early_stopper = EarlyStopper(patience=7)

best_val_acc = 0.0

for epoch in range(n_epochs):
    train_loss, train_acc = train(model, train_loader, optimizer, loss_fn)
    val_loss, val_acc = validate(model, val_loader, loss_fn)
    
    train_loss_list.append(train_loss)
    valid_loss_list.append(val_loss)
    
    print(f"Epoch {epoch+1}/{n_epochs}:")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    
    # Step the learning rate scheduler
    scheduler.step()
    
    # Early stopping check
    if early_stopper.early_stop(val_loss):
        print("Early stopping")
        break
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pth")

# Save the final model
torch.save(model.state_dict(), "final_model.pth")

100%|██████████| 315/315 [01:58<00:00,  2.66it/s]
100%|██████████| 35/35 [00:17<00:00,  1.98it/s]


Epoch 1/100:
Train Loss: 4.0532, Train Acc: 0.0604
Val Loss: 3.8676, Val Acc: 0.0879


100%|██████████| 315/315 [01:13<00:00,  4.29it/s]
100%|██████████| 35/35 [00:14<00:00,  2.49it/s]


Epoch 2/100:
Train Loss: 3.5659, Train Acc: 0.1102
Val Loss: 3.6115, Val Acc: 0.1243


100%|██████████| 315/315 [01:13<00:00,  4.27it/s]
100%|██████████| 35/35 [00:11<00:00,  2.96it/s]


Epoch 3/100:
Train Loss: 3.2620, Train Acc: 0.1722
Val Loss: 3.3285, Val Acc: 0.1643


100%|██████████| 315/315 [01:13<00:00,  4.27it/s]
100%|██████████| 35/35 [00:12<00:00,  2.76it/s]


Epoch 4/100:
Train Loss: 3.0246, Train Acc: 0.2224
Val Loss: 3.0099, Val Acc: 0.2193


100%|██████████| 315/315 [01:14<00:00,  4.24it/s]
100%|██████████| 35/35 [00:11<00:00,  2.95it/s]


Epoch 5/100:
Train Loss: 2.7730, Train Acc: 0.2710
Val Loss: 2.7496, Val Acc: 0.2779


100%|██████████| 315/315 [01:12<00:00,  4.37it/s]
100%|██████████| 35/35 [00:12<00:00,  2.85it/s]


Epoch 6/100:
Train Loss: 2.5913, Train Acc: 0.3169
Val Loss: 2.5188, Val Acc: 0.3264


100%|██████████| 315/315 [01:12<00:00,  4.36it/s]
100%|██████████| 35/35 [00:11<00:00,  2.93it/s]


Epoch 7/100:
Train Loss: 2.4340, Train Acc: 0.3497
Val Loss: 2.4947, Val Acc: 0.3400


100%|██████████| 315/315 [01:13<00:00,  4.30it/s]
100%|██████████| 35/35 [00:20<00:00,  1.68it/s]


Epoch 8/100:
Train Loss: 2.2672, Train Acc: 0.3914
Val Loss: 2.3263, Val Acc: 0.3793


100%|██████████| 315/315 [02:08<00:00,  2.45it/s]
100%|██████████| 35/35 [00:11<00:00,  2.96it/s]


Epoch 9/100:
Train Loss: 2.1432, Train Acc: 0.4233
Val Loss: 2.3667, Val Acc: 0.3721


100%|██████████| 315/315 [01:28<00:00,  3.57it/s]
100%|██████████| 35/35 [00:11<00:00,  3.00it/s]


Epoch 10/100:
Train Loss: 2.0350, Train Acc: 0.4445
Val Loss: 2.0864, Val Acc: 0.4350


100%|██████████| 315/315 [01:26<00:00,  3.63it/s]
100%|██████████| 35/35 [00:11<00:00,  3.03it/s]


Epoch 11/100:
Train Loss: 1.9019, Train Acc: 0.4717
Val Loss: 2.0797, Val Acc: 0.4371


100%|██████████| 315/315 [01:33<00:00,  3.38it/s]
100%|██████████| 35/35 [00:14<00:00,  2.43it/s]


Epoch 12/100:
Train Loss: 1.8168, Train Acc: 0.4943
Val Loss: 1.9804, Val Acc: 0.4721


100%|██████████| 315/315 [01:31<00:00,  3.44it/s]
100%|██████████| 35/35 [00:11<00:00,  2.92it/s]


Epoch 13/100:
Train Loss: 1.7213, Train Acc: 0.5224
Val Loss: 1.9216, Val Acc: 0.4886


100%|██████████| 315/315 [00:48<00:00,  6.45it/s]
100%|██████████| 35/35 [00:09<00:00,  3.81it/s]


Epoch 14/100:
Train Loss: 1.6205, Train Acc: 0.5457
Val Loss: 1.9387, Val Acc: 0.4771


100%|██████████| 315/315 [00:48<00:00,  6.55it/s]
100%|██████████| 35/35 [00:09<00:00,  3.76it/s]


Epoch 15/100:
Train Loss: 1.5727, Train Acc: 0.5574
Val Loss: 1.6644, Val Acc: 0.5336


100%|██████████| 315/315 [00:47<00:00,  6.57it/s]
100%|██████████| 35/35 [00:09<00:00,  3.74it/s]


Epoch 16/100:
Train Loss: 1.4591, Train Acc: 0.5820
Val Loss: 1.6723, Val Acc: 0.5336


100%|██████████| 315/315 [00:48<00:00,  6.55it/s]
100%|██████████| 35/35 [00:09<00:00,  3.71it/s]


Epoch 17/100:
Train Loss: 1.3945, Train Acc: 0.6022
Val Loss: 1.4962, Val Acc: 0.5886


100%|██████████| 315/315 [00:47<00:00,  6.57it/s]
100%|██████████| 35/35 [00:09<00:00,  3.73it/s]


Epoch 18/100:
Train Loss: 1.3368, Train Acc: 0.6147
Val Loss: 1.5658, Val Acc: 0.5829


100%|██████████| 315/315 [00:47<00:00,  6.57it/s]
100%|██████████| 35/35 [00:09<00:00,  3.67it/s]


Epoch 19/100:
Train Loss: 1.2755, Train Acc: 0.6294
Val Loss: 1.5649, Val Acc: 0.5686


100%|██████████| 315/315 [00:48<00:00,  6.54it/s]
100%|██████████| 35/35 [00:09<00:00,  3.74it/s]


Epoch 20/100:
Train Loss: 1.2280, Train Acc: 0.6448
Val Loss: 1.5403, Val Acc: 0.5907


100%|██████████| 315/315 [00:48<00:00,  6.55it/s]
100%|██████████| 35/35 [00:09<00:00,  3.79it/s]


Epoch 21/100:
Train Loss: 1.1560, Train Acc: 0.6641
Val Loss: 1.5577, Val Acc: 0.5714


100%|██████████| 315/315 [00:48<00:00,  6.53it/s]
100%|██████████| 35/35 [00:09<00:00,  3.60it/s]


Epoch 22/100:
Train Loss: 1.1067, Train Acc: 0.6756
Val Loss: 1.4241, Val Acc: 0.6071


100%|██████████| 315/315 [00:48<00:00,  6.52it/s]
100%|██████████| 35/35 [00:09<00:00,  3.71it/s]


Epoch 23/100:
Train Loss: 1.0623, Train Acc: 0.6900
Val Loss: 1.4669, Val Acc: 0.6014


100%|██████████| 315/315 [00:48<00:00,  6.56it/s]
100%|██████████| 35/35 [00:09<00:00,  3.73it/s]


Epoch 24/100:
Train Loss: 1.0061, Train Acc: 0.7007
Val Loss: 1.3802, Val Acc: 0.6193


100%|██████████| 315/315 [00:48<00:00,  6.53it/s]
100%|██████████| 35/35 [00:09<00:00,  3.71it/s]


Epoch 25/100:
Train Loss: 0.9438, Train Acc: 0.7207
Val Loss: 1.3653, Val Acc: 0.6521


100%|██████████| 315/315 [00:48<00:00,  6.54it/s]
100%|██████████| 35/35 [00:09<00:00,  3.70it/s]


Epoch 26/100:
Train Loss: 0.9175, Train Acc: 0.7253
Val Loss: 1.2833, Val Acc: 0.6493


100%|██████████| 315/315 [00:47<00:00,  6.58it/s]
100%|██████████| 35/35 [00:09<00:00,  3.77it/s]


Epoch 27/100:
Train Loss: 0.8691, Train Acc: 0.7421
Val Loss: 1.3308, Val Acc: 0.6379


100%|██████████| 315/315 [00:47<00:00,  6.59it/s]
100%|██████████| 35/35 [00:09<00:00,  3.71it/s]


Epoch 28/100:
Train Loss: 0.8445, Train Acc: 0.7454
Val Loss: 1.3177, Val Acc: 0.6614


100%|██████████| 315/315 [00:47<00:00,  6.58it/s]
100%|██████████| 35/35 [00:09<00:00,  3.70it/s]


Epoch 29/100:
Train Loss: 0.7848, Train Acc: 0.7653
Val Loss: 1.3005, Val Acc: 0.6557


100%|██████████| 315/315 [00:48<00:00,  6.54it/s]
100%|██████████| 35/35 [00:09<00:00,  3.73it/s]


Epoch 30/100:
Train Loss: 0.7673, Train Acc: 0.7639
Val Loss: 1.2050, Val Acc: 0.6771


100%|██████████| 315/315 [00:47<00:00,  6.60it/s]
100%|██████████| 35/35 [00:09<00:00,  3.73it/s]


Epoch 31/100:
Train Loss: 0.7260, Train Acc: 0.7803
Val Loss: 1.2300, Val Acc: 0.6629


100%|██████████| 315/315 [00:47<00:00,  6.57it/s]
100%|██████████| 35/35 [00:09<00:00,  3.67it/s]


Epoch 32/100:
Train Loss: 0.6880, Train Acc: 0.7846
Val Loss: 1.1799, Val Acc: 0.6864


100%|██████████| 315/315 [00:47<00:00,  6.61it/s]
100%|██████████| 35/35 [00:09<00:00,  3.74it/s]


Epoch 33/100:
Train Loss: 0.6682, Train Acc: 0.7988
Val Loss: 1.2262, Val Acc: 0.6800


100%|██████████| 315/315 [00:47<00:00,  6.61it/s]
100%|██████████| 35/35 [00:11<00:00,  3.16it/s]


Epoch 34/100:
Train Loss: 0.6170, Train Acc: 0.8103
Val Loss: 1.1842, Val Acc: 0.6807


100%|██████████| 315/315 [00:50<00:00,  6.28it/s]
100%|██████████| 35/35 [00:09<00:00,  3.76it/s]


Epoch 35/100:
Train Loss: 0.5967, Train Acc: 0.8155
Val Loss: 1.1610, Val Acc: 0.6914


100%|██████████| 315/315 [00:48<00:00,  6.54it/s]
100%|██████████| 35/35 [00:09<00:00,  3.69it/s]


Epoch 36/100:
Train Loss: 0.5543, Train Acc: 0.8270
Val Loss: 1.1539, Val Acc: 0.6964


100%|██████████| 315/315 [00:47<00:00,  6.59it/s]
100%|██████████| 35/35 [00:09<00:00,  3.69it/s]


Epoch 37/100:
Train Loss: 0.5340, Train Acc: 0.8366
Val Loss: 1.1451, Val Acc: 0.6943


100%|██████████| 315/315 [00:48<00:00,  6.55it/s]
100%|██████████| 35/35 [00:09<00:00,  3.81it/s]


Epoch 38/100:
Train Loss: 0.5004, Train Acc: 0.8419
Val Loss: 1.2385, Val Acc: 0.6943


100%|██████████| 315/315 [00:47<00:00,  6.59it/s]
100%|██████████| 35/35 [00:09<00:00,  3.83it/s]


Epoch 39/100:
Train Loss: 0.4813, Train Acc: 0.8503
Val Loss: 1.1279, Val Acc: 0.7236


100%|██████████| 315/315 [00:47<00:00,  6.58it/s]
100%|██████████| 35/35 [00:09<00:00,  3.77it/s]


Epoch 40/100:
Train Loss: 0.4542, Train Acc: 0.8567
Val Loss: 1.1293, Val Acc: 0.7100


100%|██████████| 315/315 [00:47<00:00,  6.60it/s]
100%|██████████| 35/35 [00:09<00:00,  3.78it/s]


Epoch 41/100:
Train Loss: 0.4338, Train Acc: 0.8670
Val Loss: 1.2291, Val Acc: 0.6971


100%|██████████| 315/315 [00:48<00:00,  6.54it/s]
100%|██████████| 35/35 [00:09<00:00,  3.78it/s]


Epoch 42/100:
Train Loss: 0.4220, Train Acc: 0.8684
Val Loss: 1.0835, Val Acc: 0.7171


100%|██████████| 315/315 [00:47<00:00,  6.60it/s]
100%|██████████| 35/35 [00:09<00:00,  3.82it/s]


Epoch 43/100:
Train Loss: 0.4048, Train Acc: 0.8721
Val Loss: 1.1330, Val Acc: 0.7200


100%|██████████| 315/315 [00:47<00:00,  6.60it/s]
100%|██████████| 35/35 [00:09<00:00,  3.77it/s]


Epoch 44/100:
Train Loss: 0.3524, Train Acc: 0.8885
Val Loss: 1.0589, Val Acc: 0.7257


100%|██████████| 315/315 [00:48<00:00,  6.49it/s]
100%|██████████| 35/35 [00:09<00:00,  3.71it/s]


Epoch 45/100:
Train Loss: 0.3527, Train Acc: 0.8901
Val Loss: 1.1513, Val Acc: 0.7121


100%|██████████| 315/315 [00:47<00:00,  6.58it/s]
100%|██████████| 35/35 [00:09<00:00,  3.68it/s]


Epoch 46/100:
Train Loss: 0.3424, Train Acc: 0.8913
Val Loss: 1.1196, Val Acc: 0.7186


100%|██████████| 315/315 [00:47<00:00,  6.58it/s]
100%|██████████| 35/35 [00:09<00:00,  3.77it/s]


Epoch 47/100:
Train Loss: 0.3133, Train Acc: 0.9017
Val Loss: 1.1368, Val Acc: 0.7364


100%|██████████| 315/315 [00:47<00:00,  6.57it/s]
100%|██████████| 35/35 [00:09<00:00,  3.75it/s]


Epoch 48/100:
Train Loss: 0.2872, Train Acc: 0.9091
Val Loss: 1.0667, Val Acc: 0.7307


100%|██████████| 315/315 [00:47<00:00,  6.59it/s]
100%|██████████| 35/35 [00:09<00:00,  3.71it/s]


Epoch 49/100:
Train Loss: 0.2909, Train Acc: 0.9065
Val Loss: 1.1473, Val Acc: 0.7179


100%|██████████| 315/315 [01:07<00:00,  4.68it/s]
100%|██████████| 35/35 [00:11<00:00,  2.98it/s]


Epoch 50/100:
Train Loss: 0.2656, Train Acc: 0.9152
Val Loss: 1.0766, Val Acc: 0.7371


100%|██████████| 315/315 [01:13<00:00,  4.31it/s]
100%|██████████| 35/35 [00:10<00:00,  3.21it/s]

Epoch 51/100:
Train Loss: 0.2459, Train Acc: 0.9229
Val Loss: 1.1232, Val Acc: 0.7379
Early stopping



