In [None]:
import os
import torch
import torch.nn as nn
import torch.optim  as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm

![ResNet Architecture](architecture_images\resnet_architecture.png)

In [17]:
resnets = {
    "resnet50": [3,4,6,3],
    "resnet101": [3,4,23,3],
    "resnet150": [3,8,36,3],
}

In [2]:
class block(nn.Module):
    def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
        super(block, self).__init__()
        self.expansion = 4
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(out_channels*self.expansion)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample
    
    def forward(self, x):
        identity = x

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.bn3(x)

        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)
        
        x += identity

        x = self.relu(x)

        return x        

In [3]:
class ResNet(nn.Module): # [3, 4, 5, 6]
    def __init__(self, block, layers, image_channels, num_classes):
        super(ResNet, self).__init__()
        self.in_channels=64
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, layers[0], out_channels=64, stride=1)
        self.layer2 = self._make_layer(block, layers[1], out_channels=128, stride=2)
        self.layer3 = self._make_layer(block, layers[2], out_channels=256, stride=2)
        self.layer4 = self._make_layer(block, layers[3], out_channels=512, stride=2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avg_pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x




    def _make_layer(self, block, num_residual_blocks, out_channels, stride):
        identity_downsample = None
        layers = []

        # for transition b/w stages and when stride > 1 reduces output dimension
        if stride != 1 or self.in_channels != out_channels * 4:
            identity_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels*4, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels*4)
            )

        layers.append(block(self.in_channels, out_channels, identity_downsample, stride))
        self.in_channels = out_channels * 4

        for i in range(num_residual_blocks - 1):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)


In [4]:
def resnet50(img_channels=3, num_channels=1000):
    return ResNet(block, [3,4,6,3], img_channels, num_channels)

def resnet101(img_channels=3, num_channels=1000):
    return ResNet(block, [3,4,23,3], img_channels, num_channels)

def resnet152(img_channels=3, num_channels=1000):
    return ResNet(block, [3,8,36,3], img_channels, num_channels)

In [5]:
def test():
    net = resnet50()
    x = torch.randn(1, 3, 224, 224)
    y = net(x).to('cuda')
    print(y.shape)

In [6]:
test()

torch.Size([1, 1000])


##  Loading dataset

In [7]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [8]:
data_dir = 'cats_vs_dogs_mini_dataset'
dataset = datasets.ImageFolder(os.path.join(data_dir), transform=data_transforms['train'])

In [9]:
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

In [10]:
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

In [11]:
val_dataset.dataset.transform = data_transforms['val']
test_dataset.dataset.transform = data_transforms['test']

In [12]:
batch_size = 24

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [13]:
train_data_iter = iter(train_loader)

# Get the next batch
images, labels = next(train_data_iter)
print(images.shape)
print(labels)
print(train_dataset.dataset.class_to_idx)

torch.Size([24, 3, 224, 224])
tensor([0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1])
{'cats_set': 0, 'dogs_set': 1}


In [14]:
train_dataset.dataset.classes

['cats_set', 'dogs_set']

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [20]:
model = ResNet(block, resnets['resnet50'], images.shape[1], num_classes=len(train_dataset.dataset.classes)).to(device)

In [21]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

## Training

In [22]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, patience=3, save_path="saved_best_models/resnet_best_model.pth"):
    best_val_loss = float('inf')  # Initialize best validation loss to a large value
    epochs_no_improve = 0  # Counter for early stopping
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            train_loss += loss.item()
            
            # Backward and optimize
            loss.backward()
            optimizer.step()
            
            # Calculate train accuracy
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        train_loss /= len(train_loader)
        train_accuracy = 100 * correct / total
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
                # Calculate validation accuracy
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_loss /= len(val_loader)
        val_accuracy = 100 * correct / total
        
        # Check if validation loss improved
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0  # Reset early stopping counter
            torch.save(model.state_dict(), save_path)  # Save best model
            print(f"Best model saved with val_loss: {best_val_loss:.4f}")
        # else:
        #     epochs_no_improve += 1
        
        # # Early stopping check
        # if epochs_no_improve >= patience:
        #     print("Early stopping triggered.")
        #     break
        
        print(f"Epoch [{epoch + 1}/{num_epochs}], "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%")

In [23]:
num_epochs = 10
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs)


100%|██████████| 30/30 [00:20<00:00,  1.47it/s]


Best model saved with val_loss: 1.8872
Epoch [1/10], Train Loss: 0.7211, Train Acc: 55.43%, Val Loss: 1.8872, Val Acc: 52.67%


100%|██████████| 30/30 [00:18<00:00,  1.62it/s]


Best model saved with val_loss: 0.7958
Epoch [2/10], Train Loss: 0.6485, Train Acc: 65.86%, Val Loss: 0.7958, Val Acc: 59.33%


100%|██████████| 30/30 [00:17<00:00,  1.70it/s]


Best model saved with val_loss: 0.6700
Epoch [3/10], Train Loss: 0.6246, Train Acc: 67.14%, Val Loss: 0.6700, Val Acc: 64.67%


100%|██████████| 30/30 [00:19<00:00,  1.56it/s]


Epoch [4/10], Train Loss: 0.5389, Train Acc: 73.00%, Val Loss: 1.1564, Val Acc: 62.00%


100%|██████████| 30/30 [00:20<00:00,  1.46it/s]


Epoch [5/10], Train Loss: 0.5146, Train Acc: 76.00%, Val Loss: 0.7357, Val Acc: 63.33%


100%|██████████| 30/30 [00:20<00:00,  1.48it/s]


Epoch [6/10], Train Loss: 0.4307, Train Acc: 80.43%, Val Loss: 1.1938, Val Acc: 59.33%


100%|██████████| 30/30 [00:17<00:00,  1.69it/s]


Epoch [7/10], Train Loss: 0.4462, Train Acc: 81.86%, Val Loss: 1.0094, Val Acc: 62.67%


100%|██████████| 30/30 [00:16<00:00,  1.80it/s]


Epoch [8/10], Train Loss: 0.3675, Train Acc: 85.14%, Val Loss: 0.7674, Val Acc: 67.33%


100%|██████████| 30/30 [00:17<00:00,  1.71it/s]


Epoch [9/10], Train Loss: 0.3450, Train Acc: 87.43%, Val Loss: 1.6770, Val Acc: 57.33%


100%|██████████| 30/30 [00:18<00:00,  1.63it/s]


Epoch [10/10], Train Loss: 0.2876, Train Acc: 89.57%, Val Loss: 0.8803, Val Acc: 60.00%


## Test set eval

In [24]:
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_accuracy = 100 * correct / total
    print(f"Test Accuracy: {test_accuracy:.2f}%")

# Run evaluation
evaluate_model(model, test_loader)

Test Accuracy: 58.00%


## Load and inference

In [25]:
def load_model(model, load_path="best_model.pth"):
    model.load_state_dict(torch.load(load_path, weights_only=True))
    model.eval()
    return model

In [26]:
def infer(model, image):
    model = load_model(model, load_path=r"saved_best_models\resnet_best_model.pth")
    image = image.to(device)
    with torch.no_grad():
        output = model(image.unsqueeze(0))
        _, predicted = torch.max(output, 1)
    return predicted.item()

In [27]:
def load_image(image_path):
    image = Image.open(image_path).convert('RGB')
    image = data_transforms['test'](image)
    return image

In [28]:
image_path = "cats_vs_dogs_mini_dataset\dogs_set\dog.4014.jpg"
class_index = {value:key.split("_")[0] for key, value in train_dataset.dataset.class_to_idx.items()}
image_tensor = load_image(image_path)
predicted_label = infer(model, image_tensor)

print("Predicted Class:", class_index[predicted_label])

Predicted Class: dogs
