In [3]:
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms.v2 as v2

import torch.nn as nn

class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = None
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=2):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self.make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self.make_layer(block, 64, num_blocks[2], stride=2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, num_blocks, stride):
        layers = []
        layers.append(block(self.in_channels, out_channels, stride))
        self.in_channels = out_channels
        for _ in range(1, num_blocks):
            layers.append(block(out_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Image transformations
transform = v2.Compose([
    v2.RandomHorizontalFlip(),
    v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    v2.ToTensor()
])

# Load data
folder = ImageFolder('train', transform=transform)
loader = DataLoader(folder, batch_size=16, shuffle=True)

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize model and move it to GPU
model = ResNet(BasicBlock, [2, 2, 2]).to(device)  # You can adjust the number of blocks in each layer as needed

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Training loop
total_epochs = 10
for epoch in range(total_epochs):
    correct = 0
    total = 0
    for images, labels in loader:
        # Move input data and labels to the same device as the model
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{total_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.2f}%')

# Save your model if needed
torch.save(model.state_dict(), 'resnet_model.pth')

Epoch [1/10], Loss: 0.4563, Accuracy: 72.69%
Epoch [2/10], Loss: 0.2434, Accuracy: 76.38%
Epoch [3/10], Loss: 0.6000, Accuracy: 78.48%
Epoch [4/10], Loss: 0.9234, Accuracy: 79.51%
Epoch [5/10], Loss: 0.3378, Accuracy: 80.59%
Epoch [6/10], Loss: 0.2655, Accuracy: 80.56%
Epoch [7/10], Loss: 0.3839, Accuracy: 81.22%
Epoch [8/10], Loss: 0.1629, Accuracy: 81.79%
Epoch [9/10], Loss: 0.2612, Accuracy: 81.34%
Epoch [10/10], Loss: 0.0919, Accuracy: 82.28%


In [4]:
# Load validation data
val_folder = ImageFolder('val', transform=v2.ToTensor())
val_loader = DataLoader(val_folder, batch_size=8, shuffle=False)  # No need to shuffle for validation

# Evaluation loop
model.eval()  # Set the model to evaluation mode
val_correct = 0
val_total = 0
with torch.no_grad():  # No need to track gradients during inference
    for images, labels in val_loader:
        # Move input data and labels to the same device as the model
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)

        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        val_total += labels.size(0)
        val_correct += (predicted == labels).sum().item()

val_accuracy = 100 * val_correct / val_total
print(f'Validation Accuracy: {val_accuracy:.2f}%')

Validation Accuracy: 83.07%
