In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os


In [14]:
# Define the attention mechanism module with fewer parameters
class Attention(nn.Module):
    def __init__(self, in_channels):
        super(Attention, self).__init__()
        self.conv = nn.Conv2d(in_channels, 1, kernel_size=1)  # Reduce channels
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        att_map = self.sigmoid(self.conv(x))
        return att_map * x

# Define the image classifier model with a smaller backbone
class ImageClassifier(nn.Module):
    def __init__(self, num_classes):
        super(ImageClassifier, self).__init__()
        resnet = models.resnet18(pretrained=True)  # Use ResNet18 instead of ResNet50
        self.features = nn.Sequential(*list(resnet.children())[:-2])
        self.attention = Attention(512)  # Adjust input channels for the attention mechanism
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)  # Reduce the number of input features

    def forward(self, x):
        x = self.features(x)
        x = self.attention(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


In [10]:
# Custom dataset class
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = sorted(os.listdir(root_dir))
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}
        self.images = self._load_images()

    def _load_images(self):
        images = []
        for cls_name in self.classes:
            class_dir = os.path.join(self.root_dir, cls_name)
            if not os.path.isdir(class_dir):
                continue
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                images.append((img_path, self.class_to_idx[cls_name]))
        return images

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path, label = self.images[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label




In [16]:
# Define transformations
transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create dataset and dataloader
dataset = CustomDataset(root_dir='dataset', transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


In [17]:
# Initialize model and optimizer
model = ImageClassifier(num_classes=len(dataset.classes))
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\neelr/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:16<00:00, 2.92MB/s]


In [18]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    for images, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Save trained model

Epoch [1/5], Loss: 0.2762
Epoch [2/5], Loss: 0.4145
Epoch [3/5], Loss: 1.1234
Epoch [4/5], Loss: 0.0449
Epoch [5/5], Loss: 0.0157
