In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.utils.class_weight import compute_class_weight
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import os, random
from collections import Counter
from sklearn.model_selection import train_test_split
import numpy as np

transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
])


In [2]:
class CustomDataset(Dataset):
    def __init__(self, img_paths, labels, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transform = transform
        self.class_weights = self.compute_class_weights()

    def compute_class_weights(self):
        class_weights = compute_class_weight('balanced', classes=np.unique(self.labels), y=self.labels)
        return torch.tensor(class_weights, dtype=torch.float)

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label)

In [6]:
data_dir = "/content/drive/MyDrive/Colab Notebooks/MultiClass_m-20240806T134043Z-001/MultiClass_m"
classes = sorted(os.listdir(data_dir))
class_to_idx = {cls: i for i, cls in enumerate(classes)}

img_paths, labels = [], []
for label in classes:
    folder = os.path.join(data_dir, label)
    for img_name in os.listdir(folder):
        img_paths.append(os.path.join(folder, img_name))
        labels.append(class_to_idx[label])
def oversample_dataset(img_paths, labels):
    class_counts = Counter(labels)
    max_count = max(class_counts.values())
    new_img_paths, new_labels = img_paths.copy(), labels.copy()

    for cls, count in class_counts.items():
        if count < max_count:
            diff = max_count - count
            cls_indices = [i for i, y in enumerate(labels) if y == cls]
            for _ in range(diff):
                idx = random.choice(cls_indices)
                new_img_paths.append(img_paths[idx])
                new_labels.append(labels[idx])
    return new_img_paths, new_labels

oversampled_img_paths, oversampled_labels = oversample_dataset(img_paths, labels)

train_paths, test_paths, train_labels, test_labels = train_test_split(
    oversampled_img_paths, oversampled_labels, test_size=0.2, stratify=oversampled_labels
)
val_paths, test_paths, val_labels, test_labels = train_test_split(
    test_paths, test_labels, test_size=0.5, stratify=test_labels
)


)

train_dataset = CustomDataset(train_paths, train_labels, transform)
val_dataset = CustomDataset(val_paths, val_labels, transform)
test_dataset = CustomDataset(test_paths, test_labels, transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

class WeightedDenseNet121(nn.Module):
    def __init__(self, num_classes, class_weights):
        super().__init__()
        self.model = models.densenet121(pretrained=True)
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(in_features, num_classes)
        self.class_weights = class_weights

    def forward(self, x, labels=None):
        logits = self.model(x)
        loss = None
        if labels is not None:
            criterion = nn.CrossEntropyLoss(weight=self.class_weights.to(x.device))
            loss = criterion(logits, labels)
        return (loss, logits) if loss is not None else logits

num_classes = len(class_to_idx)
model = WeightedDenseNet121(num_classes, train_dataset.class_weights)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 10

for epoch in range(1, num_epochs + 1):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss, outputs = model(imgs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    acc = correct / total
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

acc = accuracy_score(all_labels, all_preds)
precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')

print("\nTest Results:")
print(f"Accuracy : {acc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")

Test Results:
Accuracy : 0.69
Precision: 0.68
Recall   : 0.68
F1 Score : 0.67
