In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
data_dir = "/kaggle/input/microscopic-peripheral-blood-cell-images/PBC_dataset_normal_DIB"

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

full_dataset = datasets.ImageFolder(data_dir, transform=data_transforms['train'])
indices = list(range(len(full_dataset)))

In [3]:
dataper = [0.01, 0.025, 0.05, 0.075, 0.1, 0.2, 0.3]
accuracies = {
    "test" : []
}
test_acc_per = 0

# Ensure that the test set is fixed with 4000 images
train_idx, test_idx = train_test_split(indices, test_size=4000, random_state=42)
test_dataset = Subset(full_dataset, test_idx)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Test set: {len(test_dataset)} images")

Test set: 4000 images


In [4]:
mod_dataper = [i * (len(full_dataset)/(len(full_dataset)-4000)) for i in dataper]
mod_dataper

[0.013055300947143293,
 0.032638252367858236,
 0.06527650473571647,
 0.0979147571035747,
 0.13055300947143295,
 0.2611060189428659,
 0.3916590284142988]

In [5]:
for i,j in zip(mod_dataper,dataper):
    # Split the remaining indices for training and validation
    remaining_indices = [idx for idx in indices if idx not in test_idx]
    train_idx, val_idx = train_test_split(remaining_indices, train_size=i, random_state=42)
    val_idx, val_idx_no_use = train_test_split(val_idx, train_size=0.02, random_state=42)

    train_dataset = Subset(full_dataset, train_idx)
    val_dataset = Subset(full_dataset, val_idx)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    print(f"Train set: {len(train_dataset)} images")
    print(f"Validation set: {len(val_dataset)} images")

    num_classes = 8

    model = models.resnet50(pretrained=True)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 8)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs):
        best_acc = 0.0
        for epoch in range(num_epochs):
            print(f'Epoch {epoch+1}/{num_epochs}')
            print('-' * 10)

            model.train()
            running_loss = 0.0
            correct = 0
            total = 0
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()
                running_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct += torch.sum(preds == labels.data)
                total += labels.size(0)

            epoch_loss = running_loss / len(train_loader.dataset)
            epoch_acc = correct.double() / len(train_loader.dataset)

            print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Validation phase
            model.eval()
            val_loss = 0.0
            correct = 0
            total = 0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)

                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    val_loss += loss.item() * inputs.size(0)
                    _, preds = torch.max(outputs, 1)
                    correct += torch.sum(preds == labels.data)
                    total += labels.size(0)

            val_loss = val_loss / len(val_loader.dataset)
            val_acc = correct.double() / len(val_loader.dataset)

            print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}')

            # Step scheduler
            scheduler.step()

            # Save the model if validation accuracy improves
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model.state_dict(), f'best_resnet50_{j*100}.pth')
                print(f"Best model saved with accuracy: {best_acc:.4f}")
                train_acc_per = round(epoch_acc.item(), 4)
                val_acc_per = round(val_acc.item(), 4)

    # Train the model with dynamic number of epochs
    train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=10)

    # Evaluate the model on the test dataset
    def test_model(model, test_loader):
        model.load_state_dict(torch.load(f'best_resnet50_{j*100}.pth'))
        model.eval()

        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                correct += torch.sum(preds == labels.data)
                total += labels.size(0)

        test_acc = correct.double() / total
        print(f'Test Accuracy: {test_acc:.4f}')
        return round(test_acc.item(), 4)

    # Test the model
    test_acc_per = test_model(model, test_loader)
    accuracies["test"].append(test_acc_per)
    
    print(f'Done for {j*100} % data')


Train set: 170 images
Validation set: 258 images




Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

 12%|█▏        | 11.9M/97.8M [00:00<00:00, 125MB/s]

 29%|██▉       | 28.2M/97.8M [00:00<00:00, 152MB/s]

 48%|████▊     | 47.2M/97.8M [00:00<00:00, 173MB/s]

 69%|██████▊   | 67.1M/97.8M [00:00<00:00, 187MB/s]

 89%|████████▉ | 86.9M/97.8M [00:00<00:00, 194MB/s]

100%|██████████| 97.8M/97.8M [00:00<00:00, 182MB/s]




Epoch 1/10
----------


Train Loss: 1.5579 Acc: 0.4647


Val Loss: 6.7035 Acc: 0.2636
Best model saved with accuracy: 0.2636
Epoch 2/10
----------


Train Loss: 1.1910 Acc: 0.5765


Val Loss: 10.4625 Acc: 0.3372
Best model saved with accuracy: 0.3372
Epoch 3/10
----------


Train Loss: 0.8828 Acc: 0.7588


Val Loss: 19.6271 Acc: 0.3178
Epoch 4/10
----------


Train Loss: 0.8888 Acc: 0.7588


Val Loss: 10.8141 Acc: 0.4341


Best model saved with accuracy: 0.4341
Epoch 5/10
----------


Train Loss: 0.9382 Acc: 0.6882


Val Loss: 5.5210 Acc: 0.3682
Epoch 6/10
----------


Train Loss: 0.8690 Acc: 0.7059


Val Loss: 15.6704 Acc: 0.2674
Epoch 7/10
----------


Train Loss: 0.6788 Acc: 0.7412


Val Loss: 2.1477 Acc: 0.5659


Best model saved with accuracy: 0.5659
Epoch 8/10
----------


Train Loss: 0.7706 Acc: 0.7529


Val Loss: 0.9622 Acc: 0.7558
Best model saved with accuracy: 0.7558
Epoch 9/10
----------


Train Loss: 0.6121 Acc: 0.7824


Val Loss: 0.6693 Acc: 0.8062
Best model saved with accuracy: 0.8062
Epoch 10/10
----------


Train Loss: 0.5712 Acc: 0.8000


Val Loss: 0.6676 Acc: 0.7984


  model.load_state_dict(torch.load(f'best_resnet50_{j*100}.pth'))


Test Accuracy: 0.7762
Done for 1.0 % data


Train set: 427 images
Validation set: 253 images


Epoch 1/10
----------


Train Loss: 1.1882 Acc: 0.6206


Val Loss: 20.8293 Acc: 0.3241
Best model saved with accuracy: 0.3241
Epoch 2/10
----------


Train Loss: 0.9332 Acc: 0.6932


Val Loss: 1.9791 Acc: 0.6403


Best model saved with accuracy: 0.6403
Epoch 3/10
----------


Train Loss: 0.7213 Acc: 0.7658


Val Loss: 16.4915 Acc: 0.2016
Epoch 4/10
----------


Train Loss: 0.7061 Acc: 0.7705


Val Loss: 3.5945 Acc: 0.5178
Epoch 5/10
----------


Train Loss: 0.6798 Acc: 0.7822


Val Loss: 2.5824 Acc: 0.4190
Epoch 6/10
----------


Train Loss: 0.6024 Acc: 0.8197


Val Loss: 1.1283 Acc: 0.7391


Best model saved with accuracy: 0.7391
Epoch 7/10
----------


Train Loss: 0.5039 Acc: 0.8220


Val Loss: 2.5691 Acc: 0.4466
Epoch 8/10
----------


Train Loss: 0.5424 Acc: 0.8080


Val Loss: 0.3881 Acc: 0.8735


Best model saved with accuracy: 0.8735
Epoch 9/10
----------


Train Loss: 0.3345 Acc: 0.9110


Val Loss: 0.3140 Acc: 0.9130


Best model saved with accuracy: 0.9130
Epoch 10/10
----------


Train Loss: 0.3439 Acc: 0.8829


Val Loss: 0.3185 Acc: 0.9051


Test Accuracy: 0.8742
Done for 2.5 % data


Train set: 854 images
Validation set: 244 images


Epoch 1/10
----------


Train Loss: 1.1292 Acc: 0.5785


Val Loss: 3.8502 Acc: 0.4262
Best model saved with accuracy: 0.4262
Epoch 2/10
----------


Train Loss: 0.8290 Acc: 0.7096


Val Loss: 1.7574 Acc: 0.4877


Best model saved with accuracy: 0.4877
Epoch 3/10
----------


Train Loss: 0.7043 Acc: 0.7529


Val Loss: 0.9319 Acc: 0.6844


Best model saved with accuracy: 0.6844
Epoch 4/10
----------


Train Loss: 0.6249 Acc: 0.7869


Val Loss: 2.2301 Acc: 0.6393
Epoch 5/10
----------


Train Loss: 0.5217 Acc: 0.8162


Val Loss: 1.3451 Acc: 0.6107
Epoch 6/10
----------


Train Loss: 0.4709 Acc: 0.8583


Val Loss: 0.4537 Acc: 0.8238


Best model saved with accuracy: 0.8238
Epoch 7/10
----------


Train Loss: 0.4876 Acc: 0.8384


Val Loss: 0.4249 Acc: 0.8689


Best model saved with accuracy: 0.8689
Epoch 8/10
----------


Train Loss: 0.3262 Acc: 0.8911


Val Loss: 0.2557 Acc: 0.9057


Best model saved with accuracy: 0.9057
Epoch 9/10
----------


Train Loss: 0.2654 Acc: 0.9145


Val Loss: 0.2532 Acc: 0.9098
Best model saved with accuracy: 0.9098
Epoch 10/10
----------


Train Loss: 0.2645 Acc: 0.9215


Val Loss: 0.2598 Acc: 0.8934


Test Accuracy: 0.9173
Done for 5.0 % data


Train set: 1281 images
Validation set: 236 images


Epoch 1/10
----------


Train Loss: 1.0233 Acc: 0.6565


Val Loss: 3.7429 Acc: 0.3898
Best model saved with accuracy: 0.3898
Epoch 2/10
----------


Train Loss: 0.7236 Acc: 0.7557


Val Loss: 2.7307 Acc: 0.5085


Best model saved with accuracy: 0.5085
Epoch 3/10
----------


Train Loss: 0.5792 Acc: 0.8095


Val Loss: 1.2196 Acc: 0.6441


Best model saved with accuracy: 0.6441
Epoch 4/10
----------


Train Loss: 0.5975 Acc: 0.8189


Val Loss: 1.1903 Acc: 0.7754


Best model saved with accuracy: 0.7754
Epoch 5/10
----------


Train Loss: 0.4817 Acc: 0.8462


Val Loss: 0.8762 Acc: 0.7924


Best model saved with accuracy: 0.7924
Epoch 6/10
----------


Train Loss: 0.5287 Acc: 0.8361


Val Loss: 0.6962 Acc: 0.7712
Epoch 7/10
----------


Train Loss: 0.4945 Acc: 0.8501


Val Loss: 0.9318 Acc: 0.7627
Epoch 8/10
----------


Train Loss: 0.3396 Acc: 0.8821


Val Loss: 0.3036 Acc: 0.9195
Best model saved with accuracy: 0.9195
Epoch 9/10
----------


Train Loss: 0.2861 Acc: 0.9102


Val Loss: 0.2565 Acc: 0.9068
Epoch 10/10
----------


Train Loss: 0.2987 Acc: 0.9048


Val Loss: 0.1906 Acc: 0.9364


Best model saved with accuracy: 0.9364


Test Accuracy: 0.9255
Done for 7.5 % data


Train set: 1709 images
Validation set: 227 images


Epoch 1/10
----------


Train Loss: 1.0074 Acc: 0.6583


Val Loss: 1.5971 Acc: 0.6828
Best model saved with accuracy: 0.6828
Epoch 2/10
----------


Train Loss: 0.6645 Acc: 0.7870


Val Loss: 0.6809 Acc: 0.8150


Best model saved with accuracy: 0.8150
Epoch 3/10
----------


Train Loss: 0.6285 Acc: 0.8110


Val Loss: 1.5329 Acc: 0.6344
Epoch 4/10
----------


Train Loss: 0.5444 Acc: 0.8221


Val Loss: 1.2419 Acc: 0.7533
Epoch 5/10
----------


Train Loss: 0.4734 Acc: 0.8496


Val Loss: 2.3806 Acc: 0.5374
Epoch 6/10
----------


Train Loss: 0.3774 Acc: 0.8806


Val Loss: 0.7801 Acc: 0.7225
Epoch 7/10
----------


Train Loss: 0.4155 Acc: 0.8584


Val Loss: 0.4492 Acc: 0.8546


Best model saved with accuracy: 0.8546
Epoch 8/10
----------


Train Loss: 0.2789 Acc: 0.9046


Val Loss: 0.1633 Acc: 0.9559


Best model saved with accuracy: 0.9559
Epoch 9/10
----------


Train Loss: 0.2364 Acc: 0.9239


Val Loss: 0.2200 Acc: 0.9251
Epoch 10/10
----------


Train Loss: 0.2315 Acc: 0.9210


Val Loss: 0.1594 Acc: 0.9515


Test Accuracy: 0.9297
Done for 10.0 % data


Train set: 3418 images
Validation set: 193 images


Epoch 1/10
----------


Train Loss: 0.8109 Acc: 0.7300


Val Loss: 2.9894 Acc: 0.5492
Best model saved with accuracy: 0.5492
Epoch 2/10
----------


Train Loss: 0.5394 Acc: 0.8373


Val Loss: 7.1756 Acc: 0.3161
Epoch 3/10
----------


Train Loss: 0.4482 Acc: 0.8616


Val Loss: 0.2697 Acc: 0.9171


Best model saved with accuracy: 0.9171
Epoch 4/10
----------


Train Loss: 0.3804 Acc: 0.8774


Val Loss: 0.7144 Acc: 0.7358
Epoch 5/10
----------


Train Loss: 0.3431 Acc: 0.8865


Val Loss: 0.2372 Acc: 0.9378


Best model saved with accuracy: 0.9378
Epoch 6/10
----------


Train Loss: 0.3346 Acc: 0.8947


Val Loss: 0.2980 Acc: 0.9223
Epoch 7/10
----------


Train Loss: 0.2890 Acc: 0.9067


Val Loss: 0.3517 Acc: 0.8964
Epoch 8/10
----------


Train Loss: 0.2038 Acc: 0.9365


Val Loss: 0.1629 Acc: 0.9585


Best model saved with accuracy: 0.9585
Epoch 9/10
----------


Train Loss: 0.1780 Acc: 0.9389


Val Loss: 0.1659 Acc: 0.9534
Epoch 10/10
----------


Train Loss: 0.1603 Acc: 0.9494


Val Loss: 0.1501 Acc: 0.9378


Test Accuracy: 0.9423
Done for 20.0 % data


Train set: 5127 images
Validation set: 159 images


Epoch 1/10
----------


Train Loss: 0.7621 Acc: 0.7425


Val Loss: 0.6970 Acc: 0.7862
Best model saved with accuracy: 0.7862
Epoch 2/10
----------


Train Loss: 0.4728 Acc: 0.8438


Val Loss: 1.3873 Acc: 0.6541
Epoch 3/10
----------


Train Loss: 0.4895 Acc: 0.8420


Val Loss: 0.6309 Acc: 0.8365


Best model saved with accuracy: 0.8365
Epoch 4/10
----------


Train Loss: 0.3577 Acc: 0.8808


Val Loss: 0.4047 Acc: 0.8742


Best model saved with accuracy: 0.8742
Epoch 5/10
----------


Train Loss: 0.3428 Acc: 0.8836


Val Loss: 0.3097 Acc: 0.8805


Best model saved with accuracy: 0.8805
Epoch 6/10
----------


Train Loss: 0.3227 Acc: 0.8982


Val Loss: 0.4985 Acc: 0.8428
Epoch 7/10
----------


Train Loss: 0.2613 Acc: 0.9148


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
data = {
        'Data Split': [f'{i*100}%' for i in dataper],
        'Classifier': ['ResNET50 Baseline' for i in range(7)],
        'Test Acc (%)': [round(accuracies['test'][i]*100,2) for i in range(7)]
}

df = pd.DataFrame(data)
fig, ax = plt.subplots(figsize=(8, 2))
ax.axis('tight')
ax.axis('off')
tbl = ax.table(cellText=df.values, colLabels=df.columns, cellLoc='center', loc='center')
tbl.auto_set_font_size(False)
tbl.set_fontsize(10)
tbl.scale(1.2, 1.2) 
plt.title(f'ResNET50 Comparison Table', pad=20)
plt.show()