In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision.datasets import ImageFolder
from PIL import Image
import glob
import cv2
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, f1_score


In [None]:
# Data Augmentation and Preprocessing
# Per-channel statistics used to normalise the tensor (the standard ImageNet
# values that torchvision's pretrained backbones are documented to expect).
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# The steps run in list order on each PIL image.
augmentation_steps = [
    # Bring every image to a fixed 224x224 size first.
    transforms.Resize((224, 224)),
    # Random geometric perturbations.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(20),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    # Random photometric perturbation.
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    # PIL image -> float tensor, then channel-wise normalisation.
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
transform = transforms.Compose(augmentation_steps)


In [None]:
# Dataset Class for Loading Images
class SickleCellDataset(Dataset):
    """Map-style dataset pairing image file paths with class labels.

    Args:
        image_paths: Sequence of image file paths, one per sample.
        labels: Sequence of labels aligned index-for-index with ``image_paths``.
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` have different lengths.
    """

    def __init__(self, image_paths, labels, transform=None):
        # A length mismatch would otherwise surface much later as an IndexError
        # (or as silently ignored labels) in the middle of training.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths ({len(image_paths)}) and labels ({len(labels)}) "
                "must have the same length"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened from disk, converted to RGB, and passed through
        ``transform`` when one was supplied.
        """
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert("RGB")
        label = self.labels[idx]
        # Compare against None so a falsy-but-valid callable is still applied.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [None]:
import os

# Sanity check: print whether each top-level class folder exists (Positive, then Negative).
for class_folder in (
    r"C:\Users\bewaj\Downloads\sickle cell research project datas needed\archive\Positive",
    r"C:\Users\bewaj\Downloads\sickle cell research project datas needed\archive\Negative",
):
    print(os.path.exists(class_folder))


True
True


In [None]:
import os

positive_folder = r"C:\Users\bewaj\Downloads\sickle cell research project datas needed\archive\Positive"
negative_folder = r"C:\Users\bewaj\Downloads\sickle cell research project datas needed\archive\Negative"

# Print a heading followed by the directory entries of each class folder,
# to reveal which subfolders actually hold the images.
for heading, folder in (
    ("Positive folder files:", positive_folder),
    ("\nNegative folder files:", negative_folder),
):
    print(heading)
    print(os.listdir(folder))


Positive folder files:
['Labelled', 'Unlabelled']

Negative folder files:
['Clear']


In [None]:
import os
import glob

# Define the exact subfolder paths
positive_folder = r"C:\Users\bewaj\Downloads\sickle cell research project datas needed\archive\Positive\Labelled"
negative_folder = r"C:\Users\bewaj\Downloads\sickle cell research project datas needed\archive\Negative\Clear"

# File types to look for
image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG']


def collect_image_paths(folder, patterns):
    """Return the sorted, de-duplicated file paths in ``folder`` matching any glob in ``patterns``.

    On case-insensitive filesystems (e.g. Windows) ``*.jpg`` and ``*.JPG`` match
    the same files, so collecting per pattern into a list counts every image
    twice. Duplicated samples then end up on both sides of the train/val/test
    split and inflate the reported accuracy. Gathering into a set removes the
    duplicates; sorting makes the sample order deterministic.
    """
    matches = set()
    for pattern in patterns:
        matches.update(glob.glob(os.path.join(folder, pattern)))
    return sorted(matches)


# Collect all matching files
positive_images = collect_image_paths(positive_folder, image_extensions)
negative_images = collect_image_paths(negative_folder, image_extensions)

# print
print(f"✅ Found {len(positive_images)} positive images")
print(f"✅ Found {len(negative_images)} negative images")

# Fail early with a clear message instead of failing later inside the data loader.
for class_name, folder, found in (
    ("positive", positive_folder, positive_images),
    ("negative", negative_folder, negative_images),
):
    if not found:
        raise FileNotFoundError(f"No {class_name} images matching {image_extensions} in {folder!r}")

# Combine paths and labels (1 = positive, 0 = negative)
image_paths = positive_images + negative_images
labels = [1] * len(positive_images) + [0] * len(negative_images)

# Now create dataset
dataset = SickleCellDataset(image_paths=image_paths, labels=labels, transform=transform)


✅ Found 844 positive images
✅ Found 294 negative images


In [None]:
# Train-Val-Test Split (70-15-15)
train_ratio, val_ratio = 0.7, 0.15
train_size = int(train_ratio * len(dataset))
val_size = int(val_ratio * len(dataset))
# The test split takes the remainder so the three sizes always sum to len(dataset).
test_size = len(dataset) - train_size - val_size

# Seed the split so the same samples land in each subset on every run;
# otherwise metrics from different runs are not comparable.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_split, test_split = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Validation and test images must not go through the random augmentations used
# for training, or evaluation metrics become noisy and non-repeatable. Re-wrap
# the same paths/labels with a deterministic pipeline and reuse the split indices.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
eval_dataset = SickleCellDataset(
    image_paths=dataset.image_paths, labels=dataset.labels, transform=eval_transform
)
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, test_split.indices)

In [None]:
# Data Loaders
batch_size = 32

# On Windows, DataLoader workers are started with "spawn" and must unpickle the
# dataset in a fresh process. A dataset class defined in a notebook cell cannot
# be resolved there, so worker start-up fails or the first batch never arrives.
# Load in the main process on Windows and keep 4 workers elsewhere.
num_workers = 0 if os.name == "nt" else 4

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# Evaluation loaders keep a fixed order.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [None]:
# Define Model (Custom CNN or Pretrained ResNet18)
class SickleCellClassifier(nn.Module):
    """Two-class image classifier built on a pretrained torchvision backbone.

    Args:
        base_model: ``"resnet"`` for ResNet-18 or ``"mobilenet"`` for
            MobileNetV2. The backbone's final linear layer is replaced by a
            new 2-output layer.

    Raises:
        ValueError: If ``base_model`` is not one of the supported names.
    """

    SUPPORTED_BACKBONES = ("resnet", "mobilenet")

    def __init__(self, base_model="resnet"):
        super().__init__()
        # Reject unknown names up front; otherwise ``self.model`` would never
        # be created and the failure would only appear later in ``forward``.
        if base_model not in self.SUPPORTED_BACKBONES:
            raise ValueError(
                f"Unsupported base_model {base_model!r}; "
                f"expected one of {self.SUPPORTED_BACKBONES}"
            )

        # NOTE(review): newer torchvision releases deprecate ``pretrained=True``
        # in favour of ``weights=...``; kept as-is for compatibility with the
        # installed version - confirm before upgrading torchvision.
        if base_model == "resnet":
            self.model = models.resnet18(pretrained=True)
            self.model.fc = nn.Linear(self.model.fc.in_features, 2)
        else:  # "mobilenet"
            self.model = models.mobilenet_v2(pretrained=True)
            self.model.classifier[1] = nn.Linear(self.model.last_channel, 2)

        # Storage written by ``activations_hook``; nothing in this class
        # registers the hook, so both stay None unless a caller wires it up.
        self.gradients = None
        self.activations = None

    def activations_hook(self, grad):
        """Store ``grad`` in ``self.gradients`` (intended for use as a tensor gradient hook)."""
        self.gradients = grad

    def forward(self, x):
        """Run ``x`` through the backbone and return its 2-class output."""
        x = self.model(x)
        return x


In [None]:
# Instantiate Model
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

model = SickleCellClassifier(base_model="resnet")
# Trailing expression: moves the parameters to `device` and echoes the module repr.
model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\bewaj/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|█████████████████████████████████████████████████████████████████████████████| 44.7M/44.7M [00:06<00:00, 6.92MB/s]


SickleCellClassifier(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True

In [None]:
# Loss Function and Optimizer
# Cross-entropy over the model's two output logits.
criterion = nn.CrossEntropyLoss()
# Adam over every model parameter, with weight decay.
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)


In [None]:
# Training Loop
num_epochs = 50
best_val_acc = 0
model_save_path = "best_sickle_cell_model.pth"  # Path to save the best model
target_val_acc = 98.0  # Stop once the best validation accuracy (%) reaches this

for epoch in range(num_epochs):
    # ---- Train for one epoch ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns a per-batch mean; weight it by the batch size
        # so the epoch average stays correct when the last batch is smaller.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        predicted = outputs.argmax(dim=1)
        correct += predicted.eq(labels).sum().item()
        total += batch_count

    train_loss = running_loss / total
    train_acc = 100 * correct / total

    # ---- Validation (no gradient tracking, eval-mode layers) ----
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            val_correct += predicted.eq(labels).sum().item()
            val_total += labels.size(0)

    val_acc = 100 * val_correct / val_total
    # The accumulated training loss was previously never reported.
    print(
        f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, "
        f"Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%"
    )

    # Save best model (only when validation accuracy strictly improves)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), model_save_path)

    # Early stopping if desired validation accuracy is reached
    if best_val_acc >= target_val_acc:
        print("Desired accuracy achieved. Stopping early.")
        break

Training Epoch 1/50:   0%|                                                                      | 0/25 [00:00<?, ?it/s]