In [7]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split

class BoneMarrowDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.data = dataframe
        self.transform = transform
        self.classes = sorted(self.data['labels'].unique())
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data.iloc[idx]['filepaths']
        label = self.class_to_idx[self.data.iloc[idx]['labels']]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label


In [8]:

def define_paths(data_dir):
    filepaths, labels = [], []
    for fold in os.listdir(data_dir):
        fold_path = os.path.join(data_dir, fold)
        for file in os.listdir(fold_path):
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                filepaths.append(os.path.join(fold_path, file))
                labels.append(fold)
    return filepaths, labels

def create_df(data_dir):
    files, classes = define_paths(data_dir)
    df = pd.DataFrame({'filepaths': files, 'labels': classes})
    return train_test_split(df, train_size=0.9, stratify=df['labels'], random_state=123)




In [12]:
data_dir = "D:\\data\\bonMarrowCancerData\\bone_marrow_cell_dataset"


train_df, test_df = create_df(data_dir)


In [14]:
len(train_df),len(test_df),



(1175, 131)

In [17]:
from torch.utils.data import DataLoader
import torchvision.transforms as T

train_transform = T.Compose([
    T.Resize((224, 224)),
    T.RandomHorizontalFlip(),
    T.ToTensor()
])

test_transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor()
])

train_dataset = BoneMarrowDataset(train_df, transform=train_transform)
test_dataset = BoneMarrowDataset(test_df, transform=test_transform)

batch_size = 40
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [18]:
import timm
import torch.nn as nn

num_classes = len(train_dataset.classes)

model = timm.create_model('efficientnet_b5', pretrained=True)
model.classifier = nn.Sequential(
    nn.BatchNorm1d(model.classifier.in_features),
    nn.Linear(model.classifier.in_features, 256),
    nn.ReLU(),
    nn.Dropout(0.45),
    nn.Linear(256, num_classes)
)


model.safetensors:   0%|          | 0.00/122M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [19]:
model

EfficientNet(
  (conv_stem): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn1): BatchNormAct2d(
          48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
    

In [20]:
import torch
import torch.optim as optim
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adamax(model.parameters(), lr=0.001)

def train_model(model, train_loader, val_loader, epochs=40):
    best_acc = 0.0
    for epoch in range(epochs):
        model.train()
        total_loss, correct = 0.0, 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {train_acc:.4f}")
        evaluate_model(model, val_loader)

def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            correct += (outputs.argmax(1) == labels).sum().item()
            total += labels.size(0)
    print(f"Validation Accuracy: {100 * correct / total:.2f}%")


In [21]:
train_model(model, train_loader, test_loader, epochs=2)

# Save the model
#torch.save(model.state_dict(), f'EfficientNetB5-Bone-Marrow-Cells-Classification-{round(100 * correct / total, 2)}.pth')


100%|██████████| 30/30 [01:11<00:00,  2.37s/it]


Epoch [1/2], Loss: 28.1311, Accuracy: 0.6877
Validation Accuracy: 87.79%


100%|██████████| 30/30 [00:11<00:00,  2.67it/s]


Epoch [2/2], Loss: 13.1426, Accuracy: 0.8638
Validation Accuracy: 79.39%
