In [1]:
import os
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as models
from sklearn.model_selection import train_test_split

# Update these paths to match your local machine
DATA_ROOT = r"D:\test\CP-AnemiC"
EXCEL_PATH = os.path.join(DATA_ROOT, "Anemia_Data_Collection_Sheet.xlsx")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

df = pd.read_excel(EXCEL_PATH)
print("Columns:", list(df.columns))
print("Shape:", df.shape)

Device: cpu
Columns: ['IMAGE_ID', 'HB_LEVEL', 'Severity', 'Age(Months)', 'GENDER', 'REMARK', 'HOSPITAL', 'CITY/TOWN', 'MUNICIPALITY/DISTRICT', 'REGION', 'COUNTRY']
Shape: (710, 11)


In [2]:
class EyeHbDataset(Dataset):
    def __init__(self, excel_path=None, img_root=None, transform=None, mode="train", image_list=None):
        self.img_root = img_root
        self.transform = transform
        self.mode = mode

        if mode == "train":
            df = pd.read_excel(excel_path)
            valid_rows = []
            for _, row in df.iterrows():
                img_id = str(row["IMAGE_ID"])
                fname = img_id if img_id.lower().endswith((".png", ".jpg")) else img_id + ".png"
                
                path_anemic = os.path.join(self.img_root, "Anemic", fname)
                path_non_anemic = os.path.join(self.img_root, "Non-anemic", fname)

                img_path = path_anemic if os.path.exists(path_anemic) else path_non_anemic if os.path.exists(path_non_anemic) else None
                
                if img_path:
                    valid_rows.append({"HB_LEVEL": row["HB_LEVEL"], "__FULL_PATH__": img_path})
            self.df = pd.DataFrame(valid_rows)
        else:
            # For testing/inference mode using a list of file paths
            self.image_paths = image_list 

    def __len__(self): 
        return len(self.df) if self.mode == "train" else len(self.image_paths)

    def __getitem__(self, idx):
        if self.mode == "train":
            row = self.df.iloc[idx]
            img_path = row["__FULL_PATH__"]
            hb_value = float(row["HB_LEVEL"])
        else:
            img_path = self.image_paths[idx]
            hb_value = 0.0 # Placeholder for test mode

        img_bgr = cv2.imread(img_path)
        if img_bgr is None: raise FileNotFoundError(img_path)
        
        img_bgr = cv2.resize(img_bgr, (224, 224))
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        img_tensor = self.transform(img_rgb) if self.transform else T.ToTensor()(img_rgb)
        hb_tensor = torch.tensor([hb_value], dtype=torch.float32)

        return img_tensor, hb_tensor

In [3]:
class HbNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        self.backbone.fc = nn.Identity() 
        
        self.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1) 
        )

    def forward(self, x_img):
        feat = self.backbone(x_img)
        return self.fc(feat)

In [4]:
class HbNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        self.backbone.fc = nn.Identity()  # 512 image feats
        
        self.fc = nn.Sequential(
            nn.Linear(512, 256),  # Image only!
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1)  # Hb prediction
        )

    def forward(self, x_img):
        feat = self.backbone(x_img)
        return self.fc(feat)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


Device: cpu


In [5]:
# Setup Data
train_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

full_ds = EyeHbDataset(excel_path=EXCEL_PATH, img_root=DATA_ROOT, transform=train_transform, mode="train")
train_indices, val_indices = train_test_split(range(len(full_ds)), test_size=0.2, random_state=42)

train_loader = DataLoader(torch.utils.data.Subset(full_ds, train_indices), batch_size=32, shuffle=True)
val_loader = DataLoader(torch.utils.data.Subset(full_ds, val_indices), batch_size=32)

model = HbNet().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training loop logic from your notebook
def train_epoch(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for imgs, hbs in loader:
        imgs, hbs = imgs.to(device), hbs.to(device)
        optimizer.zero_grad()
        preds = model(imgs)
        loss = criterion(preds, hbs)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * imgs.size(0)
    return total_loss / len(loader.dataset)

# Run for 20 epochs
for epoch in range(1, 21):
    loss = train_epoch(model, train_loader, optimizer, criterion, device)
    print(f"Epoch {epoch:02d} | Loss: {loss:.4f}")

torch.save(model.state_dict(), "best_hb_model.pth")

Epoch 01 | Loss: 76.2154
Epoch 02 | Loss: 26.6839
Epoch 03 | Loss: 5.1889
Epoch 04 | Loss: 2.8715
Epoch 05 | Loss: 2.6853
Epoch 06 | Loss: 2.4769
Epoch 07 | Loss: 2.2888
Epoch 08 | Loss: 2.3175
Epoch 09 | Loss: 2.2544
Epoch 10 | Loss: 2.2501
Epoch 11 | Loss: 2.0842
Epoch 12 | Loss: 2.0541
Epoch 13 | Loss: 1.9768
Epoch 14 | Loss: 1.9108
Epoch 15 | Loss: 1.8622
Epoch 16 | Loss: 1.9863
Epoch 17 | Loss: 1.9151
Epoch 18 | Loss: 1.8270
Epoch 19 | Loss: 2.0254
Epoch 20 | Loss: 1.9284


In [7]:
def anemia_from_hb(hb): 
    # Returns 1 if Anemic (below 12.5), 0 otherwise
    return int(hb < 12.5)

def eval_anemia_accuracy(model, loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for imgs, hbs_true in loader:
            imgs = imgs.to(device)
            hbs_true = hbs_true.to(device)
            
            # Get numerical predictions from the model
            hbs_pred = model(imgs)
            
            # Convert both true and predicted Hb to binary (Anemic vs Non-Anemic)
            for i in range(len(hbs_true)):
                pred_label = anemia_from_hb(hbs_pred[i].item())
                true_label = anemia_from_hb(hbs_true[i].item())
                
                if pred_label == true_label:
                    correct += 1
                total += 1
                
    accuracy = (correct / total) * 100
    return accuracy

# Calculate accuracy on the validation set
val_accuracy = eval_anemia_accuracy(model, val_loader, device)
print(f"Validation Anemia Accuracy: {val_accuracy:.1f}%")

Validation Anemia Accuracy: 75.4%
