<a href="https://colab.research.google.com/github/Priyabratagni/DBMI_Demographic_Bias_in_Medical_Imaging/blob/main/EfficientNetModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installing Requirements

## Importing Required Libraries

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import timm
import torch.optim as optim

## Load the CSV file and image data

In [None]:
# Input locations on Google Drive.
# NOTE(review): no drive.mount(...) call is visible in this notebook, so this
# assumes Drive is already mounted at /content/drive -- confirm.
csv_path = '/content/drive/MyDrive/Colab Notebooks/DermaMNISTDataset.csv'
npz_path = '/content/drive/MyDrive/Colab Notebooks/DermaMNIST_Corrected_224.npz'

# Metadata table (read later for the 'dx' and 'split' columns).
df = pd.read_csv(csv_path)
# Archive of image arrays (read later via the 'train_images', 'val_images'
# and 'test_images' keys).
npz = np.load(npz_path)

In [None]:
# Show only the first rows; rendering the whole frame adds noise to the notebook.
df.head()

In [None]:
import matplotlib.pyplot as plt

# Diagnosis strings for the training rows only, re-indexed from 0 so that
# position i lines up with npz['train_images'][i]. Reuses the already-loaded
# `df` instead of reading the CSV a second time.
# NOTE(review): this relies on the CSV's train rows being stored in the same
# order as the images in the archive (the rest of the notebook makes the same
# assumption when it builds the label arrays) -- confirm.
train_dx = df.loc[df['split'] == 'train', 'dx'].reset_index(drop=True)

# Show the first five training images side by side, titled with their diagnosis.
for i in range(5):
    plt.subplot(1, 5, i + 1)
    plt.imshow(npz['train_images'][i])
    plt.title(train_dx[i])
    plt.axis('off')
plt.show()

Convert text classes to numerical format

In [None]:
# Learn the mapping from diagnosis string to integer id, then store the
# encoded ids next to the original column.
label_encoder = LabelEncoder().fit(df['dx'])
df['dx_enc'] = label_encoder.transform(df['dx'])

In [None]:
class DermaDataset(Dataset):
    """Map-style dataset pairing image arrays with integer class labels.

    Each item is an ``(image, label)`` tuple: ``image`` is a float32 tensor
    holding the stored image divided by 255 with its last axis moved to the
    front, and ``label`` is an int64 scalar tensor.
    """

    def __init__(self, images, labels):
        """Store the images and labels without copying or converting them.

        Args:
            images: NumPy array of images indexed along the first axis.
                Presumably uint8 in (height, width, channel) layout -- the
                per-item transpose in ``__getitem__`` requires 3-D items.
            labels: Sequence of integer class ids, one per image.

        Raises:
            ValueError: If ``images`` and ``labels`` differ in length, which
                would otherwise pair images with the wrong labels or fail
                later with an IndexError.
        """
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        # Keep the array in its original dtype. Converting the whole array to
        # float32 here would hold a second, larger copy in memory (4x for
        # uint8 input); the conversion is done per item instead.
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` tensor pair at position ``idx``."""
        # Scale a single image to float32 on demand.
        img = self.images[idx].astype(np.float32) / 255.0
        # Move the last axis first (HWC -> CHW) and make the result contiguous
        # so it can be wrapped as a tensor without a further copy.
        img = np.ascontiguousarray(np.transpose(img, (2, 0, 1)))
        label = self.labels[idx]
        return torch.from_numpy(img), torch.tensor(label, dtype=torch.long)

In [None]:
# Row index labels of `df` belonging to each split, selected via the 'split' column.
split_column = df['split']
train_idx = df.index[(split_column == 'train').to_numpy()]
val_idx = df.index[(split_column == 'val').to_numpy()]
test_idx = df.index[(split_column == 'test').to_numpy()]

In [None]:
# Encoded class ids for each split, as plain NumPy arrays.
encoded_dx = df['dx_enc']
train_labels = encoded_dx.loc[train_idx].to_numpy()
val_labels = encoded_dx.loc[val_idx].to_numpy()
test_labels = encoded_dx.loc[test_idx].to_numpy()

In [None]:
# Pair each split's image array from the archive with its label array.
# NOTE(review): the pairing is purely positional -- it presumes the CSV rows of
# a split are in the same order as that split's images; confirm.
trainset = DermaDataset(images=npz['train_images'], labels=train_labels)
valset = DermaDataset(images=npz['val_images'], labels=val_labels)
testset = DermaDataset(images=npz['test_images'], labels=test_labels)

In [None]:
# Only the training loader shuffles; validation and test keep dataset order.
batch_size = 32
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(valset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False)

In [None]:
# Seed PyTorch's global RNG so the new classifier head starts from the same
# weights, and the training DataLoader shuffles in the same order, on every
# Restart & Run All. This does not by itself guarantee bit-identical results
# on GPU.
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One output per class known to the label encoder, instead of a hard-coded 7,
# so the head always matches the encoded labels and the class names used in
# the evaluation cells.
num_classes = len(label_encoder.classes_)

# Pretrained backbone with its classification layer swapped for a fresh one.
model = timm.create_model('efficientnet_b1', pretrained=True)
model.classifier = nn.Linear(model.classifier.in_features, num_classes)
model = model.to(device)

In [None]:
# Multi-class classification loss and an Adam optimizer over every model parameter.
learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)


In [None]:
num_epochs = 10

# Per-epoch history; the plotting cell below reads these three lists.
train_losses, val_losses, val_accuracies = [], [], []

# Sample counts used to turn summed losses / hit counts into per-sample means.
n_train_samples = len(train_loader.dataset)
n_val_samples = len(val_loader.dataset)

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    # ---- Optimisation pass over the training split ----
    model.train()
    train_loss_sum = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so a smaller final batch
        # does not skew the epoch figure (assumes `criterion` averages over
        # the batch, which is the nn.CrossEntropyLoss default).
        train_loss_sum += batch_loss.item() * batch_images.size(0)

    train_loss = train_loss_sum / n_train_samples

    # ---- Evaluation pass over the validation split (no gradients) ----
    model.eval()
    val_loss_sum = 0.0
    n_hits = 0

    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            val_loss_sum += criterion(logits, batch_labels).item() * batch_images.size(0)
            # Predicted class = index of the largest logit along dim 1.
            n_hits += (logits.argmax(1) == batch_labels).sum().item()

    val_loss = val_loss_sum / n_val_samples
    val_acc = n_hits / n_val_samples

    # Record this epoch's metrics.
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

print(f"Training completed! Total epochs: {len(train_losses)}")

In [None]:
# Test: score the model on the held-out split and keep every prediction and
# true label for the confusion matrix / classification report cells below.
model.eval()
all_preds, all_labels = [], []
n_correct = 0

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Predicted class = index of the largest output along dim 1.
        batch_preds = model(batch_images).argmax(1)
        n_correct += (batch_preds == batch_labels).sum().item()

        # Move to CPU before converting so this also works when running on GPU.
        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

test_acc = n_correct / len(test_loader.dataset)
print(f"Test Accuracy: {test_acc:.4f}")


In [None]:
import matplotlib.pyplot as plt

# Epoch numbers (1-based) for the x-axis, derived from the recorded history.
completed_epochs = len(train_losses)
epochs_completed = range(1, completed_epochs + 1)

# Two panels side by side: losses on the left, validation accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10,4))

loss_ax.plot(epochs_completed, train_losses, 'bo-', label='Train Loss', markersize=8)
loss_ax.plot(epochs_completed, val_losses, 'ro-', label='Val Loss', markersize=8)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()
loss_ax.grid(True)

acc_ax.plot(epochs_completed, val_accuracies, 'go-', label='Val Accuracy', markersize=8)
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Validation Accuracy')
acc_ax.legend()
acc_ax.grid(True)

fig.tight_layout()
plt.show()

# Raw numbers behind the curves.
print(f"Completed epochs: {completed_epochs}")
print(f"Train Loss: {train_losses}")
print(f"Val Loss: {val_losses}")
print(f"Val Accuracy: {val_accuracies}")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

In [None]:
# Pass the full list of encoded class ids explicitly. Without `labels=`, the
# matrix only covers classes that occur in the true or predicted labels, so a
# class missing from both would shrink the matrix and misalign the tick
# labels below.
class_ids = np.arange(len(label_encoder.classes_))
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

# Rows are true classes, columns are predicted classes, named by diagnosis string.
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()

In [None]:
# Per-class precision / recall / F1 on the test predictions.
# `labels=` pins the rows to every encoded class id so they always line up
# with `target_names`, even if a class is absent from the test labels and
# predictions.
report = classification_report(
    all_labels,
    all_preds,
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
    digits=3
)
print(report)

In [None]:
import numpy as np

# Same report as a dict keyed by class name. `labels=` guarantees an entry for
# every class in `label_encoder.classes_`, so the lookup below cannot miss a
# class that happens to be absent from the test labels and predictions.
report_dict = classification_report(
    all_labels,
    all_preds,
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
    output_dict=True,
)
f1_scores = [report_dict[c]['f1-score'] for c in label_encoder.classes_]

# Bar chart of F1 per diagnosis class; y-axis fixed to the full [0, 1] range.
plt.figure(figsize=(8,4))
plt.bar(label_encoder.classes_, f1_scores, color='skyblue')
plt.xlabel('Class')
plt.ylabel('F1-score')
plt.title('F1-score per Class')
plt.ylim(0, 1)
plt.tight_layout()
plt.show()