In [27]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import WeightedRandomSampler
import torch.nn.functional as F
from PIL import Image
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np
import time

In [28]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [29]:
# Absolute, machine-specific location of the MIDAS metadata spreadsheet.
# Later cells also treat the directory containing this file as the image folder,
# so adjust this single path when running on another machine.
file_path = "C:/Data/DJ/azcopydata/midasmultimodalimagedatasetforaibasedskincancer/release_midas.xlsx"
# Raw metadata table; subsequent cells encode, scale and filter it in place.
df = pd.read_excel(file_path)

In [30]:
# --- Metadata preprocessing ---------------------------------------------------
# NOTE: this cell overwrites columns of `df` in place, so it is not idempotent.
# Re-run the cell that loads the spreadsheet before re-running this one.

# Images are looked up in the same directory as the spreadsheet.
image_dir = os.path.dirname(file_path)

# Work on string values throughout; missing cells become the literal 'nan'.
df = df.astype(str)

# Integer-encode every categorical column and keep the fitted encoder so the
# codes can be mapped back to the original category names later.
label_encoders = {}
categorical_cols = ['midas_gender', 'midas_fitzpatrick', 'midas_ethnicity', 'midas_race']
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])  # column is already str after astype(str) above
    label_encoders[col] = le

# Parse numeric columns; anything unparseable (including 'nan') becomes NaN.
numerical_cols = ['midas_age', 'length_(mm)', 'width_(mm)']
df[numerical_cols] = df[numerical_cols].apply(pd.to_numeric, errors='coerce')

# Standardise first, impute afterwards. StandardScaler ignores NaN while fitting
# and leaves it in place when transforming, so filling with 0 *after* scaling is
# mean imputation. Filling with 0 *before* scaling would treat missing ages and
# lesion sizes as literal zeros and distort the fitted mean and variance.
scaler = StandardScaler()
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])
df[numerical_cols] = df[numerical_cols].fillna(0)

# Binary target: 1 for melanoma, 0 otherwise; any other value maps to NaN ...
df['target'] = df['midas_melanoma'].map({'yes': 1, 'no': 0})

# ... and rows without a usable label are dropped.
df = df.dropna(subset=['target']).reset_index(drop=True)

print("Unique label values:", df["target"].unique())
print("Label data type:", df["target"].dtype)

Unique label values: [0. 1.]
Label data type: float64


In [31]:
def file_exists(filename):
    """Return True if `filename` is present in `image_dir` under either JPEG spelling.

    The name is tried as given, then with every '.jpg' replaced by '.jpeg',
    then with every '.jpeg' replaced by '.jpg'.
    """
    name_variants = (
        filename,
        filename.replace('.jpg', '.jpeg'),
        filename.replace('.jpeg', '.jpg'),
    )
    for variant in name_variants:
        if os.path.exists(os.path.join(image_dir, variant)):
            return True
    return False

# Keep only the rows whose image file can actually be found on disk.
df = df.loc[df['midas_file_name'].map(file_exists)].reset_index(drop=True)


In [32]:
class MRAMIDASDataset(Dataset):
    """Dataset yielding ``(image, metadata, label)`` triples from a metadata frame.

    Args:
        dataframe: Table with a ``midas_file_name`` column, a ``target`` column and
            the columns listed in the notebook-level ``categorical_cols`` and
            ``numerical_cols``.
        image_dir: Directory containing the image files.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, dataframe, image_dir, transform=None):
        self.data = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def _resolve_image_path(self, img_filename):
        """Return the on-disk path for `img_filename`, tolerating .jpg/.jpeg mix-ups.

        The extension swap is applied to the file name only (the same rule the
        `file_exists` pre-filter uses), never to the joined path, so a directory
        whose name happens to contain '.jpg' or '.jpeg' is left untouched.

        Raises:
            FileNotFoundError: if none of the spellings exists in `image_dir`.
        """
        candidates = (
            img_filename,
            img_filename.replace('.jpg', '.jpeg'),
            img_filename.replace('.jpeg', '.jpg'),
        )
        for candidate in candidates:
            path = os.path.join(self.image_dir, candidate)
            if os.path.exists(path):
                return path
        raise FileNotFoundError(f"Image not found: {img_filename}")

    def __getitem__(self, idx):
        """Load sample `idx` (positional) as ``(image, metadata, label)``."""
        row = self.data.iloc[idx]

        # Load image with flexible file extension handling
        img_path = self._resolve_image_path(row['midas_file_name'])

        # convert() returns a fully loaded copy, so the file handle can be
        # released as soon as the with-block exits.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform:
            image = self.transform(image)

        # Metadata vector: categorical codes followed by the numeric columns.
        metadata_values = row[categorical_cols + numerical_cols].values.astype(float)
        metadata = torch.tensor(metadata_values, dtype=torch.float32)

        # The target column may be stored as float/object; force an integer class index.
        label = torch.tensor(int(row['target']), dtype=torch.int64)

        return image, metadata, label

In [None]:
# Image pipeline used by the dataset: fixed-size resize, random augmentation,
# tensor conversion, then channel-wise normalisation.
transform = transforms.Compose([
    # Geometry: 380x380 input, frequent horizontal flips, rotations up to 30 degrees.
    transforms.Resize(size=(380, 380)),
    transforms.RandomHorizontalFlip(p=0.7),
    transforms.RandomRotation(degrees=30),
    # Photometric jitter on brightness, contrast, saturation and hue.
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.2),
    # PIL image -> float tensor, then per-channel mean/std normalisation.
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])



In [34]:
# Class-balanced sampling: every sample is drawn with probability inversely
# proportional to the size of its class.
labels = df['target'].values

# One pass yields the per-class counts and, for each sample, the position of its
# class in the sorted unique-label array. Indexing through that position (rather
# than through the raw label value) stays correct when a class is absent or the
# labels are not exactly 0..K-1.
_, class_index, class_sample_count = np.unique(labels, return_inverse=True, return_counts=True)
weight = 1. / class_sample_count
samples_weight = torch.tensor(weight[class_index], dtype=torch.float)

# Draw len(dataset) samples per epoch, with replacement, according to the weights.
sampler = WeightedRandomSampler(samples_weight, len(samples_weight), replacement=True)
dataset = MRAMIDASDataset(df, image_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, sampler=sampler)

In [46]:
class ImageModel(nn.Module):
    """EfficientNet-B3 backbone whose final linear layer emits an image embedding.

    Args:
        output_dim: Size of the embedding produced by the replaced classifier head.
        pretrained: Load ImageNet weights (True) or start from random
            initialisation (False, e.g. when a checkpoint is restored right after).
    """

    def __init__(self, output_dim=256, pretrained=True):
        super(ImageModel, self).__init__()
        # `pretrained=` is deprecated in newer torchvision in favour of `weights=`;
        # IMAGENET1K_V1 is the weight set the legacy flag selected. Fall back to
        # the legacy keyword when the weights enum is not available.
        weights_enum = getattr(models, "EfficientNet_B3_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.model = models.efficientnet_b3(weights=weights)
        else:
            self.model = models.efficientnet_b3(pretrained=pretrained)
        # Replace the final linear layer, reading its input width from the layer
        # being replaced instead of hard-coding it.
        in_features = self.model.classifier[1].in_features
        self.model.classifier[1] = nn.Linear(in_features, output_dim)

    def forward(self, x):
        """Return the image embedding for a batch of images."""
        return self.model(x)

In [36]:
class AttentionLayer(nn.Module):
    """Re-weights the input by a learned softmax gate of the same shape.

    A linear map produces scores, softmax over dimension 1 turns them into
    weights, and the input is multiplied element-wise by those weights.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Square projection: one score per input position along the last axis.
        self.W = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        scores = self.W(x)
        gate = scores.softmax(dim=1)  # weights along dim 1 sum to 1
        return x * gate

In [37]:
class MetadataModel(nn.Module):
    """MLP that maps the tabular metadata vector to a 32-dimensional embedding.

    Layout: input -> 256 -> 128 (each followed by ReLU and LayerNorm), an
    AttentionLayer over the 128 features, then 128 -> 64 -> 32.
    """

    def __init__(self, input_size):
        super().__init__()
        # Layers are created in network order so parameter initialisation and
        # the Sequential indices (state_dict keys) stay as before.
        widen = [nn.Linear(input_size, 256), nn.ReLU(), nn.LayerNorm(256)]
        narrow = [nn.Linear(256, 128), nn.ReLU(), nn.LayerNorm(128)]
        attend = [AttentionLayer(128)]
        project = [nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, 32)]
        self.fc = nn.Sequential(*widen, *narrow, *attend, *project)

    def forward(self, x):
        embedding = self.fc(x)
        return embedding


In [47]:
class MultimodalModel(nn.Module):
    """Late-fusion classifier over an image embedding and a metadata embedding.

    Args:
        image_model: Module mapping an image batch to ``(batch, image_feature_dim)``.
        metadata_model: Module mapping a metadata batch to
            ``(batch, metadata_feature_dim)``.
        image_feature_dim: Width of the image embedding (default 256).
        metadata_feature_dim: Width of the metadata embedding (default 32).
        num_classes: Number of output logits (default 2).

    The defaults reproduce the previous fixed ``Linear(256 + 32, 2)`` head; pass
    other values when the sub-models emit embeddings of a different width.
    """

    def __init__(self, image_model, metadata_model,
                 image_feature_dim=256, metadata_feature_dim=32, num_classes=2):
        super(MultimodalModel, self).__init__()
        self.image_model = image_model
        self.metadata_model = metadata_model
        # Single linear layer over the concatenated embeddings.
        self.classifier = nn.Linear(image_feature_dim + metadata_feature_dim, num_classes)

    def forward(self, image, metadata):
        """Return class logits of shape ``(batch, num_classes)``."""
        img_features = self.image_model(image)
        meta_features = self.metadata_model(metadata)
        # Concatenate along the feature axis; the batch axis must match.
        combined = torch.cat((img_features, meta_features), dim=1)
        return self.classifier(combined)

In [48]:
# Build both branches and the fusion model, placing everything on the chosen device.
# The metadata branch takes one input per categorical column plus one per numeric column.
image_model = ImageModel().to(device)
metadata_model = MetadataModel(
    input_size=len(categorical_cols) + len(numerical_cols),
).to(device)
model = MultimodalModel(image_model, metadata_model).to(device)




In [49]:
# Loss: cross-entropy with light label smoothing.
criterion = nn.CrossEntropyLoss(label_smoothing=0.05)

# Optimiser: AdamW over all model parameters with a low learning rate.
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=2e-4,
    weight_decay=1e-5,
)

# Cosine learning-rate decay; the schedule is stepped once per epoch in the
# training loop, and T_max equals the 50 epochs configured there.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=50)

In [50]:
# Split the frame by class.
df_melanoma = df[df["target"] == 1]
df_non_melanoma = df[df["target"] == 0]

# Append three extra copies of every melanoma row, so each positive appears
# four times in total while negatives appear once.
# NOTE(review): the dataset/dataloader built earlier read `df` and already
# balance classes through a WeightedRandomSampler; nothing visible in this
# notebook consumes `df_oversampled` - confirm whether it is still needed.
df_oversampled = pd.concat([df, df_melanoma, df_melanoma, df_melanoma])
# Shuffle with a fixed seed so the row order is reproducible across runs.
df_oversampled = df_oversampled.sample(frac=1, random_state=42).reset_index(drop=True)

In [51]:
# Absolute, machine-specific file that the training loop overwrites with the
# latest checkpoint (model/optimizer/scheduler state plus metric history).
checkpoint_path = "C:/Data/DJ/SkinCancer/code/pytorch_models/mra_midas_efficientnetB3.pth"


In [52]:
# Number of passes over the dataloader.
num_epochs = 50

# Per-epoch training history; the training loop appends one value to each list
# after every epoch. Six separate list objects are created, one per metric.
(
    train_losses,
    train_accuracies,
    train_precisions,
    train_recalls,
    train_f1_scores,
    epoch_times,
) = ([], [], [], [], [], [])

In [None]:
print(f"Using device :{device}")

# Make sure the checkpoint directory exists up front, so a missing folder is
# not discovered only when the first checkpoint is written after five epochs.
checkpoint_dir = os.path.dirname(checkpoint_path)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(num_epochs):
    start_time = time.time()  # Start timer
    # Explicitly enable training behaviour (dropout, batch-norm updates) in case
    # another cell left the model in eval mode.
    model.train()
    total_loss = 0  # sum of per-batch mean losses for this epoch (not averaged)
    correct, total = 0, 0
    all_preds = []
    all_labels = []

    print(f"Started epoch - {epoch+1}")

    # `batch_labels` (not `labels`) so the label array defined in the sampler
    # cell is not overwritten by a tensor from the last batch.
    for images, metadata, batch_labels in dataloader:
        images = images.to(device)
        metadata = metadata.to(device)
        batch_labels = batch_labels.to(device=device, dtype=torch.int64)

        optimizer.zero_grad()
        outputs = model(images, metadata)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predicted = outputs.argmax(dim=1)  # predicted class = largest logit
        correct += (predicted == batch_labels).sum().item()
        total += batch_labels.size(0)

        # Collect predictions and labels for Precision, Recall, F1-score
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

    # Compute epoch metrics on the training batches seen this epoch.
    # zero_division=0 keeps the usual fallback value (0) for a class that was
    # never predicted, without emitting an undefined-metric warning.
    accuracy = correct / total
    precision = precision_score(all_labels, all_preds, average="weighted", zero_division=0)
    recall = recall_score(all_labels, all_preds, average="weighted", zero_division=0)
    f1 = f1_score(all_labels, all_preds, average="weighted", zero_division=0)
    epoch_time = time.time() - start_time  # End timer

    # Store metrics
    train_losses.append(total_loss)
    train_accuracies.append(accuracy)
    train_precisions.append(precision)
    train_recalls.append(recall)
    train_f1_scores.append(f1)
    epoch_times.append(epoch_time)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}, Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}, Time: {epoch_time:.2f} seconds")

    # Save a checkpoint every 5 epochs and after the final epoch. The same file
    # is overwritten each time, so only the latest state is kept.
    if (epoch + 1) % 5 == 0 or epoch == num_epochs - 1:
        checkpoint = {
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_losses': train_losses,
            'train_precisions': train_precisions,
            'train_accuracies': train_accuracies,
            'train_recalls': train_recalls,
            'train_f1_scores': train_f1_scores,
            'epoch_times': epoch_times
        }
        torch.save(checkpoint, checkpoint_path)
        print(f"Checkpoint saved at {checkpoint_path}")

Using device :cpu
Started epoch - 1
