In [None]:
pip install timm

In [None]:
import copy
import os

import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from timm import create_model  # Import ConvNext from timm
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models

In [None]:
# Custom dataset class
class CustomDataset(Dataset):
    """Image dataset read from a directory with one sub-folder per class.

    Each sub-folder of ``data_path`` whose name parses as an integer is a
    class; the integer is the label of every regular file inside it.
    Sub-folders with non-numeric names and nested directories are skipped.

    Args:
        data_path: Root directory containing the class sub-folders.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned from ``__getitem__``.

    Attributes:
        image_files: Paths of all samples, ordered by class folder name and
            then by file name.
        labels: Integer label of each entry in ``image_files``.
    """

    def __init__(self, data_path, transform=None):
        self.data_path = data_path
        self.transform = transform

        self.image_files = []
        self.labels = []

        # os.listdir returns entries in arbitrary order; sorting keeps sample
        # indices stable, which index-based splits of this dataset rely on.
        for class_folder in sorted(os.listdir(data_path)):
            class_path = os.path.join(data_path, class_folder)
            if not os.path.isdir(class_path):
                continue
            try:
                label = int(class_folder)
            except ValueError:
                # Not a class folder (e.g. ".ipynb_checkpoints"): ignore it
                # instead of aborting the whole scan.
                continue

            for filename in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, filename)
                # Only regular files can be opened as images.
                if os.path.isfile(img_path):
                    self.image_files.append(img_path)
                    self.labels.append(label)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        # The context manager closes the file handle as soon as the pixel
        # data has been converted, rather than waiting for garbage collection.
        with Image.open(self.image_files[index]) as raw_img:
            img = raw_img.convert("RGB")
        label = self.labels[index]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def __len__(self):
        """Number of samples found under ``data_path``."""
        return len(self.image_files)


# Preprocessing pipeline: resize to 224x224, apply a random rotation of up to
# 20 degrees and a random horizontal flip, convert to a tensor, then normalize
# each channel with the given mean / std.
data_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomRotation(degrees=20),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)


In [None]:
# Root folder of the dataset (one sub-folder per class, named "0" and "1").
# Set the MELANOMA_DATA_DIR environment variable to point the notebook at a
# different location without editing this cell; the original path is the default.
path = os.environ.get("MELANOMA_DATA_DIR", r'C:\Users\User\Desktop\melanoma dataset')


In [None]:
# Count the entries of each class folder once (the directories were previously
# listed twice each) and report the total and per-class sample counts.
class_counts = {
    class_name: len(os.listdir(os.path.join(path, class_name)))
    for class_name in ("0", "1")
}
num_samples = class_counts["0"] + class_counts["1"]

print(num_samples)

print("Number of samples in class 0:", class_counts["0"])
print("Number of samples in class 1:", class_counts["1"])



In [None]:
# Build a pretrained ConvNeXt-Tiny via timm and freeze all of its parameters.
model = create_model('convnext_tiny', pretrained=True)
model.requires_grad_(False)

# Replace the final classifier layer with dropout followed by a 2-way linear
# layer. The new layers are created after the freeze, so they stay trainable.
num_ftrs = model.head.fc.in_features
model.head.fc = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(num_ftrs, 2))


In [None]:

# Stratified k-fold cross-validation of the classifier.
#
# For every fold the model is reset to its initial weights, trained for
# NUM_EPOCHS epochs on the training split and scored on the validation split.
# Per-fold accuracy / precision / recall / F1 are collected in the fold_* lists
# and the per-epoch loss / accuracy curves are plotted.
N_SPLITS = 5
NUM_EPOCHS = 20
BATCH_SIZE = 16
SEED = 42

# Seed PyTorch's global RNG so repeated runs start from the same random state.
torch.manual_seed(SEED)

# Validation images must not be randomly rotated / flipped, otherwise the
# validation metrics change from pass to pass. Same pipeline as
# `data_transforms` minus the random augmentations.
eval_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Two views over the same files (built once, not once per fold): augmented
# for training, deterministic for validation.
train_dataset = CustomDataset(data_path=path, transform=data_transforms)
val_dataset = CustomDataset(data_path=path, transform=eval_transforms)
assert train_dataset.image_files == val_dataset.image_files, \
    "train and validation datasets must list the same files in the same order"

# Split on the dataset's own length and labels so the indices always match
# what the loaders will actually read.
sample_labels = train_dataset.labels
sample_indices = list(range(len(train_dataset)))

# KFold.split() ignores the labels it is given; StratifiedKFold uses them to
# keep the class ratio of every fold close to that of the full dataset.
kf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# Snapshot of the weights before any training. Restoring it at the start of
# each fold prevents a fold from validating on samples the model was already
# trained on in an earlier fold.
# NOTE: re-run the model-creation cell before re-running this cell, otherwise
# the snapshot captures already-trained weights.
initial_state = copy.deepcopy(model.state_dict())

# Lists to store metrics for each fold
fold_accuracies = []
fold_precisions = []
fold_recalls = []
fold_f1_scores = []

for fold, (train_index, val_index) in enumerate(kf.split(sample_indices, sample_labels)):
    print(f'Fold {fold + 1}/{N_SPLITS}')

    # Start every fold from the same untrained state
    model.load_state_dict(initial_state)

    # Create data loaders for training and validation
    train_loader = DataLoader(torch.utils.data.Subset(train_dataset, train_index),
                              batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(torch.utils.data.Subset(val_dataset, val_index),
                            batch_size=BATCH_SIZE, shuffle=False)

    # Define loss function and optimizer (only parameters that require
    # gradients are handed to the optimizer)
    criterion = nn.CrossEntropyLoss()
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.RMSprop(trainable_params, lr=0.001, weight_decay=1e-4)
    # StepLR halves the learning rate every 20 scheduler steps. With
    # NUM_EPOCHS = 20 the first decay happens only after the last epoch, so
    # the learning rate is effectively constant within a fold.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Lists to store metrics for each epoch
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    # Training loop
    for epoch in range(NUM_EPOCHS):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            correct_train += predicted.eq(labels).sum().item()

        # Loss is averaged over batches, accuracy over samples
        train_loss = running_loss / len(train_loader)
        train_accuracy = 100. * correct_train / total_train

        # Validation
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total_val += labels.size(0)
                correct_val += predicted.eq(labels).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = 100. * correct_val / total_val

        # Metrics for each epoch
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        # Print metrics for each epoch
        print(f'Epoch {epoch + 1}/{NUM_EPOCHS}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, '
              f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # Advance the learning-rate schedule once per epoch
        exp_lr_scheduler.step()

    # Calculate and print metrics for each fold
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            all_labels.extend(labels.numpy())
            all_preds.extend(predicted.numpy())

    fold_accuracy = accuracy_score(all_labels, all_preds)
    fold_precision = precision_score(all_labels, all_preds, average='macro')
    fold_recall = recall_score(all_labels, all_preds, average='macro')
    fold_f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Fold {fold + 1} Metrics:')
    print(f'Accuracy: {fold_accuracy:.4f}')
    print(f'Precision: {fold_precision:.4f}')
    print(f'Recall: {fold_recall:.4f}')
    print(f'F1 Score: {fold_f1:.4f}')

    # Save metrics for each fold
    fold_accuracies.append(fold_accuracy)
    fold_precisions.append(fold_precision)
    fold_recalls.append(fold_recall)
    fold_f1_scores.append(fold_f1)

    # Plot training and validation metrics for each epoch
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_accuracies, label='Training Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.show()


In [None]:

# Calculate average accuracy, precision, recall, and F1 score across all folds
avg_accuracy = sum(fold_accuracies) / len(fold_accuracies)
avg_precision = sum(fold_precisions) / len(fold_precisions)
avg_recall = sum(fold_recalls) / len(fold_recalls)
avg_f1 = sum(fold_f1_scores) / len(fold_f1_scores)

# Print average metrics
print(f'Average Accuracy: {avg_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1 Score: {avg_f1:.4f}')

# Confusion matrix of the current model on `val_loader`.
# `val_loader` is reassigned on every fold of the training loop, so at this
# point it only holds the last fold's validation split. Iterating it once per
# fold would count the same samples five times, so a single pass is made and
# the plot title says which data it shows.
all_labels = []
all_preds = []

model.eval()
with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = model(inputs)
        _, predicted = outputs.max(1)
        all_labels.extend(labels.numpy())
        all_preds.extend(predicted.numpy())

# Stored under `cm` so the sklearn `confusion_matrix` function is not
# overwritten by its own result. `labels=[0, 1]` keeps the matrix 2x2 (matching
# the tick labels) even if one class is absent from the predictions.
cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", xticklabels=["Class 0", "Class 1"], yticklabels=["Class 0", "Class 1"])
plt.xlabel("Predicted Class")
plt.ylabel("True Class")
plt.title("Confusion Matrix (Last Fold Validation Set)")
plt.show()

In [None]:
# Mean of each per-fold metric list, reported with four decimals.
def _fold_mean(values):
    """Arithmetic mean of a non-empty list of per-fold scores."""
    return sum(values) / len(values)


avg_accuracy = _fold_mean(fold_accuracies)
avg_precision = _fold_mean(fold_precisions)
avg_recall = _fold_mean(fold_recalls)
avg_f1 = _fold_mean(fold_f1_scores)

print(f'Average Metrics Across Folds:')
for metric_name, metric_value in (
    ('Accuracy', avg_accuracy),
    ('Precision', avg_precision),
    ('Recall', avg_recall),
    ('F1 Score', avg_f1),
):
    print(f'Average {metric_name}: {metric_value:.4f}')


In [None]:
# Print the fold-averaged metrics, then compute ROC AUC and a confusion matrix
# for the current model on `val_loader`.

print('Final Metrics:')
print(f'Accuracy: {sum(fold_accuracies) / len(fold_accuracies):.4f}')
print(f'Precision: {sum(fold_precisions) / len(fold_precisions):.4f}')
print(f'Recall: {sum(fold_recalls) / len(fold_recalls):.4f}')
print(f'F1 Score: {sum(fold_f1_scores) / len(fold_f1_scores):.4f}')

# Re-bind the sklearn function here in case an earlier cell has reassigned the
# name `confusion_matrix` to a computed matrix.
from sklearn.metrics import confusion_matrix

# Collect labels, hard predictions and class-1 probabilities in ONE pass so
# that all three lists describe exactly the same samples in the same order.
all_labels = []
all_preds = []
all_pred_probs = []

model.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    for inputs, labels in val_loader:
        # Softmax turns the two logits into probabilities. Ranking samples by
        # the raw class-1 logit alone is not equivalent, because the class-1
        # probability depends on the difference between both logits.
        probs = torch.softmax(model(inputs), dim=1)
        all_labels.extend(labels.numpy())
        all_preds.extend(probs.argmax(dim=1).numpy())
        all_pred_probs.extend(probs[:, 1].numpy())

# Calculate ROC AUC:
roc_auc = roc_auc_score(all_labels, all_pred_probs)
print(f'ROC AUC: {roc_auc:.4f}')

# Plot confusion matrix:
cm = confusion_matrix(all_labels, all_preds)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()




In [None]:
# Score the samples in `val_loader` with the current model and plot the ROC
# curve, treating class 1 as the positive class.
all_labels = []
all_pred_probs = []

model.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    for inputs, labels in val_loader:
        # Use the softmax probability of class 1 as the score; the raw class-1
        # logit alone can rank samples differently.
        probs = torch.softmax(model(inputs), dim=1)
        all_labels.extend(labels.numpy())
        all_pred_probs.extend(probs[:, 1].numpy())

# Calculate ROC AUC:
roc_auc = roc_auc_score(all_labels, all_pred_probs)
print(f'ROC AUC: {roc_auc:.4f}')

# Plot ROC AUC curve:
fpr, tpr, thresholds = roc_curve(all_labels, all_pred_probs)
plt.plot(fpr, tpr, label='ROC Curve')
# Diagonal = performance of a random classifier, for visual reference
plt.plot([0, 1], [0, 1], linestyle='--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC AUC Curve')
plt.legend()  # without this the curve labels are never displayed
plt.show()