In [1]:
pip install opencv-python



Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install open_clip_torch==2.23.0 transformers==4.35.2 matplotlib


















































Note: you may need to restart the kernel to use updated packages.


In [3]:
import cv2 

In [6]:
pip install torch transformers tqdm
































Note: you may need to restart the kernel to use updated packages.


# Final Code

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from tqdm import tqdm
import open_clip
import pandas as pd

class BiomedCLIPClassifier(nn.Module):
    def __init__(self, model_name, num_classes):
        super().__init__()
        self.model, self.preprocess, _ = open_clip.create_model_and_transforms(
            'hf-hub:' + model_name
        )
        self.tokenizer = open_clip.get_tokenizer('hf-hub:' + model_name)
        self.logit_scale = nn.Parameter(torch.ones([]) * torch.log(torch.tensor(1/0.07)))
        
        self.class_names = ['Angioectasia', 'Bleeding', 'Erosion', 'Erythema', 
                           'Foreign Body', 'Lymphangiectasia', 'Normal', 'Polyp', 
                           'Ulcer', 'Worms']
        self.register_buffer('text_features', self._encode_text(self.class_names))
        
    def _encode_text(self, class_names):
        text_tokens = self.tokenizer(class_names)
        with torch.no_grad():
            text_features = self.model.encode_text(text_tokens)
            text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        return text_features
    
    def forward(self, images):
        image_features = self.model.encode_image(images)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        logits = torch.matmul(image_features, self.text_features.t()) * self.logit_scale.exp()
        return logits

class CustomImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.class_to_idx = {
            'Angioectasia': 0, 'Bleeding': 1, 'Erosion': 2, 'Erythema': 3,
            'Foreign Body': 4, 'Lymphangiectasia': 5, 'Normal': 6, 'Polyp': 7,
            'Ulcer': 8, 'Worms': 9
        }
        
        self.samples = []
        for class_name in os.listdir(root_dir):
            class_dir = os.path.join(root_dir, class_name)
            if os.path.isdir(class_dir):
                for subfolder in os.listdir(class_dir):
                    subfolder_path = os.path.join(class_dir, subfolder)
                    if os.path.isdir(subfolder_path):
                        for img_name in os.listdir(subfolder_path):
                            if img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                                img_path = os.path.join(subfolder_path, img_name)
                                self.samples.append((img_path, self.class_to_idx[class_name], subfolder))  # Save subfolder as Dataset name
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        img_path, label, dataset_name = self.samples[idx]
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
            
        return image, label, img_path, dataset_name  # Return dataset_name as well

class TestDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = [
            os.path.join(root_dir, img_name)
            for img_name in os.listdir(root_dir)
            if img_name.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Only return the image and the filename (basename of path)
        return image, os.path.basename(img_path)
    

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from tqdm import tqdm
import open_clip
import pandas as pd
import numpy as np
from sklearn.metrics import (
    balanced_accuracy_score, roc_auc_score, roc_curve,
    precision_recall_curve, average_precision_score,
    f1_score, confusion_matrix
)
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle

# [Previous model and dataset classes remain the same...]
# BiomedCLIPClassifier, CustomImageDataset, and TestDataset classes stay unchanged

class MetricsTracker:
    def __init__(self):
        self.train_losses = []
        self.train_accs = []
        self.val_losses = []
        self.val_accs = []
        self.epoch_metrics = []

def calculate_specificity(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    specificities = []
    for i in range(len(cm)):
        tn = np.sum(cm) - np.sum(cm[i,:]) - np.sum(cm[:,i]) + cm[i,i]
        fp = np.sum(cm[:,i]) - cm[i,i]
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
        specificities.append(specificity)
    return specificities

def evaluate_model(results_df, class_names):
    label_to_idx = {label: idx for idx, label in enumerate(class_names)}
    y_true = np.array([label_to_idx[label] for label in results_df['true_label']])
    y_pred = np.array([label_to_idx[label] for label in results_df['predicted_class']])
    y_scores = results_df[class_names].values
    
    metrics = {}
    
    # Balanced Accuracy
    metrics['balanced_accuracy'] = balanced_accuracy_score(y_true, y_pred)
    
    # Per-class metrics
    metrics['per_class'] = {}
    for i, class_name in enumerate(class_names):
        y_true_binary = (y_true == i).astype(int)
        y_pred_binary = (y_pred == i).astype(int)
        
        # AUC-ROC
        auc = roc_auc_score(y_true_binary, y_scores[:, i])
        
        # Calculate ROC curve for plotting
        fpr, tpr, _ = roc_curve(y_true_binary, y_scores[:, i])
        
        # Precision-Recall curve
        precision, recall, _ = precision_recall_curve(y_true_binary, y_scores[:, i])
        ap = average_precision_score(y_true_binary, y_scores[:, i])
        
        # F1 Score
        f1 = f1_score(y_true_binary, y_pred_binary)
        
        metrics['per_class'][class_name] = {
            'auc': auc,
            'roc_curve': (fpr, tpr),
            'precision_recall': (precision, recall),
            'average_precision': ap,
            'f1_score': f1
        }
    
    # Calculate mean metrics
    metrics['mean_auc'] = np.mean([m['auc'] for m in metrics['per_class'].values()])
    metrics['mean_ap'] = np.mean([m['average_precision'] for m in metrics['per_class'].values()])
    metrics['mean_f1'] = np.mean([m['f1_score'] for m in metrics['per_class'].values()])
    
    # Confusion Matrix
    metrics['confusion_matrix'] = confusion_matrix(y_true, y_pred)
    
    return metrics

def plot_training_history(metrics_tracker):
    plt.figure(figsize=(12, 5))
    
    # Plot training curves
    plt.subplot(1, 2, 1)
    plt.plot(metrics_tracker.train_losses, label='Train Loss')
    plt.plot(metrics_tracker.val_losses, label='Val Loss')
    plt.title('Loss vs Epoch')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(metrics_tracker.train_accs, label='Train Accuracy')
    plt.plot(metrics_tracker.val_accs, label='Val Accuracy')
    plt.title('Accuracy vs Epoch')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    
    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.close()

def plot_evaluation_metrics(metrics, class_names, dataset_name):
    # 1. ROC curves for each class
    plt.figure(figsize=(10, 8))
    for class_name in class_names:
        fpr, tpr = metrics['per_class'][class_name]['roc_curve']
        auc = metrics['per_class'][class_name]['auc']
        plt.plot(fpr, tpr, label=f'{class_name} (AUC = {auc:.2f})')
    
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curves - {dataset_name}')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.savefig(f'roc_curves_{dataset_name}.png')
    plt.close()
    
    # 2. Precision-Recall curves
    plt.figure(figsize=(10, 8))
    for class_name in class_names:
        precision, recall = metrics['per_class'][class_name]['precision_recall']
        ap = metrics['per_class'][class_name]['average_precision']
        plt.plot(recall, precision, label=f'{class_name} (AP = {ap:.2f})')
    
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curves - {dataset_name}')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.savefig(f'precision_recall_{dataset_name}.png')
    plt.close()
    
    # 3. Confusion Matrix
    plt.figure(figsize=(12, 10))
    sns.heatmap(metrics['confusion_matrix'], annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix - {dataset_name}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(f'confusion_matrix_{dataset_name}.png')
    plt.close()
    
    # 4. Per-class metrics comparison
    metrics_to_plot = ['auc', 'average_precision', 'f1_score']
    x = np.arange(len(class_names))
    width = 0.25
    
    plt.figure(figsize=(15, 8))
    for i, metric in enumerate(metrics_to_plot):
        values = [metrics['per_class'][c][metric] for c in class_names]
        plt.bar(x + i*width, values, width, label=metric.replace('_', ' ').title())
    
    plt.xlabel('Classes')
    plt.ylabel('Score')
    plt.title(f'Performance Metrics by Class - {dataset_name}')
    plt.xticks(x + width, class_names, rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'per_class_metrics_{dataset_name}.png')
    plt.close()

def print_metrics_summary(metrics, dataset_name):
    print(f"\n=== {dataset_name} Evaluation Metrics ===")
    print(f"\nOverall Metrics:")
    print(f"Balanced Accuracy: {metrics['balanced_accuracy']:.4f}")
    print(f"Mean AUC: {metrics['mean_auc']:.4f}")
    print(f"Mean Average Precision: {metrics['mean_ap']:.4f}")
    print(f"Mean F1 Score: {metrics['mean_f1']:.4f}")
    
    print("\nPer-class metrics:")
    for class_name, class_metrics in metrics['per_class'].items():
        print(f"\n{class_name}:")
        print(f"  AUC-ROC: {class_metrics['auc']:.4f}")
        print(f"  Average Precision: {class_metrics['average_precision']:.4f}")
        print(f"  F1 Score: {class_metrics['f1_score']:.4f}")








def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels, _, _ in tqdm(train_loader, desc="Training"):  # Unpack the extra dataset name
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100. * correct / total
    
    return epoch_loss, epoch_acc


def validate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels, _, _ in tqdm(val_loader, desc="Validating"):  # Unpack the extra dataset name
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    val_loss = running_loss / len(val_loader)
    val_acc = 100. * correct / total
    
    return val_loss, val_acc


def evaluate_and_save(model, dataloader, output_file, class_names, is_test_set=False):
    model.eval()
    device = next(model.parameters()).device
    
    results = {
        'image_path': [],
        'Dataset': [],
        'predicted_class': []
    }
    
    if not is_test_set:
        results['true_label'] = []
        
    for class_name in class_names:
        results[class_name] = []
    
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            if is_test_set:
                images, img_paths, dataset_names = batch
                images = images.to(device)
            else:
                images, labels, img_paths, dataset_names = batch
                images = images.to(device)
                results['true_label'].extend([class_names[label.item()] for label in labels])
            
            outputs = model(images)
            probabilities = F.softmax(outputs, dim=1)
            
            results['image_path'].extend([img_path for img_path in img_paths])
            results['Dataset'].extend([dataset_name for dataset_name in dataset_names])
            
            probs_np = probabilities.cpu().numpy()
            pred_classes = torch.argmax(outputs, dim=1).cpu().numpy()
            results['predicted_class'].extend([class_names[idx] for idx in pred_classes])
            
            for i, class_name in enumerate(class_names):
                results[class_name].extend(probs_np[:, i].tolist())
    
    df = pd.DataFrame(results)
    
    if is_test_set:
        columns = ['image_path', 'Dataset'] + class_names + ['predicted_class']
    else:
        columns = ['image_path', 'Dataset', 'true_label'] + class_names + ['predicted_class']
    
    df = df[columns]
    df.to_excel(output_file, index=False, float_format='%.8f')
    print(f"Results saved to {output_file}")
    
    if not is_test_set:
        accuracy = (df['true_label'] == df['predicted_class']).mean() * 100
        print(f"Final Accuracy: {accuracy:.2f}%")
        
    return df

# Rest of the code remains unchanged...

def test_evaluate_and_save(model, dataloader, output_filename, class_names, is_test_set=False):
    model.eval()
    predictions = []
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            if is_test_set:
                images, img_paths = batch  # Unpack image and path for the test set
                images = images.to(device)
                outputs = model(images)
                probs = torch.softmax(outputs, dim=1)  # Predicted probabilities
                _, predicted = outputs.max(1)

                # Collect predictions with paths and class probabilities
                for img_path, prob, pred in zip(img_paths, probs, predicted):
                    predictions.append({
                        "image_path": img_path,
                        **{f"class_{i}_probability": p.item() for i, p in enumerate(prob)},
                        "predicted_class": class_names[pred.item()]
                    })
            else:
                # For train/val: expect images and labels
                images, labels = batch
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                probs = torch.softmax(outputs, dim=1)
                _, predicted = outputs.max(1)

                # Collect predictions with ground truth for train/val
                for label, prob, pred in zip(labels, probs, predicted):
                    predictions.append({
                        "true_label": class_names[label.item()],
                        **{f"class_{i}_probability": p.item() for i, p in enumerate(prob)},
                        "predicted_class": class_names[pred.item()]
                    })

    # Convert predictions to DataFrame
    df = pd.DataFrame(predictions)
    df.to_excel(output_filename, index=False)
    print(f"Results saved to {output_filename}")
    return df


def main():
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Initialize model
    model_name = "microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224"
    num_classes = 10
    model = BiomedCLIPClassifier(model_name, num_classes)
    model = model.to(device)
    
    # Training parameters
    num_epochs = 3
    batch_size = 32
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    
    # Initialize metrics tracker
    metrics_tracker = MetricsTracker()
    
    # Create datasets and dataloaders
    train_dataset = CustomImageDataset(root_dir='New_Dateset/Dataset/training', transform=model.preprocess)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    
    val_dataset = CustomImageDataset(root_dir='New_Dateset/Dataset/validation', transform=model.preprocess)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    
    # Training phase
    print("Starting training...")
    best_val_acc = 0
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        
        # Train
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        metrics_tracker.train_losses.append(train_loss)
        metrics_tracker.train_accs.append(train_acc)
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        
        # Validate
        val_loss, val_acc = validate(model, val_loader, criterion, device)
        metrics_tracker.val_losses.append(val_loss)
        metrics_tracker.val_accs.append(val_acc)
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
        
        scheduler.step()
        
        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')
            print(f"New best model saved with validation accuracy: {val_acc:.2f}%")
    
    # Plot training history
    plot_training_history(metrics_tracker)
    
    # Load best model for evaluation
    print("\nLoading best model for evaluation...")
    model.load_state_dict(torch.load('best_model.pth'))
    model.eval()
    
    class_names = ['Angioectasia', 'Bleeding', 'Erosion', 'Erythema', 
                   'Foreign Body', 'Lymphangiectasia', 'Normal', 'Polyp', 
                   'Ulcer', 'Worms']
    
    try:
        # Training set evaluation
        print("\nEvaluating training set...")
        train_df = evaluate_and_save(model, train_loader, 'efinaltrain_results.xlsx', class_names)
        train_metrics = evaluate_model(train_df, class_names)
        print_metrics_summary(train_metrics, "Training Set")
        plot_evaluation_metrics(train_metrics, class_names, 'training')
        
        # Validation set evaluation
        print("\nEvaluating validation set...")
        val_df = evaluate_and_save(model, val_loader, 'efinalval_results.xlsx', class_names)
        val_metrics = evaluate_model(val_df, class_names)
        print_metrics_summary(val_metrics, "Validation Set")
        plot_evaluation_metrics(val_metrics, class_names, 'validation')
        
        # Test set evaluation
        print("\nEvaluating test set...")
        test_dataset = TestDataset(root_dir='Testing set/Testing set/Images', transform=model.preprocess)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
        test_df = test_evaluate_and_save(model, test_loader, 'efinaltest_results.xlsx', class_names, is_test_set=True)
        
        print("\nAll evaluations completed successfully!")
        
    except Exception as e:
        print(f"An error occurred during evaluation: {e}")
        raise

if __name__ == "__main__":
    main()




  checkpoint = torch.load(checkpoint_path, map_location=map_location)


Starting training...



Epoch 1/3


Training: 100%|██████████| 1176/1176 [06:29<00:00,  3.02it/s]


Train Loss: 0.3927 | Train Acc: 87.67%


Validating: 100%|██████████| 505/505 [00:53<00:00,  9.38it/s]


Val Loss: 0.2621 | Val Acc: 91.46%

New best model saved with validation accuracy: 91.46%



Epoch 2/3


Training: 100%|██████████| 1176/1176 [06:29<00:00,  3.02it/s]


Train Loss: 0.2061 | Train Acc: 93.26%


Validating: 100%|██████████| 505/505 [00:53<00:00,  9.36it/s]


Val Loss: 0.2369 | Val Acc: 92.36%

New best model saved with validation accuracy: 92.36%



Epoch 3/3


Training: 100%|██████████| 1176/1176 [06:29<00:00,  3.02it/s]


Train Loss: 0.1108 | Train Acc: 96.17%


Validating: 100%|██████████| 505/505 [00:53<00:00,  9.38it/s]


Val Loss: 0.1822 | Val Acc: 94.04%

New best model saved with validation accuracy: 94.04%



Loading best model for evaluation...



  model.load_state_dict(torch.load('best_model.pth'))




Evaluating training set...


Evaluating: 100%|██████████| 1176/1176 [02:05<00:00,  9.40it/s]


Results saved to efinaltrain_results.xlsx

Final Accuracy: 97.75%



=== Training Set Evaluation Metrics ===



Overall Metrics:

Balanced Accuracy: 0.9388

Mean AUC: 0.9990

Mean Average Precision: 0.9774

Mean F1 Score: 0.9389



Per-class metrics:



Angioectasia:

  AUC-ROC: 0.9984

  Average Precision: 0.9732

  F1 Score: 0.9298



Bleeding:

  AUC-ROC: 0.9997

  Average Precision: 0.9870

  F1 Score: 0.9298



Erosion:

  AUC-ROC: 0.9978

  Average Precision: 0.9717

  F1 Score: 0.9115



Erythema:

  AUC-ROC: 0.9972

  Average Precision: 0.8956

  F1 Score: 0.8101



Foreign Body:

  AUC-ROC: 0.9998

  Average Precision: 0.9919

  F1 Score: 0.9499



Lymphangiectasia:

  AUC-ROC: 0.9992

  Average Precision: 0.9878

  F1 Score: 0.9637



Normal:

  AUC-ROC: 0.9996

  Average Precision: 0.9999

  F1 Score: 0.9948



Polyp:

  AUC-ROC: 0.9986

  Average Precision: 0.9669

  F1 Score: 0.8998



Ulcer:

  AUC-ROC: 1.0000

  Average Precision: 1.0000

  F1 Score: 0.9992



Worms:

  

Evaluating: 100%|██████████| 505/505 [00:53<00:00,  9.41it/s]


Results saved to efinalval_results.xlsx

Final Accuracy: 94.06%



=== Validation Set Evaluation Metrics ===



Overall Metrics:

Balanced Accuracy: 0.8464

Mean AUC: 0.9940

Mean Average Precision: 0.9093

Mean F1 Score: 0.8539



Per-class metrics:



Angioectasia:

  AUC-ROC: 0.9919

  Average Precision: 0.9074

  F1 Score: 0.8390



Bleeding:

  AUC-ROC: 0.9963

  Average Precision: 0.9352

  F1 Score: 0.8717



Erosion:

  AUC-ROC: 0.9865

  Average Precision: 0.8681

  F1 Score: 0.7714



Erythema:

  AUC-ROC: 0.9896

  Average Precision: 0.7125

  F1 Score: 0.6299



Foreign Body:

  AUC-ROC: 0.9971

  Average Precision: 0.9572

  F1 Score: 0.8886



Lymphangiectasia:

  AUC-ROC: 0.9951

  Average Precision: 0.9476

  F1 Score: 0.8919



Normal:

  AUC-ROC: 0.9960

  Average Precision: 0.9988

  F1 Score: 0.9809



Polyp:

  AUC-ROC: 0.9874

  Average Precision: 0.7706

  F1 Score: 0.7018



Ulcer:

  AUC-ROC: 0.9999

  Average Precision: 0.9972

  F1 Score: 0.9791



Worms:

  

Evaluating: 100%|██████████| 138/138 [00:15<00:00,  8.92it/s]


Results saved to efinaltest_results.xlsx



All evaluations completed successfully!
