In [None]:
import pandas as pd
import numpy as np

# Read the training-split CSV into a DataFrame.
df = pd.read_csv('/kaggle/input/roi-images-hda/masks_csvs/glaucoma_masks_train.csv')

# Tally rows per class: summing a boolean mask counts its True entries.
final_label = df['Final Label']
count_label_1 = int((final_label == 1).sum())
count_label_0 = int((final_label == 0).sum())

print(f"Count of rows with Final Label == 1: {count_label_1}")
print(f"Count of rows with Final Label == 0: {count_label_0}")


In [None]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.transforms import autoaugment

def train_transform():
    """Build the preprocessing pipeline used for training samples.

    Returns:
        transforms.Compose: bicubic resize to 384x384, AutoAugment with the
        IMAGENET policy, conversion to a tensor, then per-channel
        normalisation with the mean/std listed below.
    """
    resize = transforms.Resize((384, 384), interpolation=transforms.InterpolationMode.BICUBIC)
    augment = autoaugment.AutoAugment(autoaugment.AutoAugmentPolicy.IMAGENET)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    return transforms.Compose([resize, augment, transforms.ToTensor(), normalize])

def test_transform():
    """Build the deterministic preprocessing pipeline used for evaluation.

    Returns:
        transforms.Compose: bicubic resize to 384x384, conversion to a tensor,
        then per-channel normalisation with the mean/std listed below.
        No random augmentation is applied.
    """
    resize = transforms.Resize((384, 384), interpolation=transforms.InterpolationMode.BICUBIC)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    return transforms.Compose([resize, transforms.ToTensor(), normalize])

class GlaucomaDataset(Dataset):
    """Image dataset backed by a DataFrame of eye IDs and labels.

    Rows are addressed by position. The image for a row is the file
    ``<Eye ID><ext>`` inside ``img_folder``, using the first extension from
    ``IMAGE_EXTENSIONS`` that exists on disk.

    Args:
        dataframe (DataFrame): DataFrame containing the dataset information.
            Needs an 'Eye ID' column, plus 'Final Label' (when
            ``extra_features`` is None) or the ``extra_features`` columns.
        img_folder (string): Directory with all the images.
        transform (callable, optional): Optional transform to be applied on a sample.
        extra_features (list of str, optional): Column names for the extra features.
            When given, these columns are returned as the target instead of
            'Final Label'.
    """

    # Extensions tried, in this order, when resolving an image file.
    IMAGE_EXTENSIONS = ('.JPG', '.JPEG', '.PNG', '.png', '.jpg', '.jpeg')

    def __init__(self, dataframe, img_folder, transform=None, extra_features=None):
        self.dataframe = dataframe
        self.img_folder = img_folder
        self.transform = transform
        self.extra_features = extra_features

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.dataframe)

    def _resolve_image_path(self, img_id):
        """Return the path of the first existing ``<img_id><ext>`` file.

        Raises:
            FileNotFoundError: if no file exists for any supported extension.
        """
        for ext in self.IMAGE_EXTENSIONS:
            img_path = os.path.join(self.img_folder, f"{img_id}{ext}")
            if os.path.exists(img_path):
                return img_path
        raise FileNotFoundError(f"No image found for ID {img_id} with any supported extension.")

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx (int): Positional row index into the DataFrame.

        Returns:
            tuple: ``(image, labels)``. ``image`` is the RGB image after
            ``transform`` (or the RGB PIL image when no transform is set).
            ``labels`` is a float32 tensor holding either the scalar
            'Final Label' or the ``extra_features`` values.

        Raises:
            FileNotFoundError: if the image file cannot be located.
        """
        # Fetch the row once; it is needed for both the ID and the target.
        row = self.dataframe.iloc[idx]
        img_path = self._resolve_image_path(row['Eye ID'])

        # Force three channels so grayscale/RGBA/CMYK files do not break the
        # 3-channel normalisation downstream. convert() loads the pixel data,
        # so the file handle can be closed as soon as the block exits.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        # `is None` (not `== None`): equality on an array-like would be
        # element-wise and make the condition ambiguous.
        if self.extra_features is None:
            labels = torch.tensor(row['Final Label'], dtype=torch.float32)
        else:
            extra_labels = row[self.extra_features].values.astype(float)
            labels = torch.tensor(extra_labels, dtype=torch.float32)

        return image, labels

In [None]:
from PIL import Image
# Sanity check: print the size of one ROI image. The context manager closes
# the underlying file handle once the size has been read.
with Image.open('/kaggle/input/roi-images-hda/ROI_images/TRAIN000054.JPG') as image:
    print(image.size)

In [None]:
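# NOTE: disabled scratch cell - imports and path configuration only; the same lines open the next (also disabled) cell.
# import os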
# import re
# import pandas as pd
# import torch
# import torch.nn as nn
# import torch.optim
# from torch.utils.data import DataLoader, random_split
# from torchvision.models import vit_b_16, ViT_B_16_Weights
# from torch.cuda.amp import GradScaler, autocast
# from tqdm import tqdm
# from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# model_name = "ViT_RG_ROI"
# model_save_directory = f'/kaggle/working/model/{model_name}'
# img_folder = '/kaggle/input/roi-images-hda/ROI_images'
# train_df = pd.read_csv('/kaggle/input/roi-images-hda/masks_csvs/glaucoma_masks_train.csv')

# best_model_directory = os.path.join('/kaggle/working/', 'best_model')


In [None]:
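# NOTE: disabled earlier draft of the ViT-B/16 training cell (checkpoint every 10 epochs, no metric history or plots).
# import os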
# import re
# import pandas as pd
# import torch
# import torch.nn as nn
# import torch.optim
# from torch.utils.data import DataLoader, random_split
# from torchvision.models import vit_b_16, ViT_B_16_Weights
# from torch.cuda.amp import GradScaler, autocast
# from tqdm import tqdm
# from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# ################### Configuration ###################
# model_name = "ViT_RG_ROI"
# model_save_directory = f'/kaggle/working/model/{model_name}'
# img_folder = '/kaggle/input/roi-images-hda/ROI_images'
# train_df = pd.read_csv('/kaggle/input/roi-images-hda/masks_csvs/glaucoma_masks_train.csv')

# best_model_directory = os.path.join('/kaggle/working/', 'best_model')

# if not os.path.exists(best_model_directory):
#     os.makedirs(best_model_directory)

# # Hyperparameters for early stopping
# eval_every = 5  # Evaluate on validation set every 5 epochs
# patience = 5    # Early stopping patience
# num_epochs = 100  # Total training epochs

# # Check for or create save directory
# if not os.path.exists(model_save_directory):
#     os.makedirs(model_save_directory)

# ################### Dataset and Dataloaders ###################
# # Initialize dataset with transformations
# train_dataset = GlaucomaDataset(dataframe=train_df, img_folder=img_folder, transform=train_transform(), extra_features=None)

# # Split train_dataset into training and validation sets
# train_size = int(0.8 * len(train_dataset))
# val_size = len(train_dataset) - train_size
# train_data, val_data = random_split(train_dataset, [train_size, val_size])

# # Dataloaders for training and validation
# train_loader = DataLoader(train_data, batch_size=20, shuffle=True, num_workers=8)
# val_loader = DataLoader(val_data, batch_size=20, shuffle=False, num_workers=8)

# ################### Model Initialization ###################
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# weights = ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1
# model = vit_b_16(weights=weights)

# # Update the model's final layer for binary classification
# num_features = model.heads.head.in_features
# model.heads.head = nn.Linear(num_features, 1)

# # Freeze all layers except the classifier head
# for name, param in model.named_parameters():
#     if 'heads.head' not in name:
#         param.requires_grad = False

# if torch.cuda.device_count() > 1:
#     print(f"Using {torch.cuda.device_count()} GPUs!")
#     model = nn.DataParallel(model)

# model.to(device)

# ################### Loss and Optimizer ###################
# # Compute class weights for weighted BCE loss
# negative_class = len(train_df[train_df['Final Label'] == 0])
# positive_class = len(train_df[train_df['Final Label'] == 1])
# pos_weight_value = negative_class / positive_class
# pos_weight_tensor = torch.tensor([pos_weight_value], dtype=torch.float, device=device)
# criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

# # Parameter groups for differential learning rates
# if torch.cuda.device_count() > 1:
#     base_params = [p for n, p in model.module.named_parameters() if 'heads.head' not in n]
#     classifier_params = model.module.heads.head.parameters()
# else:
#     base_params = [p for n, p in model.named_parameters() if 'heads.head' not in n]
#     classifier_params = model.heads.head.parameters()

# optimizer = torch.optim.AdamW([
#     {'params': base_params, 'lr': 1e-5, 'weight_decay': 1e-4},
#     {'params': classifier_params, 'lr': 1e-4, 'weight_decay': 1e-4}
# ])

# ################### Load Model Checkpoint (if exists) ###################
# latest_model_path = None
# start_epoch = 0

# for file in os.listdir(model_save_directory):
#     if file.startswith(f"{model_name}_epoch_") and file.endswith(".pth"):
#         epoch_num = int(re.findall(r"\d+", file)[0])
#         if epoch_num > start_epoch:
#             start_epoch = epoch_num
#             latest_model_path = os.path.join(model_save_directory, file)
#             print(latest_model_path)

# if latest_model_path:
#     model.load_state_dict(torch.load(latest_model_path))
#     print(f"Loaded model from {latest_model_path}, continuing training from epoch {start_epoch+1}")
# else:
#     print("No saved model found, starting training from scratch")


# ################### Training Loop with Early Stopping ###################
# scaler = GradScaler()
# best_val_auc = 0  # Track best AUC score for early stopping
# epochs_no_improve = 0

# for epoch in range(start_epoch, num_epochs):
#     model.train()
#     train_loss = 0.0

#     progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
#     for batch_idx, (images, labels) in progress_bar:
#         images, labels = images.to(device), labels.to(device)
        
#         optimizer.zero_grad()

#         with autocast():
#             outputs = model(images)
#             loss = criterion(outputs.squeeze(), labels.float())

#         scaler.scale(loss).backward()
#         scaler.step(optimizer)
#         scaler.update()

#         train_loss += loss.item() * images.size(0)
#         progress_bar.set_postfix({'train_loss': loss.item()})

#     train_loss /= len(train_loader.dataset)

#     # Save model after each epoch
#     if (epoch + 1) % 10 == 0:
#         epoch_save_path = os.path.join(model_save_directory, f"{model_name}_epoch_{epoch + 1}.pth")
#         torch.save(model.state_dict(), epoch_save_path)
#         print(f"Model saved to {epoch_save_path} after epoch {epoch + 1}")

#     # Validation and Early Stopping
#     if (epoch + 1) % eval_every == 0:
#         model.eval()
#         val_loss = 0.0
#         all_labels = []
#         all_preds = []

#         with torch.no_grad():
#             for images, labels in val_loader:
#                 images, labels = images.to(device), labels.to(device)
#                 with autocast():
#                     outputs = model(images)
#                     loss = criterion(outputs.squeeze(), labels.float())
                
#                 val_loss += loss.item() * images.size(0)
#                 preds = torch.sigmoid(outputs).squeeze().cpu().numpy()
                
#                 all_labels.extend(labels.cpu().numpy())
#                 all_preds.extend(preds)

#         val_loss /= len(val_loader.dataset)

#         # Calculate metrics
#         val_auc = roc_auc_score(all_labels, all_preds)
#         val_f1 = f1_score(all_labels, (np.array(all_preds) > 0.5).astype(int))
#         val_precision = precision_score(all_labels, (np.array(all_preds) > 0.5).astype(int))
#         val_recall = recall_score(all_labels, (np.array(all_preds) > 0.5).astype(int))

#         print(f"Validation AUC after epoch {epoch + 1}: {val_auc}")
#         print(f"Validation F1 Score: {val_f1}, Precision: {val_precision}, Recall: {val_recall}")

#         # Early stopping check
#         if val_auc > best_val_auc:
#             best_val_auc = val_auc
#             epochs_no_improve = 0
#             print(f"New best AUC: {best_val_auc}")
            
#             best_model_path = os.path.join(best_model_directory, f"{model_name}_best.pth")
#             torch.save(model.state_dict(), best_model_path)
#             print(f"Best model saved to {best_model_path}")
#         else:
#             epochs_no_improve += 1
#             print(f"No improvement for {epochs_no_improve} validation checks.")

#         if epochs_no_improve >= patience:
#             print(f"Early stopping triggered. No improvement for {patience} validation checks.")
#             break


# The next cell trains the model and records per-epoch metrics so the graphs can be generated after training.

In [None]:
import os
import re
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim
from torch.utils.data import DataLoader, random_split
from torchvision.models import vit_b_16, ViT_B_16_Weights
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve, precision_recall_curve
import matplotlib.pyplot as plt
import seaborn as sns

################### Configuration ###################
model_name = "ViT_RG_ROI"
model_save_directory = f'/kaggle/working/model/{model_name}'  # scanned for "<model_name>_epoch_<N>.pth" checkpoints
img_folder = '/kaggle/input/roi-images-hda/ROI_images'
train_df = pd.read_csv('/kaggle/input/roi-images-hda/masks_csvs/glaucoma_masks_train.csv')

best_model_directory = os.path.join('/kaggle/working/', 'best_model')

# exist_ok=True makes directory creation idempotent and avoids the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(best_model_directory, exist_ok=True)
os.makedirs(model_save_directory, exist_ok=True)

# Hyperparameters for early stopping
eval_every = 5    # Evaluate on validation set every 5 epochs
patience = 5      # Early stopping patience, counted in validation checks (not epochs)
num_epochs = 100  # Total training epochs

################### Dataset and Dataloaders ###################
# Two views over the same DataFrame. The training view applies the random
# AutoAugment pipeline; the validation view uses the deterministic test-time
# pipeline, so validation metrics are not computed on augmented images.
train_dataset = GlaucomaDataset(dataframe=train_df, img_folder=img_folder, transform=train_transform(), extra_features=None)
val_dataset = GlaucomaDataset(dataframe=train_df, img_folder=img_folder, transform=test_transform(), extra_features=None)

# 80/20 split of the row indices. The generator is seeded so the split is
# identical across kernel restarts; otherwise resuming from a checkpoint would
# re-split and leak former validation images into training.
SPLIT_SEED = 42
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(len(train_dataset), generator=split_generator).tolist()
train_data = torch.utils.data.Subset(train_dataset, shuffled_indices[:train_size])
val_data = torch.utils.data.Subset(val_dataset, shuffled_indices[train_size:])

# Dataloaders for training and validation
train_loader = DataLoader(train_data, batch_size=20, shuffle=True, num_workers=8)
val_loader = DataLoader(val_data, batch_size=20, shuffle=False, num_workers=8)

################### Model Initialization ###################
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ViT-B/16 with the SWAG end-to-end fine-tuned ImageNet-1k weights.
weights = ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1
model = vit_b_16(weights=weights)

# Swap the classification head for a single-logit layer (binary task).
num_features = model.heads.head.in_features
model.heads.head = nn.Linear(num_features, 1)

# Freeze everything that is not part of the new head.
backbone_parameters = [p for n, p in model.named_parameters() if 'heads.head' not in n]
for backbone_parameter in backbone_parameters:
    backbone_parameter.requires_grad = False

# Replicate across GPUs when more than one is visible.
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)

model.to(device)

################### Loss and Optimizer ###################
# Weighted BCE: the positive class is weighted by the negative/positive ratio.
label_column = train_df['Final Label']
negative_class = int((label_column == 0).sum())
positive_class = int((label_column == 1).sum())
pos_weight_value = negative_class / positive_class
pos_weight_tensor = torch.tensor([pos_weight_value], dtype=torch.float, device=device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

# Parameter groups for differential learning rates. With more than one GPU the
# model was wrapped in DataParallel above, so the real network is `.module`.
core_model = model.module if torch.cuda.device_count() > 1 else model
base_params = [p for n, p in core_model.named_parameters() if 'heads.head' not in n]
classifier_params = core_model.heads.head.parameters()

# Backbone group gets the smaller learning rate; both groups share the decay.
shared_weight_decay = 1e-4
optimizer = torch.optim.AdamW([
    dict(params=base_params, lr=1e-5, weight_decay=shared_weight_decay),
    dict(params=classifier_params, lr=1e-4, weight_decay=shared_weight_decay),
])

################### Load Model Checkpoint (if exists) ###################
latest_model_path = None
start_epoch = 0

# Match "<model_name>_epoch_<N>.pth" exactly, so digits that may appear in
# model_name can never be mistaken for the epoch number.
checkpoint_pattern = re.compile(rf"^{re.escape(model_name)}_epoch_(\d+)\.pth$")

for file in os.listdir(model_save_directory):
    checkpoint_match = checkpoint_pattern.match(file)
    if checkpoint_match is None:
        continue
    epoch_num = int(checkpoint_match.group(1))
    if epoch_num > start_epoch:
        start_epoch = epoch_num
        latest_model_path = os.path.join(model_save_directory, file)
        print(latest_model_path)

if latest_model_path:
    # map_location lets a checkpoint written on GPU be restored on a CPU-only kernel.
    checkpoint_state = torch.load(latest_model_path, map_location=device)
    # Checkpoints written from a DataParallel model prefix every key with
    # "module."; strip it and load into the underlying network so the file
    # loads whether or not the saving/loading runs used DataParallel.
    checkpoint_state = {
        (key[len('module.'):] if key.startswith('module.') else key): value
        for key, value in checkpoint_state.items()
    }
    load_target = model.module if isinstance(model, nn.DataParallel) else model
    load_target.load_state_dict(checkpoint_state)
    # Only model weights are restored; optimizer state is not part of the checkpoint.
    print(f"Loaded model from {latest_model_path}, continuing training from epoch {start_epoch+1}")
else:
    print("No saved model found, starting training from scratch")

################### Training Loop with Early Stopping ###################
scaler = GradScaler()
best_val_auc = 0  # Track best AUC score for early stopping
epochs_no_improve = 0

# Metric history used for plotting after training
train_losses = []
val_losses = []
val_aucs = []
val_f1s = []
val_precisions = []
val_recalls = []
val_epochs = []  # Epoch numbers (1-based) at which validation was run

for epoch in range(start_epoch, num_epochs):
    model.train()
    train_loss = 0.0

    progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    for batch_idx, (images, labels) in progress_bar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        with autocast():
            outputs = model(images)
            # squeeze(1) removes only the logit dimension. A bare squeeze()
            # would also drop the batch dimension when the final batch holds a
            # single sample, making the loss fail on a shape mismatch.
            loss = criterion(outputs.squeeze(1), labels.float())

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Weight by batch size so the epoch average is per-sample.
        train_loss += loss.item() * images.size(0)
        progress_bar.set_postfix({'train_loss': loss.item()})

    train_loss /= len(train_loader.dataset)
    train_losses.append(train_loss)  # Store train loss for each epoch

    # Validation and Early Stopping
    if (epoch + 1) % eval_every == 0:
        val_epochs.append(epoch + 1)  # Track the epochs at which validation is done
        model.eval()
        val_loss = 0.0
        all_labels = []
        all_preds = []

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                with autocast():
                    outputs = model(images)
                    loss = criterion(outputs.squeeze(1), labels.float())

                val_loss += loss.item() * images.size(0)
                # Keep the batch dimension so extend() always receives a 1-D array.
                preds = torch.sigmoid(outputs.float()).squeeze(1).cpu().numpy()

                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds)

        val_loss /= len(val_loader.dataset)
        val_losses.append(val_loss)  # Store validation loss for each eval cycle

        # Calculate metrics; threshold the probabilities once and reuse.
        val_binary_preds = (np.array(all_preds) > 0.5).astype(int)
        val_auc = roc_auc_score(all_labels, all_preds)
        val_f1 = f1_score(all_labels, val_binary_preds)
        val_precision = precision_score(all_labels, val_binary_preds)
        val_recall = recall_score(all_labels, val_binary_preds)

        # Append metrics for plotting
        val_aucs.append(val_auc)
        val_f1s.append(val_f1)
        val_precisions.append(val_precision)
        val_recalls.append(val_recall)

        print(f"Validation AUC after epoch {epoch + 1}: {val_auc}")
        print(f"Validation F1 Score: {val_f1}, Precision: {val_precision}, Recall: {val_recall}")

        # Early stopping check
        if val_auc > best_val_auc:
            best_val_auc = val_auc
            epochs_no_improve = 0
            print(f"New best AUC: {best_val_auc}")

            best_model_path = os.path.join(best_model_directory, f"{model_name}_best.pth")
            # Save the underlying network's weights: a DataParallel state dict
            # prefixes every key with "module." and cannot be loaded into a
            # plain (unwrapped) model.
            model_to_save = model.module if isinstance(model, nn.DataParallel) else model
            torch.save(model_to_save.state_dict(), best_model_path)
            print(f"Best model saved to {best_model_path}")
        else:
            epochs_no_improve += 1
            print(f"No improvement for {epochs_no_improve} validation checks.")

        if epochs_no_improve >= patience:
            print(f"Early stopping triggered. No improvement for {patience} validation checks.")
            break

# Plotting metrics after training
# Training losses were recorded from epoch start_epoch + 1 onwards. Offset the
# x-axis accordingly so it lines up with val_epochs (absolute epoch numbers)
# when training resumed from a checkpoint.
first_epoch = start_epoch + 1
epochs = range(first_epoch, first_epoch + len(train_losses))  # Training epochs

fig, axes = plt.subplots(2, 2, figsize=(12, 8))
loss_ax, auc_ax, f1_ax, pr_ax = axes.ravel()

# Training and validation loss
loss_ax.plot(epochs, train_losses, label='Training Loss')
loss_ax.plot(val_epochs, val_losses, label='Validation Loss')  # validation runs only every eval_every epochs
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

# Validation AUC
auc_ax.plot(val_epochs, val_aucs, label='Validation AUC')
auc_ax.set_xlabel('Epochs')
auc_ax.set_ylabel('AUC')
auc_ax.set_title('Validation AUC over Epochs')
auc_ax.legend()

# Validation F1 score
f1_ax.plot(val_epochs, val_f1s, label='Validation F1 Score')
f1_ax.set_xlabel('Epochs')
f1_ax.set_ylabel('F1 Score')
f1_ax.set_title('Validation F1 Score over Epochs')
f1_ax.legend()

# Validation precision and recall
pr_ax.plot(val_epochs, val_precisions, label='Validation Precision')
pr_ax.plot(val_epochs, val_recalls, label='Validation Recall')
pr_ax.set_xlabel('Epochs')
pr_ax.set_ylabel('Score')
pr_ax.set_title('Validation Precision and Recall over Epochs')
pr_ax.legend()

fig.tight_layout()
plt.show()


In [None]:
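# NOTE: disabled variant of the training cell - ViT-L/16, backbone not frozen, early stopping on validation loss, checkpoint saved every epoch.
# import os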
# import re
# import pandas as pd
# import torch
# import torch.nn as nn
# import torch.optim
# from torch.utils.data import DataLoader, random_split
# from torchvision.models import vit_l_16, ViT_L_16_Weights
# from torch.cuda.amp import GradScaler, autocast
# from tqdm import tqdm


# ################### Configuration ###################
# model_name = "ViT_RG_ROI"
# model_save_directory = f'/kaggle/working/model/{model_name}'
# img_folder = '/kaggle/input/roi-images-hda/ROI_images'
# train_df = pd.read_csv('/kaggle/input/roi-images-hda/masks_csvs/glaucoma_masks_train.csv')
# # val_df = pd.read_csv('./Datasets/glaucoma_masks_val.csv')  # if you use a separate validation set

# best_model_directory = os.path.join('/kaggle/working/best_model', 'best_model')
# if not os.path.exists(best_model_directory):
#     os.makedirs(best_model_directory)

# # Hyperparameters for early stopping
# eval_every = 5  # Evaluate on validation set every 5 epochs
# patience = 5    # Early stopping patience
# num_epochs = 100  # Total training epochs

# # Check for or create save directory
# if not os.path.exists(model_save_directory):
#     os.makedirs(model_save_directory)

# ################### Dataset and Dataloaders ###################
# # Initialize dataset with transformations
# train_dataset = GlaucomaDataset(dataframe=train_df, img_folder=img_folder, transform=train_transform(), extra_features=None)

# # Split train_dataset into training and validation sets
# train_size = int(0.8 * len(train_dataset))
# val_size = len(train_dataset) - train_size
# train_data, val_data = random_split(train_dataset, [train_size, val_size])

# # Dataloaders for training and validation
# train_loader = DataLoader(train_data, batch_size=20, shuffle=True, num_workers=8)
# val_loader = DataLoader(val_data, batch_size=20, shuffle=False, num_workers=8)

# ################### Model Initialization ###################
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# weights = ViT_L_16_Weights.IMAGENET1K_SWAG_E2E_V1
# model = vit_l_16(weights=weights)

# # Update the model's final layer for binary classification
# num_features = model.heads.head.in_features
# model.heads.head = nn.Linear(num_features, 1)

# if torch.cuda.device_count() > 1:
#     print(f"Using {torch.cuda.device_count()} GPUs!")
#     model = nn.DataParallel(model)

# model.to(device)
# # Check if ViT weights are frozen or trainable
# for name, param in model.named_parameters():
#     print(f"{name}: {'trainable' if param.requires_grad else 'frozen'}")


# ################### Loss and Optimizer ###################
# # Compute class weights for weighted BCE loss
# negative_class = len(train_df[train_df['Final Label'] == 0])
# positive_class = len(train_df[train_df['Final Label'] == 1])
# pos_weight_value = negative_class / positive_class
# pos_weight_tensor = torch.tensor([pos_weight_value], dtype=torch.float, device=device)
# criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

# # Parameter groups for differential learning rates
# if torch.cuda.device_count() > 1:
#     base_params = [p for n, p in model.module.named_parameters() if 'heads.head' not in n]
#     classifier_params = model.module.heads.head.parameters()
# else:
#     base_params = [p for n, p in model.named_parameters() if 'heads.head' not in n]
#     classifier_params = model.heads.head.parameters()

# optimizer = torch.optim.AdamW([
#     {'params': base_params, 'lr': 1e-5, 'weight_decay': 1e-4},
#     {'params': classifier_params, 'lr': 1e-4, 'weight_decay': 1e-4}
# ])

# ################### Load Model Checkpoint (if exists) ###################
# latest_model_path = None
# start_epoch = 0
# for file in os.listdir(model_save_directory):
#     if file.startswith(f"{model_name}_epoch_") and file.endswith(".pth"):
#         epoch_num = int(re.findall(r"\d+", file)[0])
#         if epoch_num > start_epoch:
#             start_epoch = epoch_num
#             latest_model_path = os.path.join(model_save_directory, file)

# if latest_model_path:
#     model.load_state_dict(torch.load(latest_model_path))
#     print(f"Loaded model from {latest_model_path}, continuing training from epoch {start_epoch + 1}")
# else:
#     print("No saved model found, starting training from scratch")

# ################### Training Loop with Early Stopping ###################
# scaler = GradScaler()
# best_val_loss = float("inf")
# epochs_no_improve = 0

# for epoch in range(start_epoch, num_epochs):
#     model.train()
#     train_loss = 0.0

#     progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
#     for batch_idx, (images, labels) in progress_bar:
#         images, labels = images.to(device), labels.to(device)
        
#         optimizer.zero_grad()

#         with autocast():
#             outputs = model(images)
#             loss = criterion(outputs.squeeze(), labels.float())

#         scaler.scale(loss).backward()
#         scaler.step(optimizer)
#         scaler.update()

#         train_loss += loss.item() * images.size(0)
#         progress_bar.set_postfix({'train_loss': loss.item()})

#     train_loss /= len(train_loader.dataset)

#     # Save model after each epoch
#     epoch_save_path = os.path.join(model_save_directory, f"{model_name}_epoch_{epoch + 1}.pth")
#     torch.save(model.state_dict(), epoch_save_path)
#     print(f"Model saved to {epoch_save_path} after epoch {epoch + 1}")

#     # Validation and Early Stopping
#     if (epoch + 1) % eval_every == 0:
#         model.eval()
#         val_loss = 0.0
#         with torch.no_grad():
#             for images, labels in val_loader:
#                 images, labels = images.to(device), labels.to(device)
#                 with autocast():
#                     outputs = model(images)
#                     loss = criterion(outputs.squeeze(), labels.float())
#                 val_loss += loss.item() * images.size(0)
        
#         val_loss /= len(val_loader.dataset)
#         print(f"Validation loss after epoch {epoch + 1}: {val_loss}")

#         # Early stopping check
#         if val_loss < best_val_loss:
#             best_val_loss = val_loss
#             epochs_no_improve = 0
#             print(f"New best validation loss: {best_val_loss}")
            
#             best_model_path = os.path.join(best_model_directory, f"{model_name}_best.pth")
#             torch.save(model.state_dict(), best_model_path)
#             print(f"Best model saved to {best_model_path}")
#         else:
#             epochs_no_improve += 1
#             print(f"No improvement for {epochs_no_improve} validation checks.")

#         if epochs_no_improve >= patience:
#             print(f"Early stopping triggered. No improvement for {patience} validation checks.")
#             break


In [None]:
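# NOTE: disabled variant - ViT-B/16 on the non-ROI images, no validation split; the best model is chosen by training loss.
# import os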
# import pandas as pd
# import torch
# import torch.nn as nn
# import torch.optim
# from torchvision.models import vit_l_16, ViT_L_16_Weights
# from torchvision.models import vit_b_16, ViT_B_16_Weights
# from torch.utils.data import DataLoader
# from torch.cuda.amp import GradScaler, autocast
# from tqdm import tqdm
# import re



# ################ Adjust this section as needed ################
# model_name = "ViT_RG_NO_ROI"
# model_save_directory = f'/kaggle/working/model/{model_name}'
# img_folder = '/kaggle/input/preprocessed-image-hda-without-roi/preprocessed_images' 
# train_df = pd.read_csv('/kaggle/input/images-hda-before-preprocess/glaucoma_no_mask_train.csv')
# ###############################################################

# if not os.path.exists(model_save_directory):
#     os.makedirs(model_save_directory)

# train_dataset = GlaucomaDataset(dataframe=train_df, img_folder=img_folder, transform=train_transform(), extra_features=None)
# train_loader = DataLoader(train_dataset, batch_size=12, shuffle=True, num_workers=8) 

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# # weights = ViT_L_16_Weights.IMAGENET1K_SWAG_E2E_V1

# # model = vit_l_16(weights=weights)
# weights = ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1
# model = vit_b_16(weights=weights)

# num_features = model.heads.head.in_features  
# model.heads.head = nn.Linear(num_features, 1)

# if torch.cuda.device_count() > 1:
#     print(f"Let's use {torch.cuda.device_count()} GPUs!")
#     model = nn.DataParallel(model)

# model.to(device)

# # Define weights and weighted BCELoss
# negative_class = len(train_df[train_df['Final Label'] == 0])
# positive_class = len(train_df[train_df['Final Label'] == 1])
# pos_weight_value = negative_class / positive_class
# pos_weight_tensor = torch.tensor([pos_weight_value], dtype=torch.float, device=device)
# criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

# # Accessing parameters after applying DataParallel
# if torch.cuda.device_count() > 1:
#     base_params = [p for n, p in model.module.named_parameters() if 'heads.head' not in n]
#     classifier_params = model.module.heads.head.parameters()
# else:
#     base_params = [p for n, p in model.named_parameters() if 'heads.head' not in n]
#     classifier_params = model.heads.head.parameters()

# # Define an AdamW optimizer with differential learning rate and weight decay
# optimizer = torch.optim.AdamW([
#     {'params': base_params, 'lr': 1e-5, 'weight_decay': 1e-4},
#     {'params': classifier_params, 'lr': 1e-4, 'weight_decay': 1e-4}
# ])

# # Load the latest checkpoint if exists
# latest_model_path = None
# start_epoch = 0

# patience = 10  # Number of epochs to wait before stopping when there is no improvement
# epochs_no_improve = 0  

# for file in os.listdir(model_save_directory):
#     if file.startswith(f"{model_name}_epoch_") and file.endswith(".pth"):
#         epoch_num = int(re.findall(r"\d+", file)[0])
#         if epoch_num > start_epoch:
#             start_epoch = epoch_num
#             latest_model_path = os.path.join(model_save_directory, file)

# if latest_model_path:
#     model.load_state_dict(torch.load(latest_model_path))
#     print(f"Loaded model from {latest_model_path}, continuing training from epoch {start_epoch+1}")
# else:
#     print("No saved model found, starting training from scratch")

# scaler = GradScaler()
# num_epochs = 100
# best_loss = float('inf')  # Initialize best loss for validation-based saving
# save_interval = 10        # Adjust the interval to save model periodically

# # Training and validation loop
# for epoch in range(start_epoch, num_epochs):
#     model.train()
#     train_loss = 0.0

#     progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch+1}")
#     for batch_idx, (images, labels) in progress_bar:
#         images, labels = images.to(device), labels.to(device)

#         optimizer.zero_grad()

#         with autocast():
#             outputs = model(images)
#             loss = criterion(outputs.squeeze(), labels.float())

#         scaler.scale(loss).backward()
#         scaler.step(optimizer)
#         scaler.update()

#         train_loss += loss.item() * images.size(0)
#         progress_bar.set_postfix({'train_loss': loss.item()})

#     train_loss /= len(train_loader.dataset)
#     print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}")

#     # Save model if current loss is the best
#     best_model_save_directory = '/kaggle/working/best_models/'
#     if not os.path.exists(best_model_save_directory):
#         os.makedirs(best_model_save_directory)

#     # Use best_model_save_directory instead of model_save_directory
# #     best_model_path = os.path.join(best_model_save_directory, f"{model_name}_best.pth")

#     if train_loss < best_loss:
#         best_loss = train_loss

#         best_model_path = os.path.join(best_model_save_directory, f"{model_name}_best.pth")
#         torch.save(model.state_dict(), best_model_path)
#         print(f"New best model saved to {best_model_path} with loss {best_loss:.4f}")
#         epochs_no_improve = 0 
#     else:
#         epochs_no_improve += 1
#         print(f"No improvement in loss for {epochs_no_improve} epoch(s).")

#     # Save every `save_interval` epochs
#     if (epoch + 1) % save_interval == 0:
#         epoch_save_path = os.path.join(model_save_directory, f"{model_name}_epoch_{epoch+1}.pth")
#         torch.save(model.state_dict(), epoch_save_path)
#         print(f"Model checkpoint saved to {epoch_save_path} at epoch {epoch+1}")
    
#     if epochs_no_improve >= patience:
#         print(f"Early stopping triggered. No improvement in loss for {patience} consecutive epochs.")
#         break


# Evaluate the best saved model on the test set


In [None]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

# Evaluate the best saved checkpoint on the test split and report
# accuracy / precision / recall / F1 / AUC-ROC.
# NOTE(review): this cell relies on names defined in other cells (pd, os, nn,
# vit_b_16, ViT_B_16_Weights, GlaucomaDataset, test_transform, img_folder,
# model_name, best_model_directory) — confirm they are all defined above this
# cell so that Restart Kernel -> Run All works.
test_df = pd.read_csv('/kaggle/input/roi-images-hda/masks_csvs/glaucoma_masks_test.csv')  # Path to test CSV
test_dataset = GlaucomaDataset(dataframe=test_df, img_folder=img_folder, transform=test_transform(), extra_features=None)
# shuffle=False keeps predictions in the same order as the rows of test_df.
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False, num_workers=8)

# best_model_directory = '/kaggle/input/model-for-hda/best_model/'

# Rebuild the architecture, then overwrite its weights with the checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)

# Replace the classification head with a single-logit head for binary classification.
num_features = model.heads.head.in_features
model.heads.head = nn.Linear(num_features, 1)

best_model_path = os.path.join(best_model_directory, f"{model_name}_best.pth")
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on GPU still loads in a CPU-only session.
model.load_state_dict(torch.load(best_model_path, map_location=device))
model.to(device)
model.eval()

# Accumulators for ground-truth labels and predicted probabilities.
all_labels = []
all_preds = []

# Testing loop (no gradients needed for inference).
with torch.no_grad():
    for images, labels in test_loader:
        # Only the images are needed on the device; labels are consumed on the CPU.
        images = images.to(device)

        outputs = model(images)
        # The head emits one logit per sample. reshape(-1) flattens it to a 1-D
        # vector even when the final batch holds a single sample; a bare
        # squeeze() would yield a 0-d array there and break list.extend().
        preds = torch.sigmoid(outputs).reshape(-1).cpu().numpy()

        # Store labels and predictions
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds)

# Convert probabilities to binary classes with a threshold of 0.5
binary_preds = (np.array(all_preds) > 0.5).astype(int)

# Threshold-based metrics use the binary predictions; AUC-ROC uses the raw probabilities.
test_accuracy = accuracy_score(all_labels, binary_preds)
test_precision = precision_score(all_labels, binary_preds)
test_recall = recall_score(all_labels, binary_preds)
test_f1 = f1_score(all_labels, binary_preds)
test_auc = roc_auc_score(all_labels, all_preds)

# Print the results
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")
print(f"Test F1 Score: {test_f1:.4f}")
print(f"Test AUC-ROC: {test_auc:.4f}")


In [None]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Evaluate the best saved checkpoint on the test split, report
# accuracy / precision / recall / F1 / AUC-ROC and plot the confusion matrix.
# NOTE(review): this cell relies on names defined in other cells (pd, os, nn,
# vit_b_16, ViT_B_16_Weights, GlaucomaDataset, test_transform, img_folder,
# model_name, best_model_directory) — confirm they are all defined above this
# cell so that Restart Kernel -> Run All works.
test_df = pd.read_csv('/kaggle/input/roi-images-hda/masks_csvs/glaucoma_masks_test.csv')  # Path to test CSV
test_dataset = GlaucomaDataset(dataframe=test_df, img_folder=img_folder, transform=test_transform(), extra_features=None)
# shuffle=False keeps predictions in the same order as the rows of test_df.
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False, num_workers=8)

# best_model_directory = '/kaggle/input/model-for-hda/best_model/'

# Rebuild the architecture, then overwrite its weights with the checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)

# Replace the classification head with a single-logit head for binary classification.
num_features = model.heads.head.in_features
model.heads.head = nn.Linear(num_features, 1)

best_model_path = os.path.join(best_model_directory, f"{model_name}_best.pth")
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on GPU still loads in a CPU-only session.
model.load_state_dict(torch.load(best_model_path, map_location=device))
model.to(device)
model.eval()

# Accumulators for ground-truth labels and predicted probabilities.
all_labels = []
all_preds = []

# Testing loop (no gradients needed for inference).
with torch.no_grad():
    for images, labels in test_loader:
        # Only the images are needed on the device; labels are consumed on the CPU.
        images = images.to(device)

        outputs = model(images)
        # The head emits one logit per sample. reshape(-1) flattens it to a 1-D
        # vector even when the final batch holds a single sample; a bare
        # squeeze() would yield a 0-d array there and break list.extend().
        preds = torch.sigmoid(outputs).reshape(-1).cpu().numpy()

        # Store labels and predictions
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds)

# Convert probabilities to binary classes with a threshold of 0.5
binary_preds = (np.array(all_preds) > 0.5).astype(int)

# Threshold-based metrics use the binary predictions; AUC-ROC uses the raw probabilities.
test_accuracy = accuracy_score(all_labels, binary_preds)
test_precision = precision_score(all_labels, binary_preds)
test_recall = recall_score(all_labels, binary_preds)
test_f1 = f1_score(all_labels, binary_preds)
test_auc = roc_auc_score(all_labels, all_preds)
# Pin the class order so the matrix is always 2x2 and lines up with the two
# hard-coded tick labels below, even if one class never occurs.
conf_matrix = confusion_matrix(all_labels, binary_preds, labels=[0, 1])

# Print the results
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")
print(f"Test F1 Score: {test_f1:.4f}")
print(f"Test AUC-ROC: {test_auc:.4f}")

# Plot the confusion matrix (rows = actual class, columns = predicted class).
class_names = ["Non-Referral Glaucoma", "Referral Glaucoma"]
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()


In [None]:
# import os
# import pandas as pd
# import torch
# import torch.nn as nn
# from torchvision.models import vit_l_16, ViT_L_16_Weights
# from torch.utils.data import DataLoader
# import numpy as np
# from sklearn.metrics import roc_auc_score, roc_curve
# from tqdm import tqdm

# # from data_utils import GlaucomaDataset, test_transform

# ####### Adjust this section as needed ############################
# # model_save_directory = './model/ViT_glaucoma_ROI'    
# # img_folder = './ROI_images' 
# # test_df = pd.read_csv('./Datasets/glaucoma_masks_test.csv')

# # Uncomment to validate ViT without ROI
# model_save_directory = '/kaggle/working/model'  
# img_folder = '/kaggle/input/preprocessed-image-hda-without-roi/preprocessed_images' 
# test_df = pd.read_csv('/kaggle/input/images-hda-before-preprocess/glaucoma_no_mask_test.csv')
# #################################################################

# # Load test data
# test_dataset = GlaucomaDataset(dataframe=test_df, img_folder=img_folder, transform=test_transform)
# test_loader = DataLoader(test_dataset, batch_size=12, shuffle=True, num_workers=2)

# # Model setup
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # model = vit_l_16(weights=ViT_L_16_Weights.IMAGENET1K_SWAG_E2E_V1)
# weights = ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1                                                                                               
# model = vit_b_16(weights=weights)
# model.heads.head = nn.Linear(model.heads.head.in_features, 1)
# model.to(device)

# def load_model(model, model_path, device):
#     state_dict = torch.load(model_path, map_location=device)
#     if any(k.startswith('module.') for k in state_dict.keys()):
#         new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
#     else:
#         new_state_dict = state_dict
#     model.load_state_dict(new_state_dict)

# def compute_metrics(actuals, probabilities):
#     fpr, tpr, thresholds = roc_curve(actuals, probabilities)
#     target_specificity = 0.95
#     target_fpr = 1 - target_specificity

#     # Find the first threshold where FPR is <= target FPR
#     index = np.where(fpr <= target_fpr)[0][0]
#     optimal_threshold = thresholds[index]
#     predictions = (probabilities >= optimal_threshold).astype(int)

#     TP = np.sum((actuals == 1) & (predictions == 1))
#     TN = np.sum((actuals == 0) & (predictions == 0))
#     FP = np.sum((actuals == 0) & (predictions == 1))
#     FN = np.sum((actuals == 1) & (predictions == 0))

#     sensitivity = TP / (TP + FN) if TP + FN > 0 else 0
#     specificity = TN / (TN + FP) if TN + FP > 0 else 0
#     accuracy = (TP + TN) / (TP + TN + FP + FN) if TP + TN + FP + FN > 0 else 0
#     auc = roc_auc_score(actuals, probabilities) if len(np.unique(actuals)) > 1 else 0
#     return sensitivity, specificity, accuracy, auc, optimal_threshold

# # Evaluate all models in the directory
# for filename in os.listdir(model_save_directory):
#     if filename.endswith(".pth"):
#         model_path = os.path.join(model_save_directory, filename)
#         load_model(model, model_path, device)
#         model.eval()

#         all_labels = []
#         all_probabilities = []
#         with torch.no_grad():
#             for images, labels in tqdm(test_loader, desc=f"Evaluating {filename}", leave=True):
#                 images = images.to(device)
#                 outputs = model(images)
#                 probabilities = torch.sigmoid(outputs).squeeze()
#                 all_labels.extend(labels.numpy())
#                 all_probabilities.extend(probabilities.cpu().numpy())

#         sensitivity, specificity, accuracy, auc_score, optimal_threshold = compute_metrics(np.array(all_labels), np.array(all_probabilities))
#         print(f"Model: {filename}")
#         print(f"Sensitivity: {sensitivity:.4f}")
#         print(f"Specificity: {specificity:.4f}")
#         print(f"Accuracy: {accuracy:.4f}")
#         print(f"AUC Score: {auc_score:.4f}")
#         print(f"Optimal Threshold: {optimal_threshold:.4f}\n")