In [None]:
# importing the libraries
import os
import sys
import pandas as pd
import torch
from PIL import Image
import numpy as np
import pandas as pd
import cv2
import glob
import time
import albumentations
import math
from scipy.special import softmax
from sklearn.preprocessing import OneHotEncoder

from models.ResNext50 import Myresnext50
from train.train_classification import trainer_classification
from utils.utils import configure_optimizers
from Datasets.DataLoader import ImageDataset

### PyTorch Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils import data
from torchvision import transforms

# Data paths
# glob returns matches in arbitrary (filesystem) order; sorting keeps the
# sample order -- and therefore the sample IDs assigned during inference --
# reproducible across runs and machines.
train_data = sorted(glob.glob('data/train/*/*'))
val_data = sorted(glob.glob('data/val/*/*'))

# The class name of an image is the name of its parent directory. os.path is
# used instead of splitting on '/' so this also works when the paths contain
# platform-specific separators.
labels = [os.path.basename(os.path.dirname(x)) for x in train_data]
cell_types = sorted(set(labels))

# One row per class: its name plus the integer code pandas assigns to it.
cell_types_df = pd.DataFrame(cell_types, columns=['Cell_Types'])
cell_types_df['Cell_Types'] = cell_types_df['Cell_Types'].astype('category')
cell_types_df['Cell_Types_Cat'] = cell_types_df['Cell_Types'].cat.codes

# Append the one-hot encoding of the integer codes; the new columns carry the
# default integer labels of the intermediate DataFrame.
enc = OneHotEncoder(handle_unknown='ignore')
enc_df = pd.DataFrame(enc.fit_transform(cell_types_df[['Cell_Types_Cat']]).toarray())
cell_types_df = cell_types_df.join(enc_df)

# Normalization pipeline
# Per-channel mean / std handed to albumentations.Normalize
# (presumably dataset statistics -- TODO confirm their origin).
channel_mean = (0.5642, 0.5026, 0.6960)
channel_std = (0.2724, 0.2838, 0.2167)

normalize_step = albumentations.Normalize(mean=channel_mean, std=channel_std)
transform_pipeline = albumentations.Compose([normalize_step])

# Model setup
# Instantiate resnext50_32x4d through torch.hub (torchvision v0.10.0 tag) and
# hand it to Myresnext50, configured for 23 output classes.
# NOTE(review): no `pretrained=` argument is passed, so the hub entry's
# default applies; the checkpoint below is loaded on top of it.
resnext50_pretrained = torch.hub.load('pytorch/vision:v0.10.0', 'resnext50_32x4d')
My_model = Myresnext50(my_pretrained_model=resnext50_pretrained, num_classes=23)

checkpoint_PATH = 'checkpoints/model_checkpoint.ckpt'
# map_location='cpu' deserializes every tensor onto the CPU, so loading does
# not depend on the device the checkpoint was saved from. The model itself is
# moved to the GPU later, after the weights have been copied in.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
checkpoint = torch.load(checkpoint_PATH, map_location='cpu')

def remove_data_parallel(old_state_dict):
    """Return a copy of a state dict with the leading ``module.`` removed from its keys.

    Checkpoints saved from a model wrapped in ``torch.nn.DataParallel`` have
    every parameter name prefixed with ``module.``; stripping it lets the
    weights be loaded into the bare model.

    Args:
        old_state_dict: Mapping from parameter name to value.

    Returns:
        An ``OrderedDict`` with the same values in the same order. Keys that
        start with ``module.`` lose that prefix; all other keys are kept
        unchanged.
    """
    # Imported here because the file-level imports do not provide OrderedDict.
    from collections import OrderedDict

    prefix = 'module.'
    new_state_dict = OrderedDict()
    for k, v in old_state_dict.items():
        # Only strip when the prefix is really there, so a checkpoint saved
        # without DataParallel is not mangled.
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v
    return new_state_dict

# Pull the saved weights out of the checkpoint, rename their keys with
# remove_data_parallel, and load them; strict=True makes any missing or
# unexpected key an error.
model_weights = checkpoint['model_state_dict']
checkpoint = remove_data_parallel(model_weights)
My_model.load_state_dict(checkpoint, strict=True)



In [None]:
# Move the model to the GPU and switch it to evaluation mode.
My_model = My_model.cuda()
My_model.eval()

# Validation images, read in a fixed (unshuffled) order.
dataset = ImageDataset(
    img_list=val_data,
    split='viz',
    df=cell_types_df,
    transform=transform_pipeline,
)
dataloader = DataLoader(dataset, shuffle=False, batch_size=32, num_workers=2)

# Initialize prediction storage
# Per-batch arrays are collected in lists and concatenated once after the
# loop; concatenating inside the loop re-copies everything gathered so far.
sample_ids = []
prediction_batches = []
label_batches = []
feature_batches = []

# Batch processing
# Inference only: no_grad() stops autograd from recording the forward passes,
# so no graph is kept alive for any batch.
with torch.no_grad():
    for batch in dataloader:
        images = batch["image"].cuda()
        batch_labels = batch["label"]

        # Generate anonymized sample IDs, numbered continuously across batches
        first_id = len(sample_ids)
        sample_ids.extend(f"sample_{first_id + idx}" for idx in range(len(batch['ID'])))

        # Get model predictions, plus the output of the model's `pretrained`
        # sub-module for the same images.
        # NOTE(review): this is a second forward pass per batch -- confirm
        # whether Myresnext50 can expose both outputs in one pass.
        batch_pred_probs = My_model(images)
        batch_hidden_features = My_model.pretrained(images)

        # Flatten everything after the batch dimension and convert to numpy
        prediction_batches.append(
            torch.flatten(batch_pred_probs, start_dim=1).detach().cpu().numpy())
        label_batches.append(
            torch.flatten(batch_labels, start_dim=1).cpu().numpy())
        feature_batches.append(
            torch.flatten(batch_hidden_features, start_dim=1).detach().cpu().numpy())

if not prediction_batches:
    # Fail with a clear message instead of an obscure error further down.
    raise ValueError("dataloader produced no batches; check that val_data is not empty")

# Concatenate results
predictions = np.concatenate(prediction_batches)
labels = np.concatenate(label_batches)
hidden_features = np.concatenate(feature_batches)

# Turn the raw model outputs into per-sample probabilities (softmax over the
# class axis).
predictions = softmax(predictions, axis=1)

# Shape report for a quick sanity check, one line per collected result
print(
    f"Predictions shape: {predictions.shape}",
    f"Labels shape: {labels.shape}",
    f"Hidden features shape: {hidden_features.shape}",
    f"Number of samples: {len(sample_ids)}",
    sep="\n",
)

# Generate class predictions
# One-hot encode the most probable class of every sample. argmax picks exactly
# one class per row (the first one on ties), whereas comparing each row with
# its maximum would flag several classes whenever probabilities tie. The
# vectorized assignment also replaces the per-row Python loop.
class_predictions = np.zeros_like(predictions)
class_predictions[np.arange(predictions.shape[0]), np.argmax(predictions, axis=1)] = 1

print(f"Class predictions shape: {class_predictions.shape}")

In [6]:
# Wrap the result arrays in DataFrames: one row per sample ID, one column per
# class name.
class_names = cell_types_df['Cell_Types'].tolist()
label = pd.DataFrame(data=labels, index=sample_ids, columns=class_names)
pred_prob = pd.DataFrame(data=predictions, index=sample_ids, columns=class_names)
# Hard predictions come from class_predictions (one-hot), not from the
# probabilities: the classification metrics computed later need discrete
# 0/1 values per class.
pred_class = pd.DataFrame(data=class_predictions, index=sample_ids, columns=class_names)

In [None]:
from sklearn.metrics import (roc_auc_score, accuracy_score, recall_score, precision_score, f1_score)
from scipy.special import softmax
def evaluation_metrics_multiclass(label, pred_prob, pred_class):
    """Compute binary metrics separately for every class column.

    Each column is scored on its own: the column of ``label`` is the ground
    truth, the matching column of ``pred_prob`` is the score used for ROC AUC,
    and the matching column of ``pred_class`` is the hard prediction used for
    the remaining metrics.

    Args:
        label: DataFrame of ground-truth indicators, one column per class
            (expected to hold 0/1 values).
        pred_prob: DataFrame of predicted scores with the same columns.
        pred_class: DataFrame of predicted indicators with the same columns
            (expected to hold 0/1 values).

    Returns:
        dict mapping each class name to ``[AUC, F1, Acc, Precision, Recall]``.
        AUC is ``nan`` for a class whose ground truth holds a single distinct
        value, because ROC AUC is undefined without both positives and
        negatives.
    """
    eval_dict = {}

    for per_class in label.columns:
        # Column-wise views of this class: ground truth, score, hard prediction
        groundtruth_per_class = label[per_class].tolist()
        pred_prob_per_class = pred_prob[per_class].tolist()
        pred_class_per_class = pred_class[per_class].tolist()

        if len(set(groundtruth_per_class)) < 2:
            # e.g. a class with no samples in this split: report nan instead of
            # letting one class abort the evaluation of all the others.
            roc_auc = float('nan')
        else:
            roc_auc = roc_auc_score(groundtruth_per_class, pred_prob_per_class)
        f1 = f1_score(groundtruth_per_class, pred_class_per_class)
        acc = accuracy_score(groundtruth_per_class, pred_class_per_class)
        precision = precision_score(groundtruth_per_class, pred_class_per_class)
        recall = recall_score(groundtruth_per_class, pred_class_per_class)

        # Order matters: callers label these as AUC, F1, Acc, Precision, Recall
        eval_dict[per_class] = [roc_auc, f1, acc, precision, recall]

    return eval_dict


# Per-class metrics computed from the label / pred_prob / pred_class frames
multiclass = evaluation_metrics_multiclass(
    label=label,
    pred_prob=pred_prob,
    pred_class=pred_class,
)
print(multiclass)

In [None]:
# Tabulate the per-class metrics: after the transpose each row is a class and
# each column a metric.
metric_names = ['AUC','F1','Acc','Precision','Recall']
df = pd.DataFrame.from_dict(multiclass)
df.index = metric_names
df.T

In [9]:
from utils.utils import one_vs_rest_metrics, get_overall_multiclass_metrics

In [None]:
# Collapse the per-class indicator frames to one integer class index per
# sample, then compute the one-vs-rest metrics and label the rows by class.
true_idx = label.to_numpy().argmax(axis=1)
pred_idx = pred_class.to_numpy().argmax(axis=1)
prob_matrix = pred_prob.to_numpy()

df_one_vs_rest = one_vs_rest_metrics(true_idx, pred_idx, prob_matrix)
df_one_vs_rest.index = cell_types_df['Cell_Types'].tolist()
df_one_vs_rest

In [None]:

# Overall metrics from the integer class indices (argmax of the per-class
# indicator frames) and the full probability matrix.
true_idx = label.to_numpy().argmax(axis=1)
pred_idx = pred_class.to_numpy().argmax(axis=1)
prob_matrix = pred_prob.to_numpy()

df_overall_multiclass_metrics = get_overall_multiclass_metrics(true_idx, pred_idx, prob_matrix)
df_overall_multiclass_metrics