In [20]:
pip install torch torchvision torchaudio




In [22]:
ls

 Volume in drive C is OS
 Volume Serial Number is EA49-7D93

 Directory of C:\Users\pandu\Desktop\projectcs499\src\testing

27-03-2025  11:22 PM    <DIR>          .
27-03-2025  04:17 PM    <DIR>          ..
27-03-2025  04:18 PM    <DIR>          .ipynb_checkpoints
27-03-2025  11:22 PM            28,739 compute-performance-metrics.ipynb
27-03-2025  11:24 PM    <DIR>          test
27-03-2025  04:50 PM            75,885 thyroid_clean.csv
27-03-2025  04:46 PM            46,110 trained_clinical_model.pth
27-03-2025  04:44 PM       163,948,074 trained_imaging_model.pth
27-03-2025  04:44 PM       163,940,990 trained_imaging_model_dataset2.pth
27-03-2025  04:44 PM       163,934,846 trained_imaging_model_dataset3.pth
27-03-2025  04:46 PM         1,607,878 trained_multimodal_model.pth
               7 File(s)    493,582,522 bytes
               4 Dir(s)  17,175,920,640 bytes free


In [3]:
!nvidia-smi

'nvidia-smi' is not recognized as an internal or external command,
operable program or batch file.


In [24]:
from torchvision.models import resnet18, ResNet18_Weights

# Load ResNet18 through torchvision's weights-enum API (`weights=`), the
# replacement for the older `pretrained=True` flag. `ResNet18_Weights.DEFAULT`
# selects the weight set torchvision designates as the default for this model.
resnet = resnet18(weights=ResNet18_Weights.DEFAULT)


In [26]:
# Cell: Updated HybridImagingModel Using Pretrained ResNet18

import torch
import torch.nn as nn
import torchvision.models as models

class HybridImagingModel(nn.Module):
    """ResNet18-based image encoder producing one feature vector per image.

    The torchvision ResNet18 is used without its final fully-connected layer;
    its pooled 512-channel output is flattened and projected to ``output_dim``
    by a new linear layer.

    Args:
        output_dim: Length of the feature vector returned for each image.
        pretrained: When True (the default, matching the previous behaviour)
            the backbone starts from torchvision's default ResNet18 weights.
            When False the backbone is randomly initialised, which avoids a
            weight download when a checkpoint is loaded over the model anyway.
    """

    def __init__(self, output_dim=512, pretrained=True):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the weights enum is
        # the supported way to request pretrained parameters.
        weights = models.ResNet18_Weights.DEFAULT if pretrained else None
        resnet = models.resnet18(weights=weights)
        # Keep every child module except the last one (the classification FC).
        # Output shape: (batch, 512, 1, 1)
        self.feature_extractor = nn.Sequential(*list(resnet.children())[:-1])
        # Projects the flattened 512 backbone features to the requested size.
        self.fc = nn.Linear(512, output_dim)

    def forward(self, x):
        """Encode an image batch; returns a tensor of shape (batch, output_dim)."""
        # x is assumed to be resized to 224x224
        x = self.feature_extractor(x)  # (batch, 512, 1, 1)
        x = torch.flatten(x, 1)        # (batch, 512)
        return self.fc(x)              # (batch, output_dim)

# For completeness, here are the unchanged ClinicalModel and FusionModule

class ClinicalModel(nn.Module):
    """Two-layer perceptron that embeds a clinical feature vector.

    Layout: Linear(input_dim -> 64), ReLU, Linear(64 -> output_dim).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_width = 64
        stages = [
            nn.Linear(input_dim, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, output_dim),
        ]
        self.fc = nn.Sequential(*stages)

    def forward(self, x):
        """Return the embedding of ``x`` produced by the layer stack."""
        embedding = self.fc(x)
        return embedding

class FusionModule(nn.Module):
    """Fuses image and clinical embeddings and feeds two linear heads.

    The two embeddings are concatenated along dim 1 and passed through a
    single linear layer; the result goes to a classification head and a
    one-output risk head. No activation is applied between these layers.
    """

    def __init__(self, img_dim, clin_dim, fused_dim, num_classes):
        super().__init__()
        joint_dim = img_dim + clin_dim
        self.fusion = nn.Linear(joint_dim, fused_dim)
        self.classifier = nn.Linear(fused_dim, num_classes)
        self.risk_regressor = nn.Linear(fused_dim, 1)

    def forward(self, img_feat, clin_feat):
        """Return ``(class_logits, risk_score)`` for the paired embeddings."""
        joint = torch.cat((img_feat, clin_feat), dim=1)
        shared = self.fusion(joint)
        return self.classifier(shared), self.risk_regressor(shared)

# Updated MultimodalModel using the new HybridImagingModel
class MultimodalModel(nn.Module):
    """Combines an image encoder and a clinical encoder through a fusion module.

    The image branch yields 512 features, the clinical branch 32, and the
    fusion module uses a 256-wide fused representation.
    """

    def __init__(self, num_classes, clinical_input_dim):
        super().__init__()
        image_feature_dim = 512
        clinical_feature_dim = 32
        # Image branch (ResNet18 backbone in this cell's HybridImagingModel).
        self.imaging_model = HybridImagingModel(output_dim=image_feature_dim)
        self.clinical_model = ClinicalModel(
            input_dim=clinical_input_dim,
            output_dim=clinical_feature_dim,
        )
        self.fusion_module = FusionModule(
            img_dim=image_feature_dim,
            clin_dim=clinical_feature_dim,
            fused_dim=256,
            num_classes=num_classes,
        )

    def forward(self, image, clinical_data):
        """Return ``(class_logits, risk_score)`` for a batch of paired inputs."""
        image_embedding = self.imaging_model(image)
        clinical_embedding = self.clinical_model(clinical_data)
        logits, risk = self.fusion_module(image_embedding, clinical_embedding)
        return logits, risk

# Smoke-test the ResNet18-based MultimodalModel with random inputs.
dummy_image = torch.randn(4, 3, 224, 224)  # batch of 4 RGB images at 224x224
dummy_clinical = torch.randn(4, 10)        # 10 clinical features per sample
model_test = MultimodalModel(num_classes=9, clinical_input_dim=10)

# A shape check needs neither gradients nor batch-norm statistic updates, so
# run it in eval mode without autograd.
model_test.eval()
with torch.no_grad():
    class_out, risk_out = model_test(dummy_image, dummy_clinical)

# Fail loudly if the shapes drift instead of relying on eyeballing the printout.
assert class_out.shape == (4, 9), class_out.shape
assert risk_out.shape == (4, 1), risk_out.shape
print("Updated HybridImagingModel Test - Classification Output Shape:", class_out.shape)  # Expected: [4, 9]
print("Updated HybridImagingModel Test - Risk Prediction Output Shape:", risk_out.shape)   # Expected: [4, 1]




Updated HybridImagingModel Test - Classification Output Shape: torch.Size([4, 9])
Updated HybridImagingModel Test - Risk Prediction Output Shape: torch.Size([4, 1])


In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# --- Updated Hybrid Imaging Model ---
# Note: this assumes the saved checkpoint that is loaded for evaluation was trained with an imaging branch that outputs 64 features.
class HybridImagingModel(nn.Module):
    """Small two-convolution CNN that encodes each image as 64 features.

    The shape comments below assume a 640x640 input; the adaptive pooling at
    the end makes the output size independent of the input resolution.
    """

    def __init__(self):
        super().__init__()
        # Stand-in architecture (not a pretrained backbone) ending in 64 channels.
        conv_stack = [
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),   # [B, 32, 320, 320]
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),                                        # [B, 32, 160, 160]
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),  # [B, 64, 80, 80]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),                           # [B, 64, 1, 1]
        ]
        self.features = nn.Sequential(*conv_stack)
        # Despite the name, `fc` only flattens [B, 64, 1, 1] to [B, 64].
        self.fc = nn.Flatten()

    def forward(self, x):
        """Return a [B, 64] feature tensor for the image batch ``x``."""
        pooled = self.features(x)  # [B, 64, 1, 1]
        return self.fc(pooled)     # [B, 64]

# --- Clinical Data Model ---
class ClinicalModel(nn.Module):
    """MLP encoder for clinical features: Linear -> ReLU -> Linear.

    The hidden layer is 64 units wide; ``output_dim`` sets the embedding size
    (the multimodal model in this notebook passes 32).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        layers = (
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim),
        )
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        encoded = self.fc(x)
        return encoded

# --- Fusion Module ---
# Here, we update the imaging feature dimension to 64 (instead of 512)
class FusionModule(nn.Module):
    """Joins image and clinical embeddings and produces two predictions.

    Unlike a plain two-output head, ``forward`` also returns the fused
    representation, so callers receive a 3-tuple.
    """

    def __init__(self, img_dim, clin_dim, fused_dim, num_classes):
        super().__init__()
        # The multimodal model in this cell passes img_dim=64 and clin_dim=32.
        concat_width = img_dim + clin_dim
        self.fusion = nn.Linear(concat_width, fused_dim)
        self.classifier = nn.Linear(fused_dim, num_classes)
        self.risk_regressor = nn.Linear(fused_dim, 1)  # single risk output

    def forward(self, img_feat, clin_feat):
        """Return ``(class_logits, risk_score, fused_feat)``."""
        stacked = torch.cat((img_feat, clin_feat), dim=1)  # [B, img_dim + clin_dim]
        fused_feat = self.fusion(stacked)                   # [B, fused_dim]
        outputs = (
            self.classifier(fused_feat),      # [B, num_classes]
            self.risk_regressor(fused_feat),  # [B, 1]
            fused_feat,
        )
        return outputs

# --- Multimodal Model ---
class MultimodalModel(nn.Module):
    """Image + clinical model built around the 64-feature imaging branch.

    ``forward`` returns only the class logits and risk score; the fused
    representation that the fusion module also produces is discarded.
    """

    def __init__(self, num_classes, clinical_input_dim):
        super().__init__()
        imaging_width = 64   # feature size produced by HybridImagingModel
        clinical_width = 32  # feature size requested from ClinicalModel
        self.imaging_model = HybridImagingModel()
        self.clinical_model = ClinicalModel(input_dim=clinical_input_dim, output_dim=clinical_width)
        self.fusion_module = FusionModule(
            img_dim=imaging_width,
            clin_dim=clinical_width,
            fused_dim=256,
            num_classes=num_classes,
        )

    def forward(self, image, clinical_data):
        """Return ``(class_logits, risk_score)`` for a batch of paired inputs."""
        image_embedding = self.imaging_model(image)                # [B, 64]
        clinical_embedding = self.clinical_model(clinical_data)    # [B, 32]
        logits, risk, _fused = self.fusion_module(image_embedding, clinical_embedding)
        return logits, risk

# Smoke-test the 64-feature MultimodalModel with random inputs.
dummy_img = torch.randn(4, 3, 640, 640)  # batch of 4 RGB images at 640x640
dummy_clin = torch.randn(4, 10)          # clinical input dimension = 10
model_test = MultimodalModel(num_classes=9, clinical_input_dim=10)

# A shape check needs neither gradients nor batch-norm statistic updates, so
# run it in eval mode without autograd.
model_test.eval()
with torch.no_grad():
    out_cls, out_risk = model_test(dummy_img, dummy_clin)

# Fail loudly if the shapes drift instead of relying on eyeballing the printout.
assert out_cls.shape == (4, 9), out_cls.shape
assert out_risk.shape == (4, 1), out_risk.shape
print("Updated HybridImagingModel Test - Classification Output Shape:", out_cls.shape)  # Expected: [4, 9]
print("Updated HybridImagingModel Test - Risk Prediction Output Shape:", out_risk.shape)   # Expected: [4, 1]


Updated HybridImagingModel Test - Classification Output Shape: torch.Size([4, 9])
Updated HybridImagingModel Test - Risk Prediction Output Shape: torch.Size([4, 1])


In [30]:
import os
import cv2
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
import torchvision.transforms as T

class MultimodalDataset(Dataset):
    """
    Pairs every ``*.jpg`` in ``images_dir`` with a class label and clinical features.

    For an image ``<stem>.jpg``:

    * The label is the first whitespace-separated token on the first line of
      ``labels_dir/<stem>.txt``, parsed as an int. A missing or empty label
      file yields label 0.
      NOTE(review): unlabeled images are therefore indistinguishable from
      class 0 -- confirm this is intended.
    * The patient id is the part of ``<stem>`` before the first underscore. It
      is matched as a string against the ``id`` column of ``clinical_csv``.
      The first matching row, without the ``id`` and ``mal`` columns, becomes
      the clinical feature vector. Without a match a zero vector of length
      ``clinical_dim`` is returned.

    Args:
        images_dir: Directory containing the ``*.jpg`` images.
        labels_dir: Directory containing one ``<stem>.txt`` label file per image.
        clinical_csv: CSV file with an ``id`` column plus clinical columns.
        transform: Optional callable applied to the RGB ``ndarray`` of each
            image. Without it the image is converted to a float tensor in
            channel-first layout and divided by 255.
        clinical_dim: Length of the zero vector used when a patient id is not
            found in the CSV. It is not checked against the number of feature
            columns in the CSV; the two must agree for batches that mix
            matched and unmatched patients to collate.

    Each item is ``(image, label, clinical_features)`` where ``label`` is a
    ``torch.long`` scalar tensor and ``clinical_features`` is ``float32``.
    """

    def __init__(self, images_dir, labels_dir, clinical_csv, transform=None, clinical_dim=10):
        self.images_dir = Path(images_dir)
        self.labels_dir = Path(labels_dir)
        # glob() yields files in filesystem order, which differs between
        # machines; sorting makes dataset indices reproducible.
        self.image_files = sorted(self.images_dir.glob("*.jpg"))
        self.transform = transform
        self.clinical_dim = clinical_dim
        self.clinical_df = pd.read_csv(clinical_csv)
        self.clinical_df['id'] = self.clinical_df['id'].astype(str)

        # Precompute what every lookup needs so __getitem__ does not scan the
        # whole frame per sample: the feature-only view and, for each id, the
        # position of its first row.
        self._clinical_features = self.clinical_df.drop(columns=["id", "mal"], errors="ignore")
        self._row_by_patient = {}
        for position, patient_id in enumerate(self.clinical_df["id"]):
            self._row_by_patient.setdefault(patient_id, position)

    def __len__(self):
        return len(self.image_files)

    def _load_image(self, img_path):
        """Read ``img_path`` as RGB and return the transformed image."""
        img = cv2.imread(str(img_path))
        if img is None:
            raise RuntimeError(f"Unable to read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transform:
            return self.transform(img)
        return torch.from_numpy(img).permute(2, 0, 1).float() / 255.0

    def _read_label(self, img_path):
        """Return the int label stored for ``img_path``, or 0 when none is available."""
        label_file = self.labels_dir / (img_path.stem + ".txt")
        if label_file.exists() and label_file.stat().st_size > 0:
            with open(label_file, "r") as f:
                line = f.readline().strip()
            if line:
                return int(line.split()[0])
        return 0

    def _clinical_features_for(self, img_path):
        """Return the float32 clinical vector for the patient encoded in ``img_path``."""
        patient_id = img_path.stem.split("_")[0]
        position = self._row_by_patient.get(patient_id)
        if position is None:
            return torch.zeros(self.clinical_dim, dtype=torch.float32)
        # Slice with a list so the row stays a one-row DataFrame and `.values`
        # applies the same dtype rules as the original frame-based lookup.
        row = self._clinical_features.iloc[[position]].values[0]
        return torch.tensor(row, dtype=torch.float32)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        img = self._load_image(img_path)
        imaging_label = self._read_label(img_path)
        clinical_features = self._clinical_features_for(img_path)
        return img, torch.tensor(imaging_label, dtype=torch.long), clinical_features

# Preprocessing applied to every image: ndarray -> PIL image, resize to
# 640x640, then convert to a tensor.
transform = T.Compose([
    T.ToPILImage(),
    T.Resize((640, 640)),
    T.ToTensor(),
])

# Paths are relative to the notebook's working directory; update as needed.
test_images_dir = "test/images"      # Folder containing test images
test_labels_dir = "test/labels"        # Folder containing test label files
clinical_csv = "thyroid_clean.csv"     # Path to the clinical CSV file

# Create the test dataset and DataLoader.
test_dataset = MultimodalDataset(test_images_dir, test_labels_dir, clinical_csv, transform=transform)
# num_workers=0 keeps loading in the main process. Worker processes cannot
# import a Dataset class that is defined inside a notebook when the platform
# spawns them (as on Windows), so iterating a multi-worker loader would fail.
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0)
print("Test dataset loaded with", len(test_dataset), "samples.")


Test dataset loaded with 50 samples.


In [None]:
import time
import torch
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Make sure your MultimodalModel and MultimodalDataset are defined (see previous cells)

# Release any cached CUDA memory, then force the evaluation onto the CPU.
torch.cuda.empty_cache()
device = torch.device("cpu")
print("Using device:", device)

# Paths are relative to the notebook's working directory; adjust as needed.
model_path = "trained_multimodal_model.pth"
test_images_dir = "test/images"        # Folder containing test images
test_labels_dir = "test/labels"          # Folder containing test label files
clinical_csv = "thyroid_clean.csv"       # Path to the clinical CSV file

# Create test dataset (MultimodalDataset and T come from earlier cells)
test_dataset = MultimodalDataset(
    images_dir=test_images_dir,
    labels_dir=test_labels_dir,
    clinical_csv=clinical_csv,
    transform=T.Compose([
        T.ToPILImage(),
        T.Resize((640, 640)),
        T.ToTensor(),
    ])
)

print("Full test dataset size:", len(test_dataset), "samples.")

# Evaluate on at most the first 50 samples. Clamping to the dataset size keeps
# the subset valid when fewer than 50 images are available.
subset_indices = list(range(min(50, len(test_dataset))))
subset_dataset = Subset(test_dataset, subset_indices)
# Use num_workers=0 on local CPU systems to avoid multiprocessing issues.
test_loader = DataLoader(subset_dataset, batch_size=4, shuffle=False, num_workers=0)
print("Using subset test dataset with", len(subset_dataset), "samples.")

# Build the model on CPU, then copy in every checkpoint tensor whose name and
# shape match the model.
model = MultimodalModel(num_classes=9, clinical_input_dim=10).to("cpu")
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
pretrained_dict = torch.load(model_path, map_location="cpu")
model_dict = model.state_dict()
filtered_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and model_dict[k].shape == v.shape}

# Tensors that are filtered out leave the corresponding model parameters at
# their random initial values, which makes the metrics below meaningless.
# Report the mismatch instead of hiding it.
skipped_keys = sorted(k for k in pretrained_dict if k not in filtered_dict)
missing_keys = sorted(k for k in model_dict if k not in filtered_dict)
if skipped_keys or missing_keys:
    print("WARNING: checkpoint does not fully match the model architecture.")
    print("  Checkpoint entries skipped:", skipped_keys)
    print("  Model entries left at initial values:", missing_keys)

model_dict.update(filtered_dict)
model.load_state_dict(model_dict, strict=False)
model.to(device)
model.eval()
print("Multimodal model loaded and set to evaluation mode on", device)

# Start timing the evaluation
start_time = time.time()

all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels, clinical_data in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        clinical_data = clinical_data.to(device)

        class_logits, risk_score = model(images, clinical_data)
        # Predicted class = index of the largest logit for each sample.
        preds = torch.argmax(class_logits, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

end_time = time.time()

# Compute evaluation metrics
acc = accuracy_score(all_labels, all_preds)
cm = confusion_matrix(all_labels, all_preds)
report = classification_report(all_labels, all_preds, zero_division=0)

print("\nEvaluation complete.")
print("Test Accuracy: {:.2f}%".format(acc * 100))
print("Classification Report:\n", report)
# The elapsed time was measured but never reported.
print("Total evaluation time: {:.2f} seconds".format(end_time - start_time))


Using device: cpu
Full test dataset size: 50 samples.
Using subset test dataset with 50 samples.
Multimodal model loaded and set to evaluation mode on cpu

Evaluation complete.
Test Accuracy: 80.00%
Classification Report:
               precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       0.00      0.00      0.00         1

    accuracy                           0.80         5
   macro avg       0.40      0.50      0.44         5
weighted avg       0.64      0.80      0.71         5

Total evaluation time: 18000.24 seconds


In [None]:
import torch
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

# Release any cached CUDA memory if needed
torch.cuda.empty_cache()

# 1. Set device to CPU (for testing; adjust if you have enough GPU memory)
device = torch.device("cpu")
print("Using device:", device)

# 2. Specify model path (relative to the notebook's working directory)
model_path = "trained_multimodal_model.pth"
print("Loading model from:", model_path)

# 3. Build the multimodal model on CPU first
# (MultimodalModel and its submodules must be defined in previous cells)
model = MultimodalModel(num_classes=9, clinical_input_dim=10).cpu()

# 4. Copy in every checkpoint tensor whose name and shape match the model.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
state_dict = torch.load(model_path, map_location="cpu")
model_dict = model.state_dict()
filtered_state_dict = {k: v for k, v in state_dict.items()
                       if k in model_dict and model_dict[k].shape == v.shape}

# Tensors that are filtered out leave the corresponding model parameters at
# their random initial values, which makes the metrics below meaningless.
# Report the mismatch instead of hiding it.
skipped_keys = sorted(k for k in state_dict if k not in filtered_state_dict)
missing_keys = sorted(k for k in model_dict if k not in filtered_state_dict)
if skipped_keys or missing_keys:
    print("WARNING: checkpoint does not fully match the model architecture.")
    print("  Checkpoint entries skipped:", skipped_keys)
    print("  Model entries left at initial values:", missing_keys)

model_dict.update(filtered_state_dict)
model.load_state_dict(model_dict, strict=False)
model.to(device)
model.eval()
print("Multimodal model loaded and set to evaluation mode on", device)

# 5. Define test dataset paths (update these according to your local structure)
test_images_dir = "test/images"      # Folder containing test images
test_labels_dir = "test/labels"        # Folder containing test label files
clinical_csv = "thyroid_clean.csv"     # Path to the clinical CSV file

# 6. Create the test dataset and DataLoader.
# (MultimodalDataset and the 'transform' variable must be defined in previous cells)
test_dataset = MultimodalDataset(test_images_dir, test_labels_dir, clinical_csv, transform=transform)
# num_workers=0 keeps loading in the main process. Worker processes cannot
# import a Dataset class that is defined inside a notebook when the platform
# spawns them (as on Windows), so iterating a multi-worker loader would fail.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0)
print("Test dataset loaded with", len(test_dataset), "samples.")

# 7. Evaluate the model on the test dataset
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels, clinical_data in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        clinical_data = clinical_data.to(device)

        class_logits, risk_score = model(images, clinical_data)
        # Predicted class = index of the largest logit for each sample.
        preds = torch.argmax(class_logits, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 8. Compute evaluation metrics
acc = accuracy_score(all_labels, all_preds)
# The confusion matrix has one row/column per class that occurs in the true
# OR the predicted labels. Compute that class list once and use it for both
# the matrix and the tick labels so the axes stay aligned even when a class
# is predicted but never present in the ground truth.
class_labels = np.unique(np.concatenate([all_labels, all_preds]))
cm = confusion_matrix(all_labels, all_preds, labels=class_labels)
report = classification_report(all_labels, all_preds, zero_division=0)

print("Test Accuracy: {:.2f}%".format(acc * 100))
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", report)

# 9. Plot the confusion matrix
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels,
            yticklabels=class_labels,
            ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix")
plt.show()
