In [5]:
# Imports
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Hugging Face Transformers for BART summarization
from transformers import BartForConditionalGeneration, BartTokenizer

# Set device to CPU (or GPU if available, but here we'll use CPU as per your environment)
device = torch.device("cpu")
print("Using device:", device)

# Free up any cached GPU memory (if needed)
torch.cuda.empty_cache()


Using device: cpu


In [6]:
import torch
import torch.nn as nn

# ----- Hybrid Imaging Model -----
class HybridImagingModel(nn.Module):
    def __init__(self):
        super(HybridImagingModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1))
        )
        self.fc = nn.Linear(64, 512)
    
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

# ----- Clinical Model (MLP) -----
class ClinicalModel(nn.Module):
    def __init__(self, input_dim, output_dim=2):
        super(ClinicalModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# ----- Fusion Module -----
class FusionModule(nn.Module):
    def __init__(self, img_dim=512, clin_dim=2, fused_dim=256, num_classes=9):
        super(FusionModule, self).__init__()
        self.img_proj = nn.Linear(img_dim, fused_dim)
        self.clin_proj = nn.Linear(clin_dim, fused_dim)
        self.gate_fc = nn.Sequential(
            nn.Linear(2 * fused_dim, fused_dim),
            nn.ReLU(inplace=True),
            nn.Linear(fused_dim, fused_dim),
            nn.Sigmoid()
        )
        self.classifier = nn.Linear(fused_dim, num_classes)
        self.risk_predictor = nn.Linear(fused_dim, 1)
    
    def forward(self, f_img, f_clin):
        img_feat = self.img_proj(f_img)
        clin_feat = self.clin_proj(f_clin)
        combined = torch.cat((img_feat, clin_feat), dim=1)
        gate = self.gate_fc(combined)
        fused_feature = gate * img_feat + (1 - gate) * clin_feat
        class_logits = self.classifier(fused_feature)
        risk_score = self.risk_predictor(fused_feature)
        return class_logits, risk_score, fused_feature

# ----- Integrated Multimodal Model -----
class MultimodalModel(nn.Module):
    def __init__(self, num_classes, clinical_input_dim):
        super(MultimodalModel, self).__init__()
        self.imaging_model = HybridImagingModel()  # Outputs 512-dim imaging features
        self.clinical_model = ClinicalModel(input_dim=clinical_input_dim, output_dim=2)
        self.fusion_module = FusionModule(img_dim=512, clin_dim=2, fused_dim=256, num_classes=num_classes)
    
    def forward(self, image, clinical_data):
        img_feat = self.imaging_model(image)         # (batch, 512)
        clin_feat = self.clinical_model(clinical_data) # (batch, 2)
        class_logits, risk_score, _ = self.fusion_module(img_feat, clin_feat)
        return class_logits, risk_score

# Quick test with dummy data:
dummy_image = torch.randn(4, 3, 640, 640)
dummy_clinical = torch.randn(4, 10)  # Assume clinical_input_dim = 10
model_test = MultimodalModel(num_classes=9, clinical_input_dim=10)
class_out, risk_out = model_test(dummy_image, dummy_clinical)
print("Multimodal Model Classification Output Shape:", class_out.shape)  # Expected: [4, 9]
print("Multimodal Model Risk Prediction Output Shape:", risk_out.shape)   # Expected: [4, 1]


Multimodal Model Classification Output Shape: torch.Size([4, 9])
Multimodal Model Risk Prediction Output Shape: torch.Size([4, 1])


In [9]:
import torch

# Make sure the following variable 'device' is defined in a previous cell (or define here)
device = torch.device("cpu")  # For local testing on CPU

# Path to your saved multimodal model (.pth file)
model_path = "/kaggle/input/multi_model/pytorch/default/1/trained_multimodal_model.pth"
print("Loading model from:", model_path)

# Set model parameters based on your training setup:
# - num_classes: the number of imaging classes (here 9)
# - clinical_input_dim: the original clinical feature dimension (e.g., 18) that was reduced to 2 by your ClinicalModel
clinical_input_dim = 18  # Update based on your clinical CSV preprocessing
num_classes = 9

# Initialize the model on CPU first to avoid memory spikes
model = MultimodalModel(num_classes=num_classes, clinical_input_dim=clinical_input_dim).to("cpu")

# Load the state dictionary on CPU (using strict=False to ignore minor mismatches)
state_dict = torch.load(model_path, map_location="cpu")
model.load_state_dict(state_dict, strict=False)

# Move the model to the target device and set to evaluation mode
model.to(device)
model.eval()
print("Multimodal model loaded and set to evaluation mode on", device)


Loading model from: /kaggle/input/multi_model/pytorch/default/1/trained_multimodal_model.pth


  state_dict = torch.load(model_path, map_location="cpu")


Multimodal model loaded and set to evaluation mode on cpu


In [11]:
# For example, load a new ultrasound image for inference:
test_image_path = "/kaggle/input/dataset3-follicular/Follicular_Variant_Thyroid_CA.v1i.yolov9/test/images/FVPTC100x-digitalzoom-well-developed-PTC-nuclear-features_png.rf.2bfc16e232a7c744345f3e7ccda6aa3b.jpg"  # Update with your file path
test_clinical_csv = "/kaggle/input/clinical-dataset/thyroid_clean.csv"  # The CSV file with clinical data

# Load and preprocess the image
image = cv2.imread(test_image_path)
if image is None:
    raise RuntimeError(f"Unable to read test image: {test_image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Use the same transforms as during training (adjust size if needed)
transform = T.Compose([
    T.ToPILImage(),
    T.Resize((640, 640)),
    T.ToTensor(),
])
image_tensor = transform(image).unsqueeze(0)  # Shape: [1, 3, 640, 640]

# Prepare clinical data input:
# Option 1: Manually input the clinical data as a dictionary (example values)
clinical_input = {
    "age": 45,
    "gender": 1,
    "FT3": 4.2,
    "FT4": 13.7,
    "TSH": 1.5,
    "TPO": 0.3,
    "TGAb": 1.2,
    "site": 0,
    "echo_pattern": 0,
    "multifocality": 0,
    "size": 1.2,
    "shape": 0,
    "margin": 1,
    "calcification": 0,
    "echo_strength": 3,
    "blood_flow": 0,
    "composition": 2,
    "multilateral": 0
}
# Convert the clinical input into a tensor.
# Ensure the order of features matches the order used during training (after dropping 'id' and 'mal').
clinical_features = np.array([clinical_input[key] for key in sorted(clinical_input.keys())], dtype=np.float32)
clinical_tensor = torch.tensor(clinical_features).unsqueeze(0)  # Shape: [1, clinical_input_dim]

print("Test image and clinical data prepared.")


Test image and clinical data prepared.


In [12]:
with torch.no_grad():
    # Run the model
    class_logits, risk_score = model(image_tensor.to(device), clinical_tensor.to(device))
    predicted_class = torch.argmax(class_logits, dim=1).item()
    risk_value = risk_score.squeeze().item()

print(f"Predicted Class: {predicted_class}")
print(f"Risk Score: {risk_value:.2f}")


Predicted Class: 1
Risk Score: 0.24


In [13]:
# Construct a prompt based on the multimodal model outputs.
# Map your predicted class to a diagnosis label (for example):
diagnosis_map = {
    0: "Benign",
    1: "Papillary Thyroid Carcinoma",
    2: "Follicular Thyroid Carcinoma",
    3: "Medullary Thyroid Carcinoma",
    4: "Anaplastic Thyroid Carcinoma",
    5: "Other",
    6: "Undetermined",
    7: "Suspicious",
    8: "Normal"
}
diagnosis = diagnosis_map.get(predicted_class, "Unknown")

prompt = (f"Based on the ultrasound image analysis and clinical data, "
          f"the model predicts {diagnosis} with a risk score of {risk_value:.2f}. "
          f"Please generate a concise diagnostic report summarizing these findings.")

print("Generated prompt for summarization:")
print(prompt)


Generated prompt for summarization:
Based on the ultrasound image analysis and clinical data, the model predicts Papillary Thyroid Carcinoma with a risk score of 0.24. Please generate a concise diagnostic report summarizing these findings.


In [14]:
# Load the pretrained BART model and tokenizer for summarization.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn").to(device)

# Tokenize the prompt.
inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)

# Generate the summary.
summary_ids = bart_model.generate(inputs.input_ids, num_beams=4, max_length=150, early_stopping=True)
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

print("Generated Diagnostic Report:")
print(summary)


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Generated Diagnostic Report:
Based on the ultrasound image analysis and clinical data, the model predicts Papillary Thyroid Carcinoma with a risk score of 0.24. Please generate a concise diagnostic report summarizing these findings. Based on the Ultrasound image analysis, clinical data and the model, the models predicts a risk of 1 in 100,000.


In [15]:
# Format the final output as a JSON-like dictionary.
final_output = {
    "diagnosis": diagnosis,
    "risk_score": risk_value,
    "report": summary
}

print("Final Output:")
print(final_output)


Final Output:
{'diagnosis': 'Papillary Thyroid Carcinoma', 'risk_score': 0.24345403909683228, 'report': 'Based on the ultrasound image analysis and clinical data, the model predicts Papillary Thyroid Carcinoma with a risk score of 0.24. Please generate a concise diagnostic report summarizing these findings. Based on the Ultrasound image analysis, clinical data and the model, the models predicts a risk of 1 in 100,000.'}
