In [1]:
# ============================================================================
# CELL 1: INSTALL DEPENDENCIES
# ============================================================================
# Run this cell first and restart runtime after completion
# Group 1 - Model Loading & Quantization
!pip install -q bitsandbytes transformers accelerate peft pillow sentencepiece protobuf
# Group 2 - RAG & Vector Database
!pip install -q langchain langchain-community faiss-cpu sentence-transformers datasets pandas
# Group 3 - Computer Vision
!pip install -q opencv-python-headless imutils scikit-image webcolors scikit-learn
# Group 4 - Segment Anything (SAM) for fallback segmentation
!pip install -q segment-anything
# Group 5 - User Interface
!pip install -q gradio
# Group 6 - Additional utilities
!pip install -q beautifulsoup4 requests matplotlib
print("‚úÖ All dependencies installed! Please restart the runtime now.")
print("Go to: Runtime ‚Üí Restart runtime")

‚úÖ All dependencies installed! Please restart the runtime now.
Go to: Runtime ‚Üí Restart runtime


In [2]:
# ============================================================================
# CELL 2: IMPORTS
# ============================================================================
import os
import re
import cv2
import torch
import numpy as np
import pandas as pd
import gradio as gr
from PIL import Image
from datetime import datetime
import warnings
import traceback
from typing import Dict, List, Tuple, Optional, Any
# Computer Vision
from skimage import morphology, measure
from sklearn.cluster import KMeans
import webcolors
# HuggingFace & Models
from transformers import (
    AutoProcessor,
    MllamaForConditionalGeneration,
    BitsAndBytesConfig
)
from peft import PeftModel
from huggingface_hub import login
# RAG Components
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from datasets import load_dataset, concatenate_datasets
# Visualization
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
print("‚úÖ All imports successful!")

‚úÖ All imports successful!


In [3]:
# ============================================================================
# CELL 3: HUGGINGFACE AUTHENTICATION (SAFE)
# ============================================================================
from huggingface_hub import login
import os
import getpass

def authenticate_huggingface():
    """Authenticate with HuggingFace Hub securely"""
    try:
        # Try Colab secrets
        from google.colab import userdata
        hf_token = userdata.get('HF_TOKEN')
        if hf_token:
            print("‚úÖ Found HF_TOKEN in Colab secrets")
        else:
            raise ValueError
    except:
        print("‚ö†Ô∏è HF_TOKEN not found in Colab secrets")
        hf_token = getpass.getpass("Enter your HuggingFace token (hidden): ")

    try:
        login(token=hf_token)
        print("‚úÖ Successfully logged into HuggingFace Hub!")
        return True
    except Exception as e:
        print(f"‚ùå Login failed: {e}")
        return False

authenticate_huggingface()


‚ö†Ô∏è HF_TOKEN not found in Colab secrets
Enter your HuggingFace token (hidden): ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑
‚úÖ Successfully logged into HuggingFace Hub!


True

In [4]:
# ============================================================================
# CELL 4: BUILD ENHANCED RAG KNOWLEDGE BASE
# ============================================================================
# Define medical keywords for classification
SKIN_KEYWORDS = [
    'melanoma', 'skin cancer', 'basal cell carcinoma', 'squamous cell carcinoma',
    'dermatology', 'dermoscopy', 'cutaneous', 'skin lesion', 'nevus', 'mole',
    'pigment network', 'blue-white veil', 'globules', 'dots', 'vascular',
    'atypical', 'dysplastic', 'malignant', 'benign', 'biopsy', 'excision',
    'breslow', 'clark level', 'sentinel node', 'metastasis', 'prognosis'
]
CARDIO_KEYWORDS = [
    'cardiovascular', 'cardiac', 'heart', 'myocardial', 'coronary',
    'stroke', 'hypertension', 'infarction'
]
def classify_abstract(text: str) -> Optional[str]:
    """Classify abstract by medical domain"""
    if not text:
        return None
    text_lower = text.lower()

    has_skin = any(
        re.search(r'\b' + re.escape(kw) + r'\b', text_lower)
        if ' ' not in kw
        else kw in text_lower
        for kw in SKIN_KEYWORDS
    )
    has_cardio = any(
        re.search(r'\b' + re.escape(kw) + r'\b', text_lower)
        for kw in CARDIO_KEYWORDS
    )

    if has_skin and has_cardio:
        return 'both'
    elif has_skin:
        return 'skin_cancer'
    elif has_cardio:
        return 'cardio'
    return None
def build_enhanced_rag_system():
    """Build multi-source RAG system with weighted retrieval"""
    print("üîÑ Building Enhanced RAG Knowledge Base...")

    all_documents = []

    # Source 1: Medical Abstracts (PubMed)
    print("  üìö Loading medical abstracts...")
    try:
        dataset = load_dataset("TimSchopf/medical_abstracts", trust_remote_code=True)
        combined = concatenate_datasets([dataset['train'], dataset['test']])

        for item in combined:
            text = item.get('medical_abstract', '') or item.get('text', '')
            if text:
                category = classify_abstract(text)
                if category in ['skin_cancer', 'both']:
                    all_documents.append(Document(
                        page_content=text,
                        metadata={
                            'source': 'pubmed',
                            'weight': 1.0,
                            'type': 'research'
                        }
                    ))
        print(f"    ‚úÖ Loaded {len(all_documents)} relevant abstracts")
    except Exception as e:
        print(f"    ‚ö†Ô∏è Error loading abstracts: {e}")

    # Source 2: Clinical Guidelines (embedded knowledge)
    print("  üìã Adding clinical guidelines...")
    guidelines = [
        {
            "content": """ABCDE Criteria for Melanoma Detection (AAD Guidelines):
            A - Asymmetry: One half unlike the other half
            B - Border: Irregular, scalloped or poorly defined border
            C - Color: Varied from one area to another; shades of tan, brown, black, white, red, blue
            D - Diameter: Melanomas are usually greater than 6mm when diagnosed, but can be smaller
            E - Evolving: A mole or skin lesion that looks different from the rest or is changing in size, shape or color
            Any lesion meeting 2+ criteria warrants dermatological evaluation.""",
            "weight": 2.0
        },
        {
            "content": """Dermoscopic Structures in Melanoma (Consensus Guidelines):
            - Blue-white veil: Blue-white structureless area, present in 35% of melanomas
            - Atypical pigment network: Irregular, thickened lines with variable mesh sizes
            - Irregular dots/globules: Black, brown or blue dots of variable size randomly distributed
            - Irregular streaks: Radial projections at the periphery, asymmetrically distributed
            - Regression structures: Blue-gray peppering, white scar-like areas
            - Atypical vessels: Polymorphous vessels including dotted, linear irregular, and hairpin vessels
            Presence of 3+ structures highly predictive of melanoma.""",
            "weight": 2.0
        },
        {
            "content": """Breslow Thickness and Prognosis (AJCC Guidelines):
            - In situ: Confined to epidermis, excellent prognosis
            - ‚â§1.0mm: 5-year survival >95%
            - 1.01-2.0mm: 5-year survival 80-90%
            - 2.01-4.0mm: 5-year survival 65-75%
            - >4.0mm: 5-year survival <50%
            Sentinel lymph node biopsy recommended for lesions >0.8mm with ulceration or >1.0mm.""",
            "weight": 2.0
        },
        {
            "content": """Benign vs Malignant Dermoscopic Patterns:
            BENIGN patterns: Symmetric pigment network, regular globules at periphery,
            homogeneous brown color, cobblestone pattern, comma vessels.
            MALIGNANT patterns: Asymmetry in structure and color, atypical network,
            blue-white veil, irregular dots/globules, regression, polymorphous vessels.
            The 2-step algorithm: Step 1 - Determine if melanocytic. Step 2 - If melanocytic,
            determine if benign or malignant using pattern analysis.""",
            "weight": 2.0
        }
    ]

    for guideline in guidelines:
        all_documents.append(Document(
            page_content=guideline["content"],
            metadata={
                'source': 'clinical_guidelines',
                'weight': guideline["weight"],
                'type': 'guideline'
            }
        ))
    print(f"    ‚úÖ Added {len(guidelines)} clinical guidelines")

    # Source 3: Reference Knowledge
    print("  üìñ Adding reference knowledge...")
    reference_docs = [
        {
            "content": """Pigment Network Analysis:
            Typical network: Light brown, thin lines forming a regular grid, fading at periphery.
            Common in benign nevi. Atypical network: Thick, irregular lines with variable mesh size,
            abrupt termination at periphery. Concerning for melanoma. Broadened network: Uniformly
            thick lines, seen in dysplastic nevi. Negative network: Serpiginous interconnecting
            hypopigmented lines, highly specific for melanoma.""",
            "weight": 1.5
        },
        {
            "content": """Vascular Patterns in Dermoscopy:
            Comma vessels: Curved, comma-shaped, typical of dermatofibromas.
            Dotted vessels: Small red dots, seen in melanoma and Spitz nevi.
            Linear irregular: Irregular caliber and course, melanoma indicator.
            Arborizing vessels: Tree-like branching, pathognomonic for BCC.
            Hairpin vessels: Loop-shaped, common in seborrheic keratosis.
            Polymorphous: Multiple vessel types, highly suggestive of melanoma.""",
            "weight": 1.5
        },
        {
            "content": """Blue-White Veil Clinical Significance:
            Definition: Irregular, confluent blue-gray pigmentation with overlying white ground-glass film.
            Histopathology: Corresponds to melanin in dermis with orthokeratosis above.
            Specificity: Over 90% specific for melanoma when present focally.
            Differential: Can be seen in heavily pigmented blue nevi.
            Clinical action: Presence warrants excision with histopathological examination.""",
            "weight": 1.5
        }
    ]

    for ref in reference_docs:
        all_documents.append(Document(
            page_content=ref["content"],
            metadata={
                'source': 'reference',
                'weight': ref["weight"],
                'type': 'reference'
            }
        ))
    print(f"    ‚úÖ Added {len(reference_docs)} reference documents")

    # Text chunking
    print("  ‚úÇÔ∏è Chunking documents...")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )
    chunks = text_splitter.split_documents(all_documents)
    print(f"    ‚úÖ Created {len(chunks)} chunks")

    # Create embeddings and vector store
    print("  üßÆ Creating embeddings and FAISS index...")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = FAISS.from_documents(chunks, embeddings)
    print(f"‚úÖ RAG system ready with {len(chunks)} searchable chunks!")

    return vectorstore, embeddings
# Build the RAG system
vectorstore, embeddings = build_enhanced_rag_system()

`trust_remote_code` is not supported anymore.
Please check that the Hugging Face dataset 'TimSchopf/medical_abstracts' isn't based on a loading script and remove `trust_remote_code`.
If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet.
ERROR:datasets.load:`trust_remote_code` is not supported anymore.
Please check that the Hugging Face dataset 'TimSchopf/medical_abstracts' isn't based on a loading script and remove `trust_remote_code`.
If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet.


üîÑ Building Enhanced RAG Knowledge Base...
  üìö Loading medical abstracts...
    ‚úÖ Loaded 3241 relevant abstracts
  üìã Adding clinical guidelines...
    ‚úÖ Added 4 clinical guidelines
  üìñ Adding reference knowledge...
    ‚úÖ Added 3 reference documents
  ‚úÇÔ∏è Chunking documents...
    ‚úÖ Created 10961 chunks
  üßÆ Creating embeddings and FAISS index...
‚úÖ RAG system ready with 10961 searchable chunks!


In [5]:
# ============================================================================
# CELL 5: WEIGHTED RETRIEVAL AND FACT VERIFICATION
# ============================================================================
def weighted_retrieval(vectorstore, query: str, k: int = 10) -> List[Tuple[Document, float]]:
    """Retrieve documents with authority-weighted ranking"""
    # Get more results than needed
    results = vectorstore.similarity_search_with_score(query, k=k*2)

    scored_results = []
    for doc, sim_score in results:
        weight = doc.metadata.get('weight', 1.0)

        # Lower score = better similarity, so divide by weight
        final_score = sim_score / weight
        scored_results.append((doc, final_score, sim_score))

    # Sort by final score (lower is better)
    scored_results.sort(key=lambda x: x[1])

    # Return top k with original similarity scores
    return [(doc, orig_score) for doc, _, orig_score in scored_results[:k]]
def verify_claim_support(claim: str, doc_content: str) -> bool:
    """Check if document content supports a claim"""
    claim_words = set(claim.lower().split())
    doc_words = set(doc_content.lower().split())

    # Simple overlap check
    overlap = len(claim_words & doc_words) / len(claim_words) if claim_words else 0
    return overlap > 0.3
def verify_with_multiple_sources(claim: str, vectorstore, k: int = 5) -> Tuple[str, float]:
    """Verify claim across multiple sources"""
    results = vectorstore.similarity_search(claim, k=k)

    support_count = sum(
        1 for doc in results
        if verify_claim_support(claim, doc.page_content)
    )

    confidence = support_count / len(results) if results else 0

    if confidence > 0.8:
        return "High confidence (supported by multiple sources)", confidence
    elif confidence > 0.5:
        return "Moderate confidence (some support)", confidence
    else:
        return "Low confidence (limited support)", confidence
print("‚úÖ Retrieval functions ready!")

‚úÖ Retrieval functions ready!


In [6]:
# ============================================================================
# CELL 6: LOAD LLAMA 3.2 VISION MODEL
# ============================================================================
print("üîÑ Loading Llama 3.2 Vision Model (this takes 2-3 minutes)...")
# Quantization configuration for memory efficiency
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    llm_int8_skip_modules=["vision_model"]
)
# Load base model
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
print(f"  üì• Loading base model: {model_id}")
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)
# Load processor
print("  üì• Loading processor...")
processor = AutoProcessor.from_pretrained(model_id)
# Load DermatoLLama adapter
print("  üì• Loading DermatoLLama adapter...")
try:
    adapter_id = "DermaVLM/DermatoLLama-50k"
    model = PeftModel.from_pretrained(model, adapter_id)
    print("  ‚úÖ DermatoLLama adapter loaded!")
except Exception as e:
    print(f"  ‚ö†Ô∏è Could not load adapter: {e}")
    print("  Continuing with base model...")
model.eval()
print("‚úÖ Vision model ready!")
print(f"  Device: {next(model.parameters()).device}")

`torch_dtype` is deprecated! Use `dtype` instead!


üîÑ Loading Llama 3.2 Vision Model (this takes 2-3 minutes)...
  üì• Loading base model: meta-llama/Llama-3.2-11B-Vision-Instruct


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

  üì• Loading processor...
  üì• Loading DermatoLLama adapter...
  ‚úÖ DermatoLLama adapter loaded!
‚úÖ Vision model ready!
  Device: cuda:0


In [7]:
# ============================================================================
# CELL 7: CALIBRATION SYSTEM
# ============================================================================
def detect_reference_object(img_rgb: np.ndarray) -> Tuple[float, float, str]:
    """
    Detect calibration reference objects in the image.
    Returns: (pixels_per_mm, confidence, method)
    """
    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
    h, w = gray.shape

    # Try to detect ruler markings (evenly spaced lines)
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=50,
                            minLineLength=20, maxLineGap=5)

    if lines is not None and len(lines) > 10:
        # Look for vertical lines at regular intervals (ruler graduations)
        vertical_lines = []
        for line in lines:
            x1, y1, x2, y2 = line[0]
            if abs(x2 - x1) < 5:  # Nearly vertical
                vertical_lines.append((x1 + x2) / 2)

        if len(vertical_lines) > 5:
            vertical_lines = sorted(vertical_lines)
            gaps = np.diff(vertical_lines)

            # Check for regular spacing
            if len(gaps) > 3:
                median_gap = np.median(gaps)
                consistent = np.sum(np.abs(gaps - median_gap) < median_gap * 0.3)

                if consistent / len(gaps) > 0.6:
                    # Assume 1mm between ruler marks
                    pixels_per_mm = median_gap
                    return pixels_per_mm, 0.85, "ruler_detected"

    # Try to detect circles (coins)
    circles = cv2.HoughCircles(
        gray, cv2.HOUGH_GRADIENT, dp=1, minDist=50,
        param1=100, param2=50, minRadius=20, maxRadius=100
    )

    if circles is not None:
        circles = np.uint16(np.around(circles))
        for circle in circles[0, :]:
            x, y, r = circle
            # Check if circle is near edge (likely calibration object)
            if x < w * 0.2 or x > w * 0.8 or y < h * 0.2 or y > h * 0.8:
                # Assume US penny (19.05mm diameter)
                diameter_pixels = r * 2
                pixels_per_mm = diameter_pixels / 19.05
                return pixels_per_mm, 0.70, "coin_detected"

    # Default: estimate based on typical dermoscopy
    # Most dermoscopes have ~10-15 pixels per mm at standard zoom
    return 10.0, 0.3, "estimated_dermoscopy_default"
def apply_calibration(measurements: Dict, pixels_per_mm: float) -> Dict:
    """Convert pixel measurements to real-world units"""
    calibrated = measurements.copy()

    if 'area_pixels' in measurements:
        calibrated['area_mm2'] = measurements['area_pixels'] / (pixels_per_mm ** 2)
    if 'perimeter_pixels' in measurements:
        calibrated['perimeter_mm'] = measurements['perimeter_pixels'] / pixels_per_mm
    if 'diameter_pixels' in measurements:
        calibrated['diameter_mm'] = measurements['diameter_pixels'] / pixels_per_mm

    calibrated['pixels_per_mm'] = pixels_per_mm
    return calibrated
print("‚úÖ Calibration system ready!")

‚úÖ Calibration system ready!


In [8]:
# ============================================================================
# CELL 8: IMAGE PREPROCESSING
# ============================================================================
def load_and_preprocess_pil(pil_image: Image.Image, max_dim: int = 1024) -> np.ndarray:
    """Convert PIL image to OpenCV format and resize"""
    img = np.array(pil_image)

    # Handle grayscale
    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    # Handle RGBA
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)

    # Resize if needed
    h, w = img.shape[:2]
    if max(h, w) > max_dim:
        scale = max_dim / max(h, w)
        new_w, new_h = int(w * scale), int(h * scale)
        img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    return img
def remove_hairs(img_rgb: np.ndarray) -> np.ndarray:
    """Remove hair artifacts from dermoscopic images using black-hat transform"""
    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

    # Black-hat transform to detect dark lines (hairs)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel)

    # Threshold to create hair mask
    _, hair_mask = cv2.threshold(blackhat, 10, 255, cv2.THRESH_BINARY)

    # Dilate to ensure full coverage
    hair_mask = cv2.dilate(hair_mask, kernel, iterations=1)

    # Inpaint using Telea algorithm
    result = cv2.inpaint(img_rgb, hair_mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)

    return result
print("‚úÖ Preprocessing functions ready!")

‚úÖ Preprocessing functions ready!


In [9]:
# ============================================================================
# CELL 9: LESION SEGMENTATION
# ============================================================================
def segment_lesion_kmeans(img_rgb: np.ndarray, k: int = 2,
                          min_size: int = 500) -> Tuple[Optional[np.ndarray], float]:
    """
    Segment lesion using K-means clustering.
    Returns: (mask, quality_score) or (None, 0) if failed
    """
    try:
        # Convert to LAB color space
        img_lab = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2LAB)
        h, w = img_lab.shape[:2]

        # Reshape for K-means
        pixels = img_lab.reshape(-1, 3).astype(np.float32)

        # Apply K-means
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
        _, labels, centers = cv2.kmeans(pixels, k, None, criteria, 10,
                                         cv2.KMEANS_RANDOM_CENTERS)

        # Find darkest cluster (likely lesion)
        avg_lightness = [centers[i][0] for i in range(k)]
        lesion_cluster = np.argmin(avg_lightness)

        # Create mask
        mask = (labels.reshape(h, w) == lesion_cluster).astype(np.uint8) * 255

        # Post-processing
        mask_bool = mask > 0
        mask_cleaned = morphology.remove_small_objects(mask_bool, min_size=min_size)
        mask_filled = morphology.remove_small_holes(mask_cleaned, area_threshold=min_size)
        mask = (mask_filled.astype(np.uint8)) * 255

        # Calculate quality score
        lesion_ratio = np.sum(mask > 0) / (h * w)
        quality_score = 1.0

        # Penalize if lesion too small or too large
        if lesion_ratio < 0.05 or lesion_ratio > 0.8:
            quality_score *= 0.5

        # Check connectivity
        num_labels, _ = cv2.connectedComponents(mask)
        if num_labels > 2:  # Multiple disconnected regions
            quality_score *= 0.7

        if quality_score < 0.4:
            return None, quality_score

        return mask, quality_score

    except Exception as e:
        print(f"K-means segmentation error: {e}")
        return None, 0.0
def segment_lesion_simple_fallback(img_rgb: np.ndarray) -> np.ndarray:
    """Simple fallback segmentation using Otsu thresholding"""
    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

    # Apply Gaussian blur
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Otsu thresholding
    _, mask = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    return mask
def segment_lesion(img_rgb: np.ndarray, k: int = 2,
                   min_size: int = 500) -> Tuple[np.ndarray, str]:
    """
    Robust segmentation with fallback.
    Returns: (mask, method_used)
    """
    # Try K-means first
    mask, quality = segment_lesion_kmeans(img_rgb, k, min_size)

    if mask is not None and quality >= 0.4:
        return mask, "kmeans"

    # Fallback to simple thresholding
    print("  ‚ö†Ô∏è K-means failed, using fallback segmentation")
    mask = segment_lesion_simple_fallback(img_rgb)
    return mask, "fallback_otsu"
print("‚úÖ Segmentation functions ready!")

‚úÖ Segmentation functions ready!


In [10]:
# ============================================================================
# CELL 10: SHAPE FEATURE EXTRACTION
# ============================================================================
def compute_shape_features(mask: np.ndarray, pixels_per_mm: float = 10.0) -> Dict:
    """Extract geometric measurements from lesion mask"""
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return {"error": "No contours found"}

    # Get largest contour
    contour = max(contours, key=cv2.contourArea)

    # Basic measurements
    area_pixels = cv2.contourArea(contour)
    perimeter_pixels = cv2.arcLength(contour, True)
    x, y, w, h = cv2.boundingRect(contour)
    diameter_pixels = max(w, h)

    # Circularity (1.0 = perfect circle)
    circularity = 4 * np.pi * area_pixels / (perimeter_pixels ** 2) if perimeter_pixels > 0 else 0

    # Asymmetry calculation
    asymmetry_score = calculate_asymmetry(mask, contour)

    # Convert to mm
    features = {
        'area_pixels': area_pixels,
        'area_mm2': area_pixels / (pixels_per_mm ** 2),
        'perimeter_pixels': perimeter_pixels,
        'perimeter_mm': perimeter_pixels / pixels_per_mm,
        'diameter_pixels': diameter_pixels,
        'diameter_mm': diameter_pixels / pixels_per_mm,
        'circularity': circularity,
        'asymmetry': asymmetry_score,
        'bounding_box': (x, y, w, h),
        'pixels_per_mm': pixels_per_mm
    }

    return features
def calculate_asymmetry(mask: np.ndarray, contour: np.ndarray) -> float:
    """Calculate asymmetry score (0-100, higher = more asymmetric)"""
    try:
        # Get contour points
        points = contour.reshape(-1, 2).astype(np.float32)

        # Calculate centroid
        M = cv2.moments(contour)
        if M['m00'] == 0:
            return 50.0
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])

        # Center the points
        centered = points - np.array([cx, cy])

        # Compute covariance matrix and principal axes
        cov = np.cov(centered.T)
        eigenvalues, eigenvectors = np.linalg.eig(cov)

        # Get rotation angle
        angle = np.arctan2(eigenvectors[1, 0], eigenvectors[0, 0])

        # Rotate mask to align with principal axis
        h, w = mask.shape
        rotation_matrix = cv2.getRotationMatrix2D((cx, cy), np.degrees(angle), 1.0)
        rotated_mask = cv2.warpAffine(mask, rotation_matrix, (w, h))

        # Split mask in half and compare
        left_half = rotated_mask[:, :cx]
        right_half = rotated_mask[:, cx:]

        # Flip right half
        right_flipped = cv2.flip(right_half, 1)

        # Resize to match
        min_w = min(left_half.shape[1], right_flipped.shape[1])
        left_half = left_half[:, :min_w]
        right_flipped = right_flipped[:, :min_w]

        # Calculate difference
        diff = np.abs(left_half.astype(float) - right_flipped.astype(float))
        total_pixels = np.sum(mask > 0)

        if total_pixels == 0:
            return 50.0

        asymmetry = (np.sum(diff > 0) / total_pixels) * 100
        return min(asymmetry, 100.0)

    except Exception as e:
        return 50.0
print("‚úÖ Shape feature extraction ready!")

‚úÖ Shape feature extraction ready!


In [11]:
# ============================================================================
# CELL 11: COLOR ANALYSIS
# ============================================================================
def classify_dermatological_color(rgb: Tuple[int, int, int]) -> str:
    """Map RGB to clinical terminology"""
    r, g, b = rgb

    if r > 200 and g > 180 and b > 180:
        return "white/depigmented"
    elif r > 150 and g < 100 and b < 100:
        return "red/erythematous"
    elif r > 180 and g > 120 and b > 120:
        return "pink"
    elif r < 60 and g < 60 and b < 60:
        return "black/very dark brown"
    elif b > r and b > g and b > 80:
        return "blue-gray (regression)"
    elif r > 100:
        if g > 80:
            if r > 180:
                return "light brown/tan"
            elif r > 120:
                return "medium brown"
            else:
                return "dark brown"
        else:
            return "dark brown"
    return "brown"
def get_color_name(rgb: Tuple[int, int, int]) -> str:
    """Get nearest CSS3 color name - compatible with all webcolors versions"""
    try:
        return webcolors.rgb_to_name(rgb)
    except (ValueError, AttributeError):
        min_dist = float('inf')
        closest = 'gray'
        color_map = {
            'black': (0, 0, 0), 'white': (255, 255, 255),
            'gray': (128, 128, 128), 'brown': (165, 42, 42),
            'tan': (210, 180, 140), 'sienna': (160, 82, 45),
            'pink': (255, 192, 203), 'salmon': (250, 128, 114),
            'maroon': (128, 0, 0), 'navy': (0, 0, 128),
        }
        for name, (r_c, g_c, b_c) in color_map.items():
            dist = (r_c - rgb[0])**2 + (g_c - rgb[1])**2 + (b_c - rgb[2])**2
            if dist < min_dist:
                min_dist = dist
                closest = name
        return closest
def analyze_colors(img_rgb: np.ndarray, mask: np.ndarray,
                   n_colors: int = 4) -> Tuple[List[Dict], np.ndarray, np.ndarray]:
    """Analyze dominant colors in lesion"""
    # Extract lesion pixels
    lesion_pixels = img_rgb[mask > 0]

    if len(lesion_pixels) < 100:
        return [], None, None

    # K-means clustering
    kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=10)
    labels = kmeans.fit_predict(lesion_pixels)
    centers = kmeans.cluster_centers_.astype(int)

    # Count pixels per cluster
    unique, counts = np.unique(labels, return_counts=True)
    total = len(labels)

    # Sort by frequency
    sorted_indices = np.argsort(counts)[::-1]

    colors = []
    for idx in sorted_indices:
        rgb = tuple(centers[idx])
        colors.append({
            'rgb': rgb,
            'css_name': get_color_name(rgb),
            'clinical_name': classify_dermatological_color(rgb),
            'percentage': counts[idx] / total * 100,
            'count': counts[idx]
        })

    return colors, centers, labels
def analyze_color_distribution(img_rgb: np.ndarray, mask: np.ndarray,
                               centers: np.ndarray, labels: np.ndarray) -> Dict:
    """Analyze spatial distribution of colors"""
    # Get lesion pixel coordinates
    y_coords, x_coords = np.where(mask > 0)

    if len(y_coords) == 0:
        return {}

    # Find center
    center_y = np.mean(y_coords)
    center_x = np.mean(x_coords)

    # Calculate distances from center
    distances = np.sqrt((y_coords - center_y)**2 + (x_coords - center_x)**2)
    max_dist = np.max(distances) if len(distances) > 0 else 1

    distribution = {}
    for i, center in enumerate(centers):
        color_mask = labels == i
        color_dists = distances[color_mask]

        if len(color_dists) > 0:
            central = np.sum(color_dists < max_dist * 0.5)
            peripheral = np.sum(color_dists >= max_dist * 0.5)
            total = len(color_dists)

            if central > peripheral * 1.5:
                location = "predominantly central"
            elif peripheral > central * 1.5:
                location = "predominantly peripheral"
            else:
                location = "mixed distribution"

            distribution[i] = {
                'central_pct': central / total * 100,
                'peripheral_pct': peripheral / total * 100,
                'location': location
            }

    return distribution
print("‚úÖ Color analysis functions ready!")

‚úÖ Color analysis functions ready!


In [12]:
# ============================================================================
# CELL 12: BORDER AND TEXTURE ANALYSIS
# ============================================================================
def assess_border_quality(mask: np.ndarray) -> Dict:
    """Evaluate border characteristics"""
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return {"error": "No contours found"}

    contour = max(contours, key=cv2.contourArea)
    perimeter = cv2.arcLength(contour, True)

    # Douglas-Peucker approximation
    epsilon = 0.02 * perimeter
    approx = cv2.approxPolyDP(contour, epsilon, True)
    num_corners = len(approx)

    # Irregularity score
    irregularity_score = (num_corners / perimeter) * 1000 if perimeter > 0 else 0

    # Border definition
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    dilated = cv2.dilate(mask, kernel, iterations=1)
    eroded = cv2.erode(mask, kernel, iterations=1)
    border_zone = dilated - eroded
    border_width = np.sum(border_zone > 0) / perimeter if perimeter > 0 else 0

    # Classifications
    if border_width < 3:
        definition = "well-defined"
    elif border_width < 6:
        definition = "moderately defined"
    else:
        definition = "poorly-defined"

    if irregularity_score > 10:
        regularity = "highly irregular/notched"
    elif irregularity_score > 5:
        regularity = "moderately irregular"
    else:
        regularity = "regular"

    return {
        'irregularity_score': irregularity_score,
        'num_corners': num_corners,
        'border_width': border_width,
        'regularity': regularity,
        'definition': definition
    }
def analyze_texture_patterns(img_rgb: np.ndarray, mask: np.ndarray) -> Dict:
    """Assess surface texture and pigmentation patterns"""
    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
    lesion_pixels = gray[mask > 0]

    if len(lesion_pixels) == 0:
        return {"error": "No lesion pixels"}

    # Texture variance
    variance = np.var(lesion_pixels)

    # Edge detection within lesion
    edges = cv2.Canny(gray, 50, 150)
    lesion_edges = edges[mask > 0]
    edge_density = np.sum(lesion_edges > 0) / len(lesion_pixels)

    # Pattern classification
    if edge_density > 0.15:
        pattern = "reticular/network pattern visible"
    elif edge_density > 0.05:
        pattern = "irregular pigmentation pattern"
    else:
        pattern = "homogeneous pigmentation"

    # Surface classification
    if variance > 1000:
        surface = "highly textured/varied"
    elif variance > 500:
        surface = "moderately textured"
    else:
        surface = "smooth/uniform"

    return {
        'variance': variance,
        'edge_density': edge_density,
        'pattern': pattern,
        'surface': surface
    }
print("‚úÖ Border and texture analysis ready!")

‚úÖ Border and texture analysis ready!


In [13]:
# ============================================================================
# CELL 13: BLUE-WHITE VEIL DETECTION
# ============================================================================
def detect_blue_white_veil(img_rgb: np.ndarray, mask: np.ndarray) -> Dict:
    """Detect blue-white veil (melanoma indicator)"""
    # Convert to HSV
    img_hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)

    # Blue-white color ranges
    # Blue: H=100-130 (in OpenCV: 50-65), S=30-150, V=80-180
    blue_lower = np.array([50, 30, 80])
    blue_upper = np.array([130, 150, 180])
    blue_mask = cv2.inRange(img_hsv, blue_lower, blue_upper)

    # White: Low saturation, high value
    white_lower = np.array([0, 0, 200])
    white_upper = np.array([180, 30, 255])
    white_mask = cv2.inRange(img_hsv, white_lower, white_upper)

    # Combined blue-white
    bw_mask = cv2.bitwise_or(blue_mask, white_mask)

    # Intersection with lesion
    bw_in_lesion = cv2.bitwise_and(bw_mask, mask)

    # Calculate coverage
    lesion_area = np.sum(mask > 0)
    bw_area = np.sum(bw_in_lesion > 0)
    coverage = (bw_area / lesion_area * 100) if lesion_area > 0 else 0

    # Determine distribution
    if coverage < 5:
        distribution = "none"
        present = False
    else:
        present = True
        # Check if focal or diffuse
        y_coords, x_coords = np.where(bw_in_lesion > 0)
        if len(y_coords) > 0:
            spread = np.std(y_coords) + np.std(x_coords)
            lesion_y, lesion_x = np.where(mask > 0)
            lesion_spread = np.std(lesion_y) + np.std(lesion_x)

            if spread < lesion_spread * 0.5:
                distribution = "focal"
            else:
                distribution = "diffuse"
        else:
            distribution = "none"

    return {
        'present': present,
        'coverage_percentage': round(coverage, 1),
        'distribution': distribution,
        'clinical_significance': "Blue-white veil is a melanoma-specific structure. "
                                 "Present in ~35% of melanomas. Presence significantly "
                                 "increases melanoma probability." if present else ""
    }
print("‚úÖ Blue-white veil detection ready!")

‚úÖ Blue-white veil detection ready!


In [14]:
# ============================================================================
# CELL 14: PIGMENT NETWORK DETECTION
# ============================================================================
def detect_pigment_network(img_rgb: np.ndarray, mask: np.ndarray) -> Dict:
    """Detect and classify pigment network"""
    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

    # Apply adaptive thresholding to detect lines
    block_size = 15
    adaptive_thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, block_size, 2
    )

    # Apply within lesion
    network_mask = cv2.bitwise_and(adaptive_thresh, mask)

    # Morphological operations to identify network lines
    kernel_thin = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    kernel_thick = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    thin_lines = cv2.morphologyEx(network_mask, cv2.MORPH_OPEN, kernel_thin)

    # Calculate network density
    lesion_area = np.sum(mask > 0)
    network_area = np.sum(thin_lines > 0)
    network_density = (network_area / lesion_area) if lesion_area > 0 else 0

    # Detect line thickness variation
    contours, _ = cv2.findContours(thin_lines, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return {
            'present': False,
            'type': 'absent',
            'characteristics': {},
            'clinical_significance': ""
        }

    # Analyze line characteristics
    thicknesses = []
    for contour in contours:
        if cv2.contourArea(contour) > 10:
            _, _, w, h = cv2.boundingRect(contour)
            thicknesses.append(min(w, h))

    if len(thicknesses) > 0:
        thickness_mean = np.mean(thicknesses)
        thickness_std = np.std(thicknesses)
        thickness_variation = thickness_std / thickness_mean if thickness_mean > 0 else 0
    else:
        thickness_mean = 0
        thickness_variation = 0

    # Classify network type
    if network_density < 0.05:
        network_type = "absent"
        present = False
    elif thickness_variation > 0.5:
        network_type = "atypical_network"
        present = True
    elif thickness_mean > 3:
        network_type = "broadened_network"
        present = True
    else:
        network_type = "typical_network"
        present = True

    significance = ""
    if network_type == "atypical_network":
        significance = "Atypical network with irregular lines is concerning for melanoma."
    elif network_type == "broadened_network":
        significance = "Broadened network may indicate dysplastic nevus."
    elif network_type == "typical_network":
        significance = "Typical regular network is common in benign nevi."

    return {
        'present': present,
        'type': network_type,
        'characteristics': {
            'density': round(network_density, 3),
            'thickness_mean': round(thickness_mean, 1),
            'thickness_variation': round(thickness_variation, 2)
        },
        'clinical_significance': significance
    }
print("‚úÖ Pigment network detection ready!")

‚úÖ Pigment network detection ready!


In [15]:
# ============================================================================
# CELL 15: GLOBULES, DOTS & VASCULAR STRUCTURES
# ============================================================================
def detect_globules_and_dots(img_rgb: np.ndarray, mask: np.ndarray,
                             pixels_per_mm: float = 10.0) -> Dict:
    """Detect globular and dot patterns"""
    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

    # Blob detection with LoG
    from skimage.feature import blob_log

    # Normalize for blob detection
    gray_norm = gray / 255.0

    try:
        blobs = blob_log(gray_norm, min_sigma=1, max_sigma=10,
                         num_sigma=5, threshold=0.1)
    except:
        blobs = np.array([])

    # Filter blobs within lesion
    dots = []
    globules = []

    for blob in blobs:
        y, x, sigma = blob
        y, x = int(y), int(x)

        if y < mask.shape[0] and x < mask.shape[1] and mask[y, x] > 0:
            radius_pixels = sigma * np.sqrt(2)
            radius_mm = radius_pixels / pixels_per_mm
            diameter_mm = radius_mm * 2

            # Get color at blob center
            color = img_rgb[y, x]

            blob_info = {
                'x': x, 'y': y,
                'diameter_mm': round(diameter_mm, 2),
                'color': classify_dermatological_color(tuple(color))
            }

            if diameter_mm < 1:
                dots.append(blob_info)
            elif diameter_mm < 3:
                globules.append(blob_info)

    # Analyze distribution
    def get_distribution(items, mask):
        if not items:
            return "none"
        y_coords = [item['y'] for item in items]
        x_coords = [item['x'] for item in items]

        lesion_y, lesion_x = np.where(mask > 0)
        center_y, center_x = np.mean(lesion_y), np.mean(lesion_x)

        central_count = sum(1 for i in range(len(items))
                           if np.sqrt((y_coords[i]-center_y)**2 +
                                     (x_coords[i]-center_x)**2) <
                           np.std(lesion_y))

        if central_count > len(items) * 0.6:
            return "central"
        elif central_count < len(items) * 0.3:
            return "peripheral"
        else:
            return "scattered"

    dots_dist = get_distribution(dots, mask)
    globules_dist = get_distribution(globules, mask)

    # Count by color
    blue_gray_dots = len([d for d in dots if 'blue' in d['color'].lower()])

    return {
        'dots_count': len(dots),
        'globules_count': len(globules),
        'blue_gray_dots': blue_gray_dots,
        'dots_distribution': dots_dist,
        'globules_distribution': globules_dist,
        'clinical_significance': (
            "Irregular dots and blue-gray dots suggest melanocytic neoplasm."
            if blue_gray_dots > 0 else
            "Regular globular pattern common in benign nevi." if len(globules) > 5 else ""
        )
    }
def detect_vascular_structures(img_rgb: np.ndarray, mask: np.ndarray) -> Dict:
    """Identify vascular patterns"""
    # Enhance red channel
    red_channel = img_rgb[:, :, 0].astype(float)
    green_channel = img_rgb[:, :, 1].astype(float)

    # Red enhancement
    vessel_enhanced = np.clip(red_channel - green_channel * 0.5, 0, 255).astype(np.uint8)

    # Apply within lesion
    vessel_enhanced = cv2.bitwise_and(vessel_enhanced, mask)

    # Threshold for vessels
    _, vessel_mask = cv2.threshold(vessel_enhanced, 50, 255, cv2.THRESH_BINARY)

    # Morphological thinning
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    vessel_mask = cv2.morphologyEx(vessel_mask, cv2.MORPH_OPEN, kernel)

    # Calculate vessel density
    lesion_area = np.sum(mask > 0)
    vessel_area = np.sum(vessel_mask > 0)
    vessel_density = (vessel_area / lesion_area) if lesion_area > 0 else 0

    # Detect vessel patterns by shape analysis
    contours, _ = cv2.findContours(vessel_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    vessel_types = {
        'dotted': 0,
        'linear': 0,
        'curved': 0
    }

    for contour in contours:
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)

        if perimeter == 0 or area < 5:
            continue

        circularity = 4 * np.pi * area / (perimeter ** 2)

        if circularity > 0.7:
            vessel_types['dotted'] += 1
        elif circularity < 0.3:
            vessel_types['linear'] += 1
        else:
            vessel_types['curved'] += 1

    # Determine dominant pattern
    total = sum(vessel_types.values())
    if total == 0:
        dominant = "none"
        significance = ""
    else:
        dominant = max(vessel_types, key=vessel_types.get)

        if vessel_types['dotted'] > 0 and vessel_types['linear'] > 0:
            dominant = "polymorphous"
            significance = "Polymorphous vessels highly suggestive of melanoma."
        elif dominant == 'dotted':
            significance = "Dotted vessels can be seen in melanoma and Spitz nevi."
        elif dominant == 'linear':
            significance = "Linear irregular vessels concerning for malignancy."
        else:
            significance = ""

    return {
        'present': total > 0,
        'density': round(vessel_density, 3),
        'types': vessel_types,
        'dominant_pattern': dominant,
        'clinical_significance': significance
    }
print("‚úÖ Globules, dots, and vascular detection ready!")

‚úÖ Globules, dots, and vascular detection ready!


In [16]:
# ============================================================================
# CELL 16: COMPREHENSIVE DERMOSCOPIC ANALYSIS
# ============================================================================
def comprehensive_dermoscopic_analysis(img_rgb: np.ndarray, mask: np.ndarray,
                                       pixels_per_mm: float = 10.0) -> Dict:
    """Integrate all dermoscopic structure detection"""
    results = {
        'blue_white_veil': detect_blue_white_veil(img_rgb, mask),
        'pigment_network': detect_pigment_network(img_rgb, mask),
        'globules_dots': detect_globules_and_dots(img_rgb, mask, pixels_per_mm),
        'vascular': detect_vascular_structures(img_rgb, mask)
    }

    # Count concerning features
    concerning_count = 0
    concerning_features = []

    if results['blue_white_veil']['present']:
        concerning_count += 1
        concerning_features.append("Blue-white veil")

    if results['pigment_network']['type'] == 'atypical_network':
        concerning_count += 1
        concerning_features.append("Atypical pigment network")

    if results['globules_dots']['blue_gray_dots'] > 0:
        concerning_count += 1
        concerning_features.append("Blue-gray dots")

    if results['vascular']['dominant_pattern'] == 'polymorphous':
        concerning_count += 1
        concerning_features.append("Polymorphous vessels")

    results['summary'] = {
        'concerning_structure_count': concerning_count,
        'concerning_features': concerning_features,
        'melanoma_probability': 'HIGH' if concerning_count >= 3 else
                               'MODERATE' if concerning_count >= 2 else
                               'LOW' if concerning_count <= 1 else 'MODERATE'
    }

    return results
print("‚úÖ Comprehensive dermoscopic analysis ready!")

‚úÖ Comprehensive dermoscopic analysis ready!


In [17]:
# ============================================================================
# CELL 17: ABCDE RISK ASSESSMENT
# ============================================================================
def calculate_abcde_risk(colors: List[Dict], shape: Dict, border: Dict,
                         dermoscopic: Dict, temporal_changes: Optional[Dict] = None) -> Dict:
    """Automated melanoma risk assessment using ABCDE criteria"""
    risk_factors = []
    scores = {}

    # A - Asymmetry
    asymmetry = shape.get('asymmetry', 0)
    scores['A_asymmetry'] = asymmetry
    if asymmetry > 30:
        risk_factors.append(f"‚ö†Ô∏è A: Significant asymmetry detected (score: {asymmetry:.1f})")

    # B - Border
    irregularity = border.get('irregularity_score', 0)
    circularity = shape.get('circularity', 1)
    scores['B_irregularity'] = irregularity
    scores['B_circularity'] = circularity

    if irregularity > 8 or circularity < 0.3:
        risk_factors.append(f"‚ö†Ô∏è B: Irregular border (irregularity: {irregularity:.1f}, "
                           f"circularity: {circularity:.2f})")

    # C - Color
    num_colors = len([c for c in colors if c['percentage'] > 5])
    has_black = any('black' in c['clinical_name'].lower() for c in colors)
    has_blue_gray = any('blue' in c['clinical_name'].lower() for c in colors)

    scores['C_num_colors'] = num_colors
    scores['C_has_black'] = has_black
    scores['C_has_blue_gray'] = has_blue_gray

    if num_colors >= 4:
        risk_factors.append(f"‚ö†Ô∏è C: Multiple colors present ({num_colors} distinct tones)")
    if has_black:
        risk_factors.append("‚ö†Ô∏è C: Black pigmentation present")
    if has_blue_gray:
        risk_factors.append("‚ö†Ô∏è C: Blue-gray areas (possible regression)")

    # D - Diameter
    diameter = shape.get('diameter_mm', 0)
    scores['D_diameter'] = diameter
    if diameter > 6:
        risk_factors.append(f"‚ö†Ô∏è D: Diameter > 6mm ({diameter:.1f}mm)")

    # E - Evolving
    if temporal_changes:
        size_change = temporal_changes.get('size_change_pct', 0)
        scores['E_size_change'] = size_change

        if size_change > 20:
            risk_factors.append(f"‚ö†Ô∏è E: Rapid growth detected ({size_change:.1f}% increase)")
        elif size_change > 10:
            risk_factors.append(f"‚ö†Ô∏è E: Moderate growth ({size_change:.1f}% increase)")
    else:
        scores['E_evolving'] = "Not assessed (no temporal data)"

    # Add dermoscopic structure risks
    if dermoscopic['summary']['concerning_structure_count'] >= 2:
        risk_factors.append(f"‚ö†Ô∏è Dermoscopic: {dermoscopic['summary']['concerning_structure_count']} "
                           f"concerning structures ({', '.join(dermoscopic['summary']['concerning_features'])})")

    # Overall risk level
    if len(risk_factors) >= 4:
        overall_risk = "HIGH"
    elif len(risk_factors) >= 2:
        overall_risk = "MODERATE"
    else:
        overall_risk = "LOW"

    return {
        'risk_factors': risk_factors,
        'scores': scores,
        'overall_risk': overall_risk,
        'criteria_met': len(risk_factors)
    }
print("‚úÖ ABCDE risk assessment ready!")

‚úÖ ABCDE risk assessment ready!


In [18]:
# ============================================================================
# CELL 18: TEMPORAL COMPARISON FOR "E" (EVOLVING)
# ============================================================================
def compare_temporal_images(current_img: np.ndarray, previous_img: np.ndarray,
                           days_between: int, pixels_per_mm: float = 10.0) -> Dict:
    """Detect changes over time for Evolving criterion"""
    try:
        # Preprocess both images
        current_clean = remove_hairs(current_img)
        previous_clean = remove_hairs(previous_img)

        # Segment both
        current_mask, _ = segment_lesion(current_clean)
        previous_mask, _ = segment_lesion(previous_clean)

        # Calculate areas
        current_area = np.sum(current_mask > 0) / (pixels_per_mm ** 2)
        previous_area = np.sum(previous_mask > 0) / (pixels_per_mm ** 2)

        # Size change
        if previous_area > 0:
            size_change_pct = (current_area - previous_area) / previous_area * 100
        else:
            size_change_pct = 0

        # Growth rate (per month)
        months = days_between / 30.0
        growth_rate = size_change_pct / months if months > 0 else 0

        # Shape changes (asymmetry)
        current_shape = compute_shape_features(current_mask, pixels_per_mm)
        previous_shape = compute_shape_features(previous_mask, pixels_per_mm)

        asymmetry_change = current_shape.get('asymmetry', 0) - previous_shape.get('asymmetry', 0)

        # Color changes
        current_colors, _, _ = analyze_colors(current_clean, current_mask)
        previous_colors, _, _ = analyze_colors(previous_clean, previous_mask)

        current_color_names = set(c['clinical_name'] for c in current_colors)
        previous_color_names = set(c['clinical_name'] for c in previous_colors)

        new_colors = current_color_names - previous_color_names

        # Urgency assessment
        if size_change_pct > 20 or growth_rate > 30:
            urgency = "URGENT - Rapid growth detected"
        elif size_change_pct > 10 or len(new_colors) > 0:
            urgency = "CONCERNING - Moderate changes"
        else:
            urgency = "STABLE - Minimal changes"

        return {
            'size_change_pct': round(size_change_pct, 1),
            'growth_rate_per_month': round(growth_rate, 1),
            'asymmetry_change': round(asymmetry_change, 1),
            'new_colors': list(new_colors),
            'current_area_mm2': round(current_area, 1),
            'previous_area_mm2': round(previous_area, 1),
            'days_between': days_between,
            'urgency': urgency
        }

    except Exception as e:
        return {'error': str(e)}
print("‚úÖ Temporal comparison ready!")

‚úÖ Temporal comparison ready!


In [19]:
# ============================================================================
# CELL 19: UNCERTAINTY QUANTIFICATION
# ============================================================================
def calculate_prediction_confidence(calibration_info: Dict,
                                    segmentation_method: str,
                                    rag_results: List,
                                    image_quality: float = 0.8) -> Dict:
    """Calculate confidence scores for the analysis"""

    # Calibration confidence
    cal_confidence = calibration_info.get('confidence', 0.3)
    cal_method = calibration_info.get('method', 'estimated')

    # Segmentation confidence
    if segmentation_method == 'kmeans':
        seg_confidence = 0.85
    elif segmentation_method == 'sam':
        seg_confidence = 0.90
    else:
        seg_confidence = 0.60

    # RAG confidence (based on source diversity)
    if rag_results:
        source_types = set(doc.metadata.get('type', '') for doc, _ in rag_results)
        source_diversity = len(source_types) / 4  # Max 4 types
        high_weight_sources = sum(1 for doc, _ in rag_results
                                  if doc.metadata.get('weight', 0) >= 1.5)
        rag_confidence = min(0.5 + source_diversity * 0.3 + high_weight_sources * 0.05, 1.0)
    else:
        rag_confidence = 0.3

    # Overall confidence (weighted average)
    overall = (
        cal_confidence * 0.2 +
        seg_confidence * 0.25 +
        image_quality * 0.25 +
        rag_confidence * 0.3
    )

    # Confidence level
    if overall > 0.75:
        level = "HIGH"
    elif overall > 0.5:
        level = "MODERATE"
    else:
        level = "LOW"

    return {
        'overall': round(overall, 2),
        'level': level,
        'breakdown': {
            'calibration': round(cal_confidence, 2),
            'segmentation': round(seg_confidence, 2),
            'image_quality': round(image_quality, 2),
            'literature_support': round(rag_confidence, 2)
        },
        'calibration_method': cal_method
    }
def generate_uncertainty_report(confidence: Dict) -> str:
    """Generate human-readable uncertainty report"""
    report = f"""
ANALYSIS CONFIDENCE ASSESSMENT
{'='*50}
Overall Confidence: {confidence['level']} ({confidence['overall']*100:.0f}%)
Confidence Breakdown:
‚Ä¢ Size Measurement: {confidence['breakdown']['calibration']*100:.0f}% ({confidence['calibration_method']})
‚Ä¢ Segmentation: {confidence['breakdown']['segmentation']*100:.0f}%
‚Ä¢ Image Quality: {confidence['breakdown']['image_quality']*100:.0f}%
‚Ä¢ Literature Support: {confidence['breakdown']['literature_support']*100:.0f}%
"""

    # Add warnings for low confidence areas
    warnings = []
    if confidence['breakdown']['calibration'] < 0.5:
        warnings.append("‚ö†Ô∏è Size measurements estimated - no calibration reference detected")
    if confidence['breakdown']['segmentation'] < 0.7:
        warnings.append("‚ö†Ô∏è Segmentation quality may affect accuracy")
    if confidence['breakdown']['literature_support'] < 0.5:
        warnings.append("‚ö†Ô∏è Limited literature support for this case")

    if warnings:
        report += "Uncertainty Factors:\n" + "\n".join(warnings) + "\n"

    return report
print("‚úÖ Uncertainty quantification ready!")

‚úÖ Uncertainty quantification ready!


In [20]:
# ============================================================================
# CELL 20: ENHANCED DESCRIPTION GENERATION
# ============================================================================
def make_enhanced_description(colors: List[Dict], shape: Dict, texture: Dict,
                              border: Dict, color_distribution: Dict,
                              dermoscopic: Dict, calibration: Dict,
                              abcde: Dict, temporal: Optional[Dict] = None,
                              confidence: Optional[Dict] = None) -> str:
    """Generate comprehensive clinical report"""

    lines = []
    lines.append("DERMATOLOGICAL LESION ANALYSIS")
    lines.append("=" * 60)

    # Calibration Status
    lines.append("\nüìè CALIBRATION STATUS:")
    cal_conf = calibration.get('confidence', 0.3)
    cal_method = calibration.get('method', 'estimated')
    if cal_conf > 0.7:
        lines.append(f"  ‚úÖ {cal_method.replace('_', ' ').title()} (Confidence: {cal_conf*100:.0f}%)")
    else:
        lines.append(f"  ‚ö†Ô∏è Estimated measurements - {cal_method} (Confidence: {cal_conf*100:.0f}%)")

    # Morphology
    lines.append("\nüìê MORPHOLOGY:")
    lines.append(f"  ‚Ä¢ Size: {shape.get('diameter_mm', 0):.1f}mm diameter, "
                f"{shape.get('area_mm2', 0):.1f}mm¬≤ area")

    asym = shape.get('asymmetry', 0)
    if asym > 40:
        asym_class = "markedly asymmetric"
    elif asym > 25:
        asym_class = "moderately asymmetric"
    else:
        asym_class = "relatively symmetric"
    lines.append(f"  ‚Ä¢ Shape: {asym_class} (score: {asym:.1f})")
    lines.append(f"  ‚Ä¢ Circularity: {shape.get('circularity', 0):.2f} (1.0 = perfect circle)")
    lines.append(f"  ‚Ä¢ Border: {border.get('regularity', 'unknown')}, {border.get('definition', 'unknown')}")

    # Color Analysis
    lines.append("\nüé® COLOR ANALYSIS:")
    significant_colors = [c for c in colors if c['percentage'] > 5]
    lines.append(f"  ‚Ä¢ Distinct color zones: {len(significant_colors)}")
    for c in significant_colors[:5]:
        dist = color_distribution.get(colors.index(c), {}).get('location', 'unknown')
        lines.append(f"  ‚Ä¢ {c['clinical_name']}: {c['percentage']:.1f}% ({dist})")

    if len(significant_colors) >= 4:
        lines.append("  ‚Ä¢ Pattern: VARIEGATED (multiple distinct colors)")
    elif len(significant_colors) >= 2:
        lines.append("  ‚Ä¢ Pattern: Multi-colored")
    else:
        lines.append("  ‚Ä¢ Pattern: Homogeneous")

    # Dermoscopic Structures
    lines.append("\nüî¨ DERMOSCOPIC STRUCTURES:")
    bwv = dermoscopic.get('blue_white_veil', {})
    lines.append(f"  ‚Ä¢ Blue-white veil: {'‚úÖ PRESENT' if bwv.get('present') else '‚ùå Absent'}")
    if bwv.get('present'):
        lines.append(f"    Coverage: {bwv.get('coverage_percentage', 0):.1f}%, "
                    f"Distribution: {bwv.get('distribution', 'unknown')}")

    pn = dermoscopic.get('pigment_network', {})
    lines.append(f"  ‚Ä¢ Pigment network: {pn.get('type', 'unknown').replace('_', ' ').title()}")

    gd = dermoscopic.get('globules_dots', {})
    lines.append(f"  ‚Ä¢ Dots: {gd.get('dots_count', 0)}, Globules: {gd.get('globules_count', 0)}")
    if gd.get('blue_gray_dots', 0) > 0:
        lines.append(f"    ‚ö†Ô∏è Blue-gray dots present: {gd.get('blue_gray_dots', 0)}")

    vasc = dermoscopic.get('vascular', {})
    lines.append(f"  ‚Ä¢ Vascular pattern: {vasc.get('dominant_pattern', 'none')}")

    # Texture
    lines.append("\nüìä TEXTURE & SURFACE:")
    lines.append(f"  ‚Ä¢ Pattern: {texture.get('pattern', 'unknown')}")
    lines.append(f"  ‚Ä¢ Surface: {texture.get('surface', 'unknown')}")
    lines.append(f"  ‚Ä¢ Complexity score: {texture.get('variance', 0):.1f}")

    # ABCDE Assessment
    lines.append("\n‚ö†Ô∏è ABCDE MELANOMA RISK ASSESSMENT:")
    if abcde.get('risk_factors'):
        for rf in abcde['risk_factors']:
            lines.append(f"  {rf}")
        lines.append(f"\n  Overall Risk: {abcde.get('overall_risk', 'UNKNOWN')}")
    else:
        lines.append("  ‚úÖ No major ABCDE risk factors detected")

    # Temporal Changes
    if temporal and 'error' not in temporal:
        lines.append("\nüìà TEMPORAL EVOLUTION:")
        lines.append(f"  ‚Ä¢ Size change: {temporal.get('size_change_pct', 0):+.1f}% "
                    f"over {temporal.get('days_between', 0)} days")
        lines.append(f"  ‚Ä¢ Growth rate: {temporal.get('growth_rate_per_month', 0):.1f}%/month")
        if temporal.get('new_colors'):
            lines.append(f"  ‚Ä¢ New colors: {', '.join(temporal.get('new_colors', []))}")
        lines.append(f"  ‚Ä¢ Assessment: {temporal.get('urgency', 'Unknown')}")

    # Confidence
    if confidence:
        lines.append(f"\nüìä ANALYSIS CONFIDENCE: {confidence.get('level', 'Unknown')} "
                    f"({confidence.get('overall', 0)*100:.0f}%)")

    # Summary
    lines.append("\n" + "=" * 60)
    lines.append("SUMMARY FOR CLINICAL CORRELATION:")

    summary_parts = []
    summary_parts.append(f"This {shape.get('diameter_mm', 0):.1f}mm lesion presents with "
                        f"{len(significant_colors)} distinct color zones")

    if abcde.get('overall_risk') == 'HIGH':
        summary_parts.append("Multiple concerning ABCDE criteria warrant urgent evaluation.")
    elif abcde.get('overall_risk') == 'MODERATE':
        summary_parts.append("Some concerning features warrant dermatological evaluation.")
    else:
        summary_parts.append("Features appear largely benign but clinical correlation recommended.")

    lines.append("  " + " ".join(summary_parts))

    # Disclaimer
    lines.append("\n" + "=" * 60)
    lines.append("‚öïÔ∏è DISCLAIMER:")
    lines.append("  This is an automated analysis for research/educational purposes.")
    lines.append("  NOT a substitute for clinical evaluation by a qualified dermatologist.")
    lines.append("  All findings must be confirmed with histopathological examination.")

    return "\n".join(lines)
print("‚úÖ Description generation ready!")

‚úÖ Description generation ready!


In [21]:
# ============================================================================
# CELL 21: MAIN OPENCV ANALYSIS PIPELINE
# ============================================================================
def analyze_lesion_opencv(pil_image: Image.Image,
                          previous_image: Optional[Image.Image] = None,
                          days_between: int = 0) -> Tuple[str, np.ndarray, np.ndarray, np.ndarray, Dict]:
    """Complete OpenCV analysis pipeline"""
    try:
        # Step 1: Preprocess
        img_rgb = load_and_preprocess_pil(pil_image)
        original = img_rgb.copy()

        # Step 2: Calibration
        pixels_per_mm, cal_conf, cal_method = detect_reference_object(img_rgb)
        calibration = {
            'pixels_per_mm': pixels_per_mm,
            'confidence': cal_conf,
            'method': cal_method
        }

        # Step 3: Remove hair
        cleaned = remove_hairs(img_rgb)

        # Step 4: Segment lesion
        mask, seg_method = segment_lesion(cleaned)

        # Step 5: Shape features
        shape = compute_shape_features(mask, pixels_per_mm)

        # Step 6: Color analysis
        colors, centers, labels = analyze_colors(cleaned, mask)

        # Step 7: Color distribution
        if centers is not None and labels is not None:
            color_dist = analyze_color_distribution(cleaned, mask, centers, labels)
        else:
            color_dist = {}

        # Step 8: Texture
        texture = analyze_texture_patterns(cleaned, mask)

        # Step 9: Border
        border = assess_border_quality(mask)

        # Step 10: Dermoscopic structures
        dermoscopic = comprehensive_dermoscopic_analysis(cleaned, mask, pixels_per_mm)

        # Step 11: Temporal comparison
        temporal = None
        if previous_image is not None and days_between > 0:
            prev_rgb = load_and_preprocess_pil(previous_image)
            temporal = compare_temporal_images(cleaned, prev_rgb, days_between, pixels_per_mm)

        # Step 12: ABCDE risk
        abcde = calculate_abcde_risk(colors, shape, border, dermoscopic, temporal)

        # Step 13: Confidence
        confidence = calculate_prediction_confidence(
            calibration, seg_method, [], 0.8
        )

        # Step 14: Generate description
        description = make_enhanced_description(
            colors, shape, texture, border, color_dist,
            dermoscopic, calibration, abcde, temporal, confidence
        )

        # Compile all data
        all_data = {
            'calibration': calibration,
            'shape': shape,
            'colors': colors,
            'color_distribution': color_dist,
            'texture': texture,
            'border': border,
            'dermoscopic': dermoscopic,
            'abcde': abcde,
            'temporal': temporal,
            'confidence': confidence,
            'segmentation_method': seg_method
        }

        return description, original, cleaned, mask, all_data

    except Exception as e:
        error_msg = f"OpenCV Analysis Error:\n{traceback.format_exc()}"
        return error_msg, None, None, None, {}
print("‚úÖ Main OpenCV analysis pipeline ready!")

‚úÖ Main OpenCV analysis pipeline ready!


In [22]:
# ============================================================================
# CELL 22: MAIN GRADIO ANALYSIS FUNCTION
# ============================================================================
def analyze_lesion_complete(image, previous_image, previous_date,
                            use_opencv, manual_data,
                            max_tokens, temperature, num_sources):
    """Main analysis orchestrator for Gradio interface"""
    try:
        # Input validation
        if image is None:
            return ("‚ùå Please upload an image", "", "", "", "")
        # Convert to PIL RGB
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        image = image.convert('RGB')
        # Initialize outputs
        opencv_output = ""
        dermoscopic_output = ""
        sources_output = ""
        diagnosis_output = ""
        confidence_output = ""
        precomputed_data = None
        analysis_mode = ""
        all_analysis_data = {}
        # Parse previous date
        days_between = 0
        prev_image_pil = None
        if previous_image is not None and previous_date:
            try:
                prev_date = datetime.strptime(previous_date.strip(), "%Y-%m-%d")
                days_between = (datetime.now() - prev_date).days
                prev_image_pil = Image.fromarray(previous_image).convert('RGB')
            except:
                pass
        # Feature extraction
        if use_opencv:
            print("üî¨ Running OpenCV feature extraction...")
            description, orig, cleaned, mask, all_analysis_data = analyze_lesion_opencv(
                image, prev_image_pil, days_between
            )
            opencv_output = description
            precomputed_data = description
            analysis_mode = "üî¨ OpenCV Feature Extraction"
            # Format dermoscopic output
            if all_analysis_data and 'dermoscopic' in all_analysis_data:
                derm = all_analysis_data['dermoscopic']
                dermoscopic_output = format_dermoscopic_report(derm)
            else:
                dermoscopic_output = "Dermoscopic analysis not available (OpenCV analysis may have encountered an issue)"
            # Confidence output
            if all_analysis_data and 'confidence' in all_analysis_data:
                confidence_output = generate_uncertainty_report(all_analysis_data['confidence'])
            else:
                confidence_output = "Confidence data not available"
        elif manual_data and manual_data.strip():
            precomputed_data = manual_data
            analysis_mode = "üìä Manual Pre-computed Data"
        else:
            analysis_mode = "üëÅÔ∏è Direct VLM Analysis"
        # RAG Retrieval
        print("üìö Retrieving relevant medical literature...")
        query_terms = [
            "melanoma", "atypical nevus", "dysplastic nevus", "ABCDE criteria",
            "dermoscopy patterns", "pigment network", "blue-white veil",
            "skin cancer diagnosis", "benign nevus", "basal cell carcinoma"
        ]
        query = " ".join(query_terms)
        rag_results = weighted_retrieval(vectorstore, query, k=num_sources)
        # Format sources
        sources_lines = [f"**{analysis_mode}**\n"]
        sources_lines.append(f"**Found {len(rag_results)} relevant sources:**\n")
        retrieved_context = ""
        for i, (doc, score) in enumerate(rag_results, 1):
            source_type = doc.metadata.get('type', 'research')
            weight = doc.metadata.get('weight', 1.0)
            sources_lines.append(f"**[Source {i}]** ({source_type.title()}, Weight: {weight})")
            sources_lines.append(f"Relevance Score: {score:.4f}")
            sources_lines.append(f"{doc.page_content[:300]}...")
            sources_lines.append("-" * 50)
            retrieved_context += f"\n[Source {i}] ({source_type}):\n{doc.page_content}\n"
        sources_output = "\n".join(sources_lines)
        # Build VLM prompt
        print("üß† Running VLM analysis...")
        analysis_prompt = build_vlm_prompt(precomputed_data, retrieved_context)
        # VLM Inference
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": analysis_prompt}
            ]}
        ]
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
        # Prepare inputs
        inputs = processor(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt"
        ).to(model.device)
        # Generate
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=int(max_tokens),
                do_sample=True,
                temperature=float(temperature)
            )
        # Decode
        response = processor.decode(output[0], skip_special_tokens=True)
        # Extract assistant response
        if "assistant" in response.lower():
            diagnosis_output = response.split("assistant")[-1].strip()
        else:
            diagnosis_output = response
        # Clean up response
        if diagnosis_output.startswith(":"):
            diagnosis_output = diagnosis_output[1:].strip()
        # Save report
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"analysis_{timestamp}.txt"
        full_report = f"""
SKIN LESION ANALYSIS REPORT
{'='*60}
Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
Analysis Mode: {analysis_mode}
{'='*60}
{opencv_output if opencv_output else "OpenCV analysis not used"}
{'='*60}
EVIDENCE-BASED DIAGNOSIS:
{'='*60}
{diagnosis_output}
{'='*60}
SOURCES CONSULTED:
{'='*60}
{sources_output}
{'='*60}
DISCLAIMER: For research and educational purposes only.
NOT a substitute for professional medical evaluation.
{'='*60}
"""
        with open(filename, 'w') as f:
            f.write(full_report)
        print(f"üíæ Report saved: {filename}")
        return (opencv_output, dermoscopic_output, sources_output,
                diagnosis_output, confidence_output)
    except Exception as e:
        error_msg = f"Analysis Error:\n{traceback.format_exc()}"
        return (error_msg, "", "", "", "")
def format_dermoscopic_report(derm: Dict) -> str:
    """Format dermoscopic analysis for display"""
    lines = ["DERMOSCOPIC STRUCTURE ANALYSIS", "=" * 50, ""]
    # Blue-white veil
    bwv = derm.get('blue_white_veil', {})
    lines.append("BLUE-WHITE VEIL:")
    if bwv.get('present'):
        lines.append(f"  ‚úÖ PRESENT - Coverage: {bwv.get('coverage_percentage', 0):.1f}%")
        lines.append(f"  Distribution: {bwv.get('distribution', 'unknown')}")
        lines.append(f"  {bwv.get('clinical_significance', '')}")
    else:
        lines.append("  ‚ùå Absent")
    # Pigment network
    pn = derm.get('pigment_network', {})
    lines.append("\nPIGMENT NETWORK:")
    lines.append(f"  Type: {pn.get('type', 'unknown').replace('_', ' ').title()}")
    if pn.get('characteristics'):
        chars = pn['characteristics']
        lines.append(f"  Density: {chars.get('density', 0):.3f}")
        lines.append(f"  Thickness variation: {chars.get('thickness_variation', 0):.2f}")
    if pn.get('clinical_significance'):
        lines.append(f"  {pn.get('clinical_significance', '')}")
    # Globules and dots
    gd = derm.get('globules_dots', {})
    lines.append("\nGLOBULES AND DOTS:")
    lines.append(f"  Dots detected: {gd.get('dots_count', 0)}")
    lines.append(f"  Globules detected: {gd.get('globules_count', 0)}")
    if gd.get('blue_gray_dots', 0) > 0:
        lines.append(f"  ‚ö†Ô∏è Blue-gray dots: {gd.get('blue_gray_dots', 0)}")
    lines.append(f"  Distribution: {gd.get('dots_distribution', 'unknown')}")
    # Vascular
    vasc = derm.get('vascular', {})
    lines.append("\nVASCULAR STRUCTURES:")
    if vasc.get('present'):
        lines.append(f"  Pattern: {vasc.get('dominant_pattern', 'none')}")
        types = vasc.get('types', {})
        lines.append(f"  Types: Dotted={types.get('dotted', 0)}, "
                    f"Linear={types.get('linear', 0)}, Curved={types.get('curved', 0)}")
        if vasc.get('clinical_significance'):
            lines.append(f"  {vasc.get('clinical_significance', '')}")
    else:
        lines.append("  ‚ùå Not significant")
    # Summary
    summary = derm.get('summary', {})
    lines.append("\n" + "=" * 50)
    lines.append(f"CONCERNING STRUCTURES: {summary.get('concerning_structure_count', 0)}")
    if summary.get('concerning_features'):
        lines.append(f"Features: {', '.join(summary.get('concerning_features', []))}")
    lines.append(f"MELANOMA PROBABILITY: {summary.get('melanoma_probability', 'Unknown')}")
    return "\n".join(lines)
def build_vlm_prompt(precomputed_data: Optional[str], retrieved_context: str) -> str:
    """Build the VLM analysis prompt"""
    if precomputed_data:
        prompt = f"""You are an expert dermatologist analyzing a skin lesion.
QUANTITATIVE MEASUREMENTS FROM COMPUTER VISION:
{precomputed_data}
{'='*60}
RELEVANT MEDICAL LITERATURE:
{'='*60}
{retrieved_context}
{'='*60}
EVIDENCE-BASED ANALYSIS INSTRUCTIONS:
{'='*60}
Based on the quantitative measurements AND medical literature, provide a structured diagnosis.
**MANDATORY: Cite sources using [Source 1], [Source 2], etc.**
Structure your response:
1. DIFFERENTIAL DIAGNOSIS (Ranked by Likelihood):
   - Provide 3-5 diagnoses with likelihood levels
   - Reference specific measurements
   - Cite literature for each
2. CONCERNING FEATURES WITH EVIDENCE:
   - List specific features from measurements
   - Explain clinical significance with citations
3. CLINICAL RECOMMENDATIONS:
   - Urgency level (Immediate/Urgent/Routine)
   - Specific next steps (biopsy type, excision, monitoring)
   - Cite sources for recommendations
4. PATIENT COMMUNICATION:
   - Clear explanation of findings
   - What to expect next
Remember: Cite [Source X] for clinical claims."""
    else:
        prompt = f"""You are an expert dermatologist. Analyze this skin lesion image.
Evaluate:
- Size, shape, symmetry
- Border characteristics
- Color zones and distribution
- ABCDE criteria
{'='*60}
MEDICAL LITERATURE:
{'='*60}
{retrieved_context}
Provide structured analysis:
1. DIFFERENTIAL DIAGNOSIS (3-5 options with likelihood)
2. CONCERNING FEATURES
3. CLINICAL RECOMMENDATIONS
4. PATIENT COMMUNICATION
Cite sources as [Source X]."""
    return prompt
print("‚úÖ Main analysis function ready!")

‚úÖ Main analysis function ready!


In [None]:
# ============================================================================
# CELL 23: GRADIO INTERFACE
# ============================================================================
# Custom CSS
custom_css = """
.gradio-container {
    max-width: 1400px !important;
    margin: auto;
}
.output-textbox textarea {
    max-height: 500px;
    overflow-y: auto !important;
    font-family: 'Courier New', monospace;
    font-size: 13px;
}
"""
# Build interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # üî¨ Complete Skin Cancer Analysis System
    ### OpenCV + Vision-Language Model + RAG Evidence-Based Diagnosis

    > ‚ö†Ô∏è **DISCLAIMER**: This tool is for **research and educational purposes only**.
    > It is NOT FDA-approved and should NOT be used for clinical diagnosis.
    > All findings must be confirmed by a qualified dermatologist.
    """)

    with gr.Row():
        # Left Column - Inputs
        with gr.Column(scale=1):
            gr.Markdown("### üì§ Upload Images")

            image_input = gr.Image(
                type="pil",
                label="Current Skin Lesion Image",
                height=300
            )

            with gr.Accordion("üìÖ Previous Image (for Evolution Analysis)", open=False):
                previous_image = gr.Image(
                    type="numpy",
                    label="Previous Image (optional)"
                )
                previous_date = gr.Textbox(
                    label="Previous Image Date (YYYY-MM-DD)",
                    placeholder="e.g., 2024-06-15"
                )

            gr.Markdown("### üîß Analysis Options")

            use_opencv = gr.Checkbox(
                value=True,
                label="‚úÖ Use OpenCV Feature Extraction (Recommended)"
            )

            with gr.Accordion("üìä Manual Pre-computed Data", open=False):
                manual_data = gr.Textbox(
                    label="Paste measurements here (optional)",
                    lines=5,
                    placeholder="Use this if you have pre-computed measurements..."
                )

            with gr.Accordion("‚öôÔ∏è Advanced Settings", open=False):
                max_tokens = gr.Slider(
                    512, 2048, value=1024, step=128,
                    label="Max Response Tokens"
                )
                temperature = gr.Slider(
                    0.1, 1.0, value=0.5, step=0.1,
                    label="Temperature (lower = more focused)"
                )
                num_sources = gr.Slider(
                    1, 10, value=5, step=1,
                    label="Number of Literature Sources"
                )

            analyze_btn = gr.Button(
                "üî¨ Analyze Lesion",
                variant="primary",
                size="lg"
            )

        # Right Column - Outputs
        with gr.Column(scale=2):
            gr.Markdown("### üìä Analysis Results")

            with gr.Tabs():
                with gr.Tab("üìê OpenCV Features"):
                    opencv_output = gr.Textbox(
                        label="Quantitative Measurements",
                        lines=20,
                        max_lines=30,
                        show_copy_button=True
                    )

                with gr.Tab("üî¨ Dermoscopic Structures"):
                    dermoscopic_output = gr.Textbox(
                        label="Structure Analysis",
                        lines=18,
                        max_lines=25,
                        show_copy_button=True
                    )

                with gr.Tab("üìö Literature Sources"):
                    sources_output = gr.Textbox(
                        label="Retrieved Medical Literature",
                        lines=15,
                        max_lines=25,
                        show_copy_button=True
                    )

                with gr.Tab("ü©∫ Evidence-Based Diagnosis"):
                    diagnosis_output = gr.Textbox(
                        label="VLM Diagnosis with Citations",
                        lines=18,
                        max_lines=30,
                        show_copy_button=True
                    )

                with gr.Tab("üìä Confidence Report"):
                    confidence_output = gr.Textbox(
                        label="Analysis Confidence",
                        lines=12,
                        max_lines=20,
                        show_copy_button=True
                    )

    # Instructions
    gr.Markdown("""
    ---
    ### üìã How to Use
    1. **Upload** a dermoscopic or clinical image of the skin lesion
    2. **(Optional)** Upload a previous image for evolution tracking
    3. **Enable** OpenCV feature extraction for quantitative analysis
    4. **Click** "Analyze Lesion" and wait 30-60 seconds
    5. **Review** results across all tabs

    ### ‚ú® Features
    - üìê **Calibrated measurements** (size, shape, asymmetry)
    - üé® **Color analysis** with clinical terminology
    - üî¨ **Dermoscopic structure detection** (pigment network, blue-white veil, etc.)
    - ‚ö†Ô∏è **ABCDE risk assessment**
    - üìà **Temporal evolution** tracking
    - üìö **Evidence-based diagnosis** with literature citations
    - üìä **Confidence scoring**
    """)

    # Connect button
    analyze_btn.click(
        fn=analyze_lesion_complete,
        inputs=[
            image_input, previous_image, previous_date,
            use_opencv, manual_data,
            max_tokens, temperature, num_sources
        ],
        outputs=[
            opencv_output, dermoscopic_output, sources_output,
            diagnosis_output, confidence_output
        ]
    )
# Launch
print("üöÄ Launching Gradio interface...")
demo.launch(
    share=True,
    debug=True,
    show_error=True
)

üöÄ Launching Gradio interface...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://fc9473ead831c2e4c1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
