In [None]:
# ===============================
# BLIP: IMAGE CAPTIONING (BATCH PROCESSING)
# ===============================
import os
import pandas as pd
from pathlib import Path
from PIL import Image
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from tqdm.notebook import tqdm

# ---------- CONFIG ----------
import kagglehub
# Resolve the local directory of the Kaggle dataset, then point at its "Train" split.
kaggle_root = kagglehub.dataset_download('nodoubttome/skin-cancer9-classesisic')
DATA_ROOT = os.path.join(kaggle_root, "Skin cancer ISIC The International Skin Imaging Collaboration", "Train")

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Number of images sent through BLIP per step; adjust based on GPU memory.
BATCH_SIZE = 8

# ---------- LOAD IMAGE PATHS + LABELS ----------
# Each immediate sub-directory of DATA_ROOT is treated as one class; its name
# becomes the label of every image file found directly inside it.
root_path = Path(DATA_ROOT)

# Both directory levels are sorted so the row order of `df` (and of every CSV
# written from it) is reproducible instead of depending on filesystem order.
records = [
    (str(img_file), label_dir.name)
    for label_dir in sorted(root_path.iterdir())
    if label_dir.is_dir()
    for img_file in sorted(label_dir.iterdir())
    if img_file.suffix.lower() in {".png", ".jpg", ".jpeg"}
]
image_paths = [path for path, _ in records]
labels = [label for _, label in records]

df = pd.DataFrame({"image_path": image_paths, "label": labels})
print(f"✅ Loaded {len(df)} images across {df['label'].nunique()} classes.")

# ---------- LOAD BLIP MODEL ----------
# The processor and the captioning weights are loaded from the same checkpoint;
# the model is then moved to the configured device.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
model = model.to(DEVICE)

# ---------- GENERATE CAPTIONS IN BATCHES ----------
# Walk the frame in slices of BATCH_SIZE rows, caption each slice with BLIP and
# collect the decoded strings in row order.
captions = []
for start in tqdm(range(0, len(df), BATCH_SIZE), desc="Generating captions in batches"):
    batch_paths = df["image_path"].iloc[start:start + BATCH_SIZE].tolist()
    # Force 3-channel RGB so every image in the batch has the same mode.
    images = [Image.open(p).convert("RGB") for p in batch_paths]
    inputs = processor(images=images, return_tensors="pt").to(DEVICE)
    # Explicit length cap, matching the single-image sanity check later in the
    # notebook, so both code paths generate under the same limit instead of
    # this one relying on the library default.
    out = model.generate(**inputs, max_new_tokens=64)
    captions.extend(processor.batch_decode(out, skip_special_tokens=True))

# One caption per row; pandas raises here if the lengths ever disagree.
df["caption"] = captions

# ---------- SHOW SAMPLE CAPTIONS ----------
# Preview the first rows, restricted to the columns relevant at this stage.
print("\nSample Captions:")
print(df.head()[["image_path", "label", "caption"]])

# ---------- SAVE CAPTIONS ----------
# Write the full frame (paths, labels, captions) without the index column.
df.to_csv(path_or_buf="captions_dataset.csv", index=False)
print("✅ Captions saved to captions_dataset.csv")


Using Colab cache for faster access to the 'skin-cancer9-classesisic' dataset.


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


✅ Loaded 2239 images across 9 classes.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Generating captions in batches:   0%|          | 0/280 [00:00<?, ?it/s]


Sample Captions:
                                          image_path              label  \
0  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
1  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
2  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
3  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
4  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   

                                   caption  
0            a close up of a piece of skin  
1           a close up of a pink substance  
2            a close up of a piece of skin  
3  a close up of a skin with a small patch  
4           a close up of a pink substance  
✅ Captions saved to captions_dataset.csv


In [None]:
# ===============================
# FREE LLM CLASSIFIER (Flan-T5)
# ===============================
from transformers import pipeline
import json
from tqdm import tqdm

# Define possible skin cancer classes
# NOTE(review): "normal" is listed here; confirm that every entry matches a
# folder name in df["label"].unique() — a class missing from this list can
# never be predicted correctly.
POSSIBLE_LABELS = [
    "actinic keratosis", "basal cell carcinoma", "dermatofibroma", "melanoma",
    "nevus", "seborrheic keratosis", "squamous cell carcinoma", "vascular lesion", "normal"
]

# Prompt template
def make_prompt(caption):
    """Build the zero-shot classification prompt for one image caption.

    Args:
        caption: Text description of the image (here, a BLIP caption).

    Returns:
        A multi-line prompt string that embeds the caption and the list of
        POSSIBLE_LABELS and asks for a JSON object with "label" and
        "rationale" keys.
    """
    # The JSON template is the last line of the prompt; the stray trailing
    # "m" that used to follow it has been removed so the model is not shown
    # a malformed example.
    return f"""
You are a medical assistant specializing in dermatology.

Caption: "{caption}"

Possible labels: {POSSIBLE_LABELS}

Task:
1. Pick the most likely label from the list.
2. Provide reasoning in one short sentence.

Respond strictly in JSON:
{{"label": "...", "rationale": "..."}}
"""

# Load Flan-T5 model (free + ungated)
# Device index 0 selects the first GPU; -1 keeps the pipeline on the CPU.
classifier = pipeline(
    task="text2text-generation",
    model="google/flan-t5-large",
    device=-1 if not torch.cuda.is_available() else 0,
)

# Run classification for each BLIP caption
# For every caption the model is prompted once; the reply is parsed as JSON
# when possible, otherwise the raw text is kept as the prediction.
predictions, rationales = [], []
for caption in tqdm(df["caption"].tolist(), desc="Classifying with Flan-T5"):
    response = classifier(make_prompt(caption), max_new_tokens=100)[0]["generated_text"]
    try:
        result = json.loads(response)
        label = result.get("label", "unknown").lower().strip()
        rationale = result.get("rationale", "")
    except (ValueError, AttributeError, TypeError):
        # ValueError covers json.JSONDecodeError (reply is not JSON);
        # AttributeError/TypeError cover valid JSON that is not an object or
        # whose "label" is not a string. Anything else (including
        # KeyboardInterrupt) now propagates instead of being swallowed.
        # NOTE(review): in the recorded run every sampled row took this
        # fallback, i.e. the model did not return JSON.
        label = response.strip().lower()
        rationale = "raw output"
    # Append both values together so the two lists always stay aligned.
    predictions.append(label)
    rationales.append(rationale)

df["llm_prediction"] = predictions
df["llm_rationale"] = rationales

# Save intermediate results
# The frame now carries the LLM columns; persist it without the index.
df.to_csv(path_or_buf="captions_with_predictions.csv", index=False)
print(" Predictions saved to captions_with_predictions.csv")

# Show sample results
print(df.head()[["caption", "llm_prediction", "llm_rationale"]])


Device set to use cuda:0
Classifying with Flan-T5: 100%|██████████| 2239/2239 [1:21:43<00:00,  2.19s/it]

 Predictions saved to captions_with_predictions.csv
                                   caption  \
0            a close up of a piece of skin   
1           a close up of a pink substance   
2            a close up of a piece of skin   
3  a close up of a skin with a small patch   
4           a close up of a pink substance   

                                      llm_prediction llm_rationale  
0  seborrheic keratosis is a type of skin conditi...    raw output  
1  seborrheic keratosis is a type of pink substan...    raw output  
2  seborrheic keratosis is a type of skin conditi...    raw output  
3  seborrheic keratosis is a skin condition in wh...    raw output  
4  seborrheic keratosis is a type of pink substan...    raw output  





In [None]:
# Use the first row's image as a fixed sample for the single-image check below.
sample_image_path = df["image_path"].iat[0]
print(sample_image_path)


/kaggle/input/skin-cancer9-classesisic/Skin cancer ISIC The International Skin Imaging Collaboration/Train/actinic keratosis/ISIC_0027580.jpg


In [None]:
from PIL import Image

# Load the sample image and force 3-channel RGB
image = Image.open(sample_image_path).convert("RGB")

# BLIP captioning for a single image: preprocess, move to device, generate, decode
inputs = processor(images=image, return_tensors="pt")
inputs = inputs.to(DEVICE)
out = model.generate(max_new_tokens=64, **inputs)
caption = processor.decode(out[0], skip_special_tokens=True)

print("Generated Caption:", caption)


Generated Caption: a close up of a piece of skin


In [None]:
import re

# Canonical labels
# NOTE(review): confirm these match the dataset's folder names
# (df["label"].unique()); a true label with no entry here or in KEYWORDS is
# mapped to "unknown" by clean_label.
POSSIBLE_LABELS = [
    "actinic keratosis", "basal cell carcinoma", "dermatofibroma", "melanoma",
    "nevus", "seborrheic keratosis", "squamous cell carcinoma", "vascular lesion", "normal"
]

# Keywords mapping for fuzzy matching (keyword prefix -> canonical label)
KEYWORDS = {
    "actinic": "actinic keratosis",
    "basal": "basal cell carcinoma",
    "bcc": "basal cell carcinoma",
    "dermato": "dermatofibroma",
    "melan": "melanoma",
    "mole": "nevus",
    "nevus": "nevus",
    "seborrheic": "seborrheic keratosis",
    "squamous": "squamous cell carcinoma",
    "scc": "squamous cell carcinoma",
    "vascular": "vascular lesion",
    "normal": "normal"
}

def clean_label(text):
    """Map free text onto a canonical label using the KEYWORDS table.

    The text is lower-cased and stripped of everything except letters and
    whitespace. Each keyword is then matched as a word prefix, and the label
    of the keyword that occurs earliest in the text wins.

    Args:
        text: Raw label or model output. Non-string values (e.g. NaN) are
            accepted.

    Returns:
        The canonical label string, or "unknown" when `text` is not a string
        or contains no keyword.
    """
    if not isinstance(text, str):
        return "unknown"
    text = re.sub(r"[^a-z\s]", "", text.lower().strip())  # remove punctuation

    best_pos, best_label = None, "unknown"
    for keyword, label in KEYWORDS.items():
        # \b anchors the keyword at the start of a word, so "abnormal" no
        # longer counts as "normal".
        match = re.search(rf"\b{re.escape(keyword)}", text)
        # Earliest occurrence wins: the answer should not depend on the order
        # in which KEYWORDS happens to be written.
        if match and (best_pos is None or match.start() < best_pos):
            best_pos, best_label = match.start(), label
    return best_label

# Normalise both the ground-truth labels and the raw LLM outputs with clean_label.
df["true_label_clean"] = df["label"].map(clean_label)
df["llm_prediction_clean"] = df["llm_prediction"].map(clean_label)

# Preview the raw vs. normalised predictions for the first ten rows.
print("Sample of cleaned labels:")
print(df.head(10)[["label", "llm_prediction", "llm_prediction_clean"]])


Sample of cleaned labels:
               label                                     llm_prediction  \
0  actinic keratosis  seborrheic keratosis is a type of skin conditi...   
1  actinic keratosis  seborrheic keratosis is a type of pink substan...   
2  actinic keratosis  seborrheic keratosis is a type of skin conditi...   
3  actinic keratosis  seborrheic keratosis is a skin condition in wh...   
4  actinic keratosis  seborrheic keratosis is a type of pink substan...   
5  actinic keratosis  seborrheic keratosis is a white and red substa...   
6  actinic keratosis  seborrheic keratosis is a pink substance on a ...   
7  actinic keratosis  seborrheic keratosis is a type of skin conditi...   
8  actinic keratosis  seborrheic keratosis is a type of pink substan...   
9  actinic keratosis  seborrheic keratosis is a pinkish area with a ...   

   llm_prediction_clean  
0  seborrheic keratosis  
1  seborrheic keratosis  
2  seborrheic keratosis  
3  seborrheic keratosis  
4  seborrheic kera

In [None]:
def make_prompt(caption):
    """Build a few-shot classification prompt for one image caption.

    The prompt shows three worked caption/label examples, then the new
    caption, the list of POSSIBLE_LABELS, and a request for a JSON reply with
    "label" and "rationale" keys.

    Args:
        caption: Text description of the image to classify.

    Returns:
        The assembled prompt string.
    """
    # Worked examples shown to the model before the real caption.
    few_shot = (
        "\n"
        "Example 1:\n"
        'Caption: "A small brown lesion with uneven border"\n'
        "Label: melanoma\n"
        "\n"
        "Example 2:\n"
        'Caption: "A raised pink growth with a pearly edge"\n'
        "Label: basal cell carcinoma\n"
        "\n"
        "Example 3:\n"
        'Caption: "A flat scaly patch on sun-exposed skin"\n'
        "Label: actinic keratosis\n"
    )
    return (
        "\n"
        "You are a dermatology assistant.\n"
        "\n"
        f"{few_shot}\n"
        "\n"
        "Now classify this new caption:\n"
        f'"{caption}"\n'
        "\n"
        f"Possible labels: {POSSIBLE_LABELS}\n"
        "\n"
        'Respond only in JSON: {"label": "...", "rationale": "..."}\n'
    )


In [None]:
# ===============================
# ACCURACY EVALUATION
# ===============================
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import re

# Canonical labels
# NOTE(review): confirm these match the dataset's folder names (the "label"
# column of the CSV); a true label with no entry here or in KEYWORDS is
# mapped to "unknown" by clean_label and lowers the reported accuracy.
POSSIBLE_LABELS = [
    "actinic keratosis", "basal cell carcinoma", "dermatofibroma", "melanoma",
    "nevus", "seborrheic keratosis", "squamous cell carcinoma", "vascular lesion", "normal"
]

# Keywords mapping for fuzzy matching (keyword prefix -> canonical label)
KEYWORDS = {
    "actinic": "actinic keratosis",
    "basal": "basal cell carcinoma",
    "bcc": "basal cell carcinoma",
    "dermato": "dermatofibroma",
    "melan": "melanoma",
    "mole": "nevus",
    "nevus": "nevus",
    "seborrheic": "seborrheic keratosis",
    "squamous": "squamous cell carcinoma",
    "scc": "squamous cell carcinoma",
    "vascular": "vascular lesion",
    "normal": "normal"
}

def clean_label(text):
    """Map free text onto a canonical label using the KEYWORDS table.

    The text is lower-cased and stripped of everything except letters and
    whitespace. Each keyword is then matched as a word prefix, and the label
    of the keyword that occurs earliest in the text wins.

    Args:
        text: Raw label or model output. Non-string values (e.g. NaN read
            back from a CSV) are accepted.

    Returns:
        The canonical label string, or "unknown" when `text` is not a string
        or contains no keyword.
    """
    if not isinstance(text, str):
        return "unknown"
    text = re.sub(r"[^a-z\s]", "", text.lower().strip())  # remove punctuation

    best_pos, best_label = None, "unknown"
    for keyword, label in KEYWORDS.items():
        # \b anchors the keyword at the start of a word, so "abnormal" no
        # longer counts as "normal".
        match = re.search(rf"\b{re.escape(keyword)}", text)
        # Earliest occurrence wins: the answer should not depend on the order
        # in which KEYWORDS happens to be written.
        if match and (best_pos is None or match.start() < best_pos):
            best_pos, best_label = match.start(), label
    return best_label

# Clean predictions to nearest valid label
def clean_prediction(pred, valid_labels):
    """Return the first entry of `valid_labels` that occurs inside `pred`.

    Args:
        pred: Prediction text to inspect. Non-string values (e.g. NaN from a
            CSV round-trip, or None) are treated as having no match.
        valid_labels: Iterable of label strings, checked in order with a
            case-sensitive substring test.

    Returns:
        The first matching label, or "unknown" when nothing matches.
    """
    # `lbl in pred` raises TypeError for floats/None, so rule those out first.
    if not isinstance(pred, str):
        return "unknown"
    for lbl in valid_labels:
        if lbl in pred:
            return lbl
    return "unknown"

# Load the dataframe from the saved CSV
# A missing file is re-raised with an actionable message. The previous
# print-then-exit() did not stop the cell: the recorded run printed the
# message and then failed further down with "NameError: name 'df' is not
# defined".
try:
    df = pd.read_csv("captions_with_predictions.csv")
except FileNotFoundError as err:
    raise FileNotFoundError(
        "captions_with_predictions.csv not found. Please run the previous cells to generate it."
    ) from err

# Add clean labels if they don't exist
# (source column -> normalised column); existing columns are left as they are.
for raw_col, clean_col in [("label", "true_label_clean"), ("llm_prediction", "llm_prediction_clean")]:
    if clean_col not in df.columns:
        df[clean_col] = df[raw_col].apply(clean_label)

# Further clean llm_prediction_clean using the clean_prediction function
valid_labels = list(map(str.lower, POSSIBLE_LABELS))
df["llm_prediction_clean"] = df["llm_prediction_clean"].apply(
    lambda value: clean_prediction(value, valid_labels)
)

# Overall accuracy of the normalised predictions against the normalised truth
y_true = df["true_label_clean"]
y_pred = df["llm_prediction_clean"]
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Model Accuracy (BLIP → Flan-T5): {accuracy:.4f}\n")

# Print classification report
print(classification_report(y_true, y_pred))

Error: captions_with_predictions.csv not found. Please run the previous cells to generate it.


NameError: name 'df' is not defined

In [None]:
# Clean predictions to nearest valid label
# NOTE: this definition replaces the keyword-based clean_label(text) from the
# earlier cells once this cell has run. `valid_labels` therefore has a
# default, so one-argument calls such as df["label"].apply(clean_label) keep
# working instead of raising TypeError.
def clean_label(pred, valid_labels=None):
    """Return the first valid label that occurs inside `pred`.

    Args:
        pred: Prediction text to inspect. Non-string values (e.g. NaN) are
            treated as having no match.
        valid_labels: Label strings checked in order with a case-sensitive
            substring test. Defaults to the lower-cased POSSIBLE_LABELS.

    Returns:
        The first matching label, or "unknown" when nothing matches.
    """
    if valid_labels is None:
        valid_labels = [lbl.lower() for lbl in POSSIBLE_LABELS]
    # `lbl in pred` raises TypeError for floats/None, so rule those out first.
    if not isinstance(pred, str):
        return "unknown"
    for lbl in valid_labels:
        if lbl in pred:
            return lbl
    return "unknown"

valid_labels = [lbl.lower() for lbl in POSSIBLE_LABELS]
df["llm_prediction_clean"] = df["llm_prediction_clean"].apply(lambda x: clean_label(x, valid_labels))
