<a href="https://colab.research.google.com/github/Nichal9651/Capstone-project/blob/main/BLIP_and_LL_model_for_Skin_Cancer_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# ===============================
# BLIP: IMAGE CAPTIONING (BATCH PROCESSING)
# ===============================
import os
import pandas as pd
from pathlib import Path
from PIL import Image
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from tqdm.notebook import tqdm

# ---------- CONFIG ----------
import kagglehub
kaggle_root = kagglehub.dataset_download('nodoubttome/skin-cancer9-classesisic')
DATA_ROOT = os.path.join(
    kaggle_root,
    "Skin cancer ISIC The International Skin Imaging Collaboration",
    "Train"
)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 8  # Adjust based on GPU memory

# ---------- LOAD IMAGE PATHS + LABELS ----------
image_paths, labels = [], []
root_path = Path(DATA_ROOT)

for label_dir in sorted(root_path.iterdir()):
    if label_dir.is_dir():
        for img_file in label_dir.iterdir():
            if img_file.suffix.lower() in ['.png', '.jpg', '.jpeg']:
                image_paths.append(str(img_file))
                labels.append(label_dir.name)

df = pd.DataFrame({"image_path": image_paths, "label": labels})
print(f"✅ Loaded {len(df)} images across {df['label'].nunique()} classes.")

# ---------- LOAD BLIP MODEL ----------
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(DEVICE)

# ---------- GENERATE CAPTIONS IN BATCHES ----------
captions = []
for i in tqdm(range(0, len(df), BATCH_SIZE), desc="Generating captions in batches"):
    batch_paths = df["image_path"].iloc[i:i+BATCH_SIZE].tolist()
    images = [Image.open(p).convert("RGB") for p in batch_paths]
    inputs = processor(images=images, return_tensors="pt").to(DEVICE)
    out = model.generate(**inputs)
    batch_captions = processor.batch_decode(out, skip_special_tokens=True)
    captions.extend(batch_captions)

df["caption"] = captions

# ---------- SHOW SAMPLE CAPTIONS ----------
print("\nSample Captions:")
print(df[["image_path", "label", "caption"]].head())

# ---------- SAVE CAPTIONS ----------
df.to_csv("captions_dataset.csv", index=False)
print("✅ Captions saved to captions_dataset.csv")


Using Colab cache for faster access to the 'skin-cancer9-classesisic' dataset.
✅ Loaded 2239 images across 9 classes.


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating captions in batches:   0%|          | 0/280 [00:00<?, ?it/s]


Sample Captions:
                                          image_path              label  \
0  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
1  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
2  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
3  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
4  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   

                                   caption  
0            a close up of a piece of skin  
1           a close up of a pink substance  
2            a close up of a piece of skin  
3  a close up of a skin with a small patch  
4           a close up of a pink substance  
✅ Captions saved to captions_dataset.csv


In [4]:
# ===============================
# FREE LLM CLASSIFIER (Flan-T5)
# ===============================
from transformers import pipeline
import json
from tqdm import tqdm

# Possible classes
POSSIBLE_LABELS = [
    "actinic keratosis", "basal cell carcinoma", "dermatofibroma", "melanoma",
    "nevus", "seborrheic keratosis", "squamous cell carcinoma", "vascular lesion", "normal"
]

# Define prompt
def make_prompt(caption):
    return f"""
You are a medical assistant.

Caption: "{caption}"

Possible labels: {POSSIBLE_LABELS}

Task:
1. Pick the most likely label from the list.
2. Give reasoning in 1-2 short sentences.

Respond in JSON:
{{"label": "...", "rationale": "..."}}
"""

# Load a free Hugging Face model (no token needed)
classifier = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",   # free + ungated
    device=0 if torch.cuda.is_available() else -1
)

# Run classification
predictions, rationales = [], []
for caption in tqdm(df["caption"].tolist(), desc="Classifying with Flan-T5"):
    response = classifier(make_prompt(caption), max_new_tokens=100)[0]["generated_text"]
    try:
        result = json.loads(response)
        predictions.append(result.get("label", "unknown"))
        rationales.append(result.get("rationale", ""))
    except:
        predictions.append(response.strip())
        rationales.append("raw output")

df["llm_prediction"] = predictions
df["llm_rationale"] = rationales

# Save results
df.to_csv("captions_with_predictions.csv", index=False)
print("✅ Predictions saved to captions_with_predictions.csv")

# Show sample
print(df[["caption", "llm_prediction", "llm_rationale"]].head())


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
Classifying with Flan-T5:   0%|          | 10/2239 [00:22<1:21:08,  2.18s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Classifying with Flan-T5: 100%|██████████| 2239/2239 [1:21:55<00:00,  2.20s/it]

✅ Predictions saved to captions_with_predictions.csv
                                   caption  \
0            a close up of a piece of skin   
1           a close up of a pink substance   
2            a close up of a piece of skin   
3  a close up of a skin with a small patch   
4           a close up of a pink substance   

                                      llm_prediction llm_rationale  
0  Seborrheic keratosis is a type of skin conditi...    raw output  
1  Seborrheic keratosis is a type of skin conditi...    raw output  
2  Seborrheic keratosis is a type of skin conditi...    raw output  
3  Seborrheic keratosis is a skin condition in wh...    raw output  
4  Seborrheic keratosis is a type of skin conditi...    raw output  





In [7]:
sample_image_path = df["image_path"].iloc[0]  # just take the first image
print(sample_image_path)


/kaggle/input/skin-cancer9-classesisic/Skin cancer ISIC The International Skin Imaging Collaboration/Train/actinic keratosis/ISIC_0027580.jpg


In [8]:
from PIL import Image

# Load image
image = Image.open(sample_image_path).convert("RGB")

# BLIP captioning
inputs = processor(images=image, return_tensors="pt").to(DEVICE)
out = model.generate(**inputs, max_new_tokens=64)
caption = processor.decode(out[0], skip_special_tokens=True)

print("Generated Caption:", caption)


Generated Caption: a close up of a piece of skin


In [9]:
from transformers import pipeline
import torch, json

# Load free LLM
classifier = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    device=0 if torch.cuda.is_available() else -1
)

# Possible labels
POSSIBLE_LABELS = [
    "actinic keratosis", "basal cell carcinoma", "dermatofibroma", "melanoma",
    "nevus", "seborrheic keratosis", "squamous cell carcinoma", "vascular lesion", "normal"
]

def make_prompt(caption):
    return f"""
You are a medical assistant.

Caption: "{caption}"

Possible labels: {POSSIBLE_LABELS}

Task:
1. Pick the most likely label from the list.
2. Give reasoning in 1–2 short sentences.

Respond in JSON:
{{"label": "...", "rationale": "..."}}
"""

# Run prediction
response = classifier(make_prompt(caption), max_new_tokens=100)[0]["generated_text"]

try:
    result = json.loads(response)
except:
    result = {"label": response.strip(), "rationale": "raw output"}

print("Predicted Label:", result["label"])
print("Rationale:", result["rationale"])


Device set to use cuda:0


Predicted Label: Seborrheic keratosis is a type of skin condition. Seborrheic keratosis is a type of skin condition. The answer: seborrheic keratosis.
Rationale: raw output
