<a href="https://colab.research.google.com/github/Nichal9651/Capstone-project/blob/main/FineTuneing_the_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# ===============================
# BLIP: IMAGE CAPTIONING (BATCH PROCESSING)
# ===============================
import os
import pandas as pd
from pathlib import Path
from PIL import Image
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from tqdm.notebook import tqdm

# ---------- CONFIG ----------
import kagglehub
kaggle_root = kagglehub.dataset_download('nodoubttome/skin-cancer9-classesisic')
DATA_ROOT = os.path.join(
    kaggle_root,
    "Skin cancer ISIC The International Skin Imaging Collaboration",
    "Train"
)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 8  # Adjust based on GPU memory

# ---------- LOAD IMAGE PATHS + LABELS ----------
image_paths, labels = [], []
root_path = Path(DATA_ROOT)

for label_dir in sorted(root_path.iterdir()):
    if label_dir.is_dir():
        for img_file in label_dir.iterdir():
            if img_file.suffix.lower() in ['.png', '.jpg', '.jpeg']:
                image_paths.append(str(img_file))
                labels.append(label_dir.name)

df = pd.DataFrame({"image_path": image_paths, "label": labels})
print(f"✅ Loaded {len(df)} images across {df['label'].nunique()} classes.")

# ---------- LOAD BLIP MODEL ----------
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(DEVICE)

# ---------- GENERATE CAPTIONS IN BATCHES ----------
captions = []
for i in tqdm(range(0, len(df), BATCH_SIZE), desc="Generating captions in batches"):
    batch_paths = df["image_path"].iloc[i:i+BATCH_SIZE].tolist()
    images = [Image.open(p).convert("RGB") for p in batch_paths]
    inputs = processor(images=images, return_tensors="pt").to(DEVICE)
    out = model.generate(**inputs)
    batch_captions = processor.batch_decode(out, skip_special_tokens=True)
    captions.extend(batch_captions)

df["caption"] = captions

# ---------- SHOW SAMPLE CAPTIONS ----------
print("\nSample Captions:")
print(df[["image_path", "label", "caption"]].head())

# ---------- SAVE CAPTIONS ----------
df.to_csv("captions_dataset.csv", index=False)
print("✅ Captions saved to captions_dataset.csv")


Using Colab cache for faster access to the 'skin-cancer9-classesisic' dataset.


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


✅ Loaded 2239 images across 9 classes.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Generating captions in batches:   0%|          | 0/280 [00:00<?, ?it/s]


Sample Captions:
                                          image_path              label  \
0  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
1  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
2  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
3  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   
4  /kaggle/input/skin-cancer9-classesisic/Skin ca...  actinic keratosis   

                                   caption  
0            a close up of a piece of skin  
1           a close up of a pink substance  
2            a close up of a piece of skin  
3  a close up of a skin with a small patch  
4           a close up of a pink substance  
✅ Captions saved to captions_dataset.csv


In [8]:
# ===============================
# STEP 1: CONVERT BLIP CAPTIONS TO QUESTION FORMAT
# ===============================

# Create a new column 'new_label' that stores the question version of the caption
df["LLm-prompt"] = df["caption"].apply(
    lambda c: f"Based on the description '{c}', what type of skin disease is visible in this image?"
)

# Show sample converted prompts
print("\nSample Converted Captions to Questions:")
print(df[["caption", "LLm-prompt"]].head())

# Save updated dataframe
df.to_csv("captions_with_questions.csv", index=False)
print("✅ Captions converted to question format and saved to captions_with_questions.csv")



Sample Converted Captions to Questions:
                                   caption  \
0            a close up of a piece of skin   
1           a close up of a pink substance   
2            a close up of a piece of skin   
3  a close up of a skin with a small patch   
4           a close up of a pink substance   

                                          LLm-prompt  
0  Based on the description 'a close up of a piec...  
1  Based on the description 'a close up of a pink...  
2  Based on the description 'a close up of a piec...  
3  Based on the description 'a close up of a skin...  
4  Based on the description 'a close up of a pink...  
✅ Captions converted to question format and saved to captions_with_questions.csv


In [9]:
df


Unnamed: 0,image_path,label,caption,LLm-prompt
0,/kaggle/input/skin-cancer9-classesisic/Skin ca...,actinic keratosis,a close up of a piece of skin,Based on the description 'a close up of a piec...
1,/kaggle/input/skin-cancer9-classesisic/Skin ca...,actinic keratosis,a close up of a pink substance,Based on the description 'a close up of a pink...
2,/kaggle/input/skin-cancer9-classesisic/Skin ca...,actinic keratosis,a close up of a piece of skin,Based on the description 'a close up of a piec...
3,/kaggle/input/skin-cancer9-classesisic/Skin ca...,actinic keratosis,a close up of a skin with a small patch,Based on the description 'a close up of a skin...
4,/kaggle/input/skin-cancer9-classesisic/Skin ca...,actinic keratosis,a close up of a pink substance,Based on the description 'a close up of a pink...
...,...,...,...,...
2234,/kaggle/input/skin-cancer9-classesisic/Skin ca...,vascular lesion,a red spot on the skin of a woman,Based on the description 'a red spot on the sk...
2235,/kaggle/input/skin-cancer9-classesisic/Skin ca...,vascular lesion,a close up of a person ' s skin with a small r...,Based on the description 'a close up of a pers...
2236,/kaggle/input/skin-cancer9-classesisic/Skin ca...,vascular lesion,a close up of a red spot on a skin,Based on the description 'a close up of a red ...
2237,/kaggle/input/skin-cancer9-classesisic/Skin ca...,vascular lesion,a close up of a piece of skin,Based on the description 'a close up of a piec...


In [10]:
# ===============================
# PREPARE TRAINING DATA FOR LLM
# ===============================

# Select columns: llm_prompt (question) and label (answer)
train_df = df[["LLm-prompt", "label"]].copy()

# Rename them to input_text and target_text for training
train_df.rename(columns={"LLm-prompt": "input_text", "label": "target_text"}, inplace=True)

# Save to CSV
train_df.to_csv("llm_training_dataset.csv", index=False)
print("✅ Training dataset prepared with question–answer pairs (LLm-prompt → label).")


✅ Training dataset prepared with question–answer pairs (LLm-prompt → label).


In [11]:
train_df

Unnamed: 0,input_text,target_text
0,Based on the description 'a close up of a piec...,actinic keratosis
1,Based on the description 'a close up of a pink...,actinic keratosis
2,Based on the description 'a close up of a piec...,actinic keratosis
3,Based on the description 'a close up of a skin...,actinic keratosis
4,Based on the description 'a close up of a pink...,actinic keratosis
...,...,...
2234,Based on the description 'a red spot on the sk...,vascular lesion
2235,Based on the description 'a close up of a pers...,vascular lesion
2236,Based on the description 'a close up of a red ...,vascular lesion
2237,Based on the description 'a close up of a piec...,vascular lesion


In [13]:
!pip install --upgrade openai pandas tqdm




In [20]:
# ===============================
# FREE LLM CLASSIFIER (Flan-T5)
# ===============================
from transformers import pipeline
import json
from tqdm import tqdm
import torch

# Possible labels
POSSIBLE_LABELS = [
    "actinic keratosis", "basal cell carcinoma", "dermatofibroma", "melanoma",
    "nevus", "seborrheic keratosis", "squamous cell carcinoma",
    "vascular lesion", "normal"
]

# Prompt template
def make_prompt(question):
    return f"""
You are a dermatology assistant.
Question: {question}
Possible labels: {POSSIBLE_LABELS}

Respond strictly in JSON:
{{"label": "...", "reason": "..."}}
"""

# Load model (free + ungated)
llm = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    device=0 if torch.cuda.is_available() else -1
)

# Run prediction
preds, reasons = [], []
for q in tqdm(df["LLm-prompt"].tolist(), desc="Running Flan-T5 classification"):
    response = llm(make_prompt(q), max_new_tokens=100)[0]["generated_text"]
    try:
        r = json.loads(response)
        preds.append(r.get("label", "unknown").lower().strip())
        reasons.append(r.get("reason", ""))
    except:
        preds.append(response.strip().lower())
        reasons.append("raw output")

df["llm_prediction"] = preds
df["llm_reason"] = reasons
df.to_csv("flan_t5_predictions.csv", index=False)
print("✅ Predictions saved to flan_t5_predictions.csv")


Device set to use cuda:0
Running Flan-T5 classification:   0%|          | 10/2239 [00:05<17:48,  2.09it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Running Flan-T5 classification: 100%|██████████| 2239/2239 [16:55<00:00,  2.20it/s]

✅ Predictions saved to flan_t5_predictions.csv





In [21]:
from sklearn.metrics import accuracy_score, classification_report

df["true_label_clean"] = df["label"].str.lower().str.strip()
df["pred_clean"] = df["llm_prediction"].str.lower().str.strip()

acc = accuracy_score(df["true_label_clean"], df["pred_clean"])
print(f"\n✅ Flan-T5 Accuracy: {acc:.4f}")
print("\n", classification_report(df["true_label_clean"], df["pred_clean"]))



✅ Flan-T5 Accuracy: 0.0782

                             precision    recall  f1-score   support

         actinic keratosis       0.16      0.07      0.10       114
      basal cell carcinoma       0.00      0.00      0.00       376
            dermatofibroma       0.00      0.00      0.00        95
                  melanoma       0.10      0.03      0.04       438
                     nevus       1.00      0.00      0.01       357
                    normal       0.00      0.00      0.00         0
pigmented benign keratosis       0.00      0.00      0.00       462
      seborrheic keratosis       0.02      0.03      0.02        77
   squamous cell carcinoma       0.08      0.78      0.14       181
           vascular lesion       0.10      0.08      0.09       139

                  accuracy                           0.08      2239
                 macro avg       0.15      0.10      0.04      2239
              weighted avg       0.20      0.08      0.03      2239



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
!pip install transformers datasets accelerate sentencepiece -q


In [37]:
!pip install -U transformers


Collecting transformers
  Downloading transformers-4.57.0-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.57.0-py3-none-any.whl (12.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.56.2
    Uninstalling transformers-4.56.2:
      Successfully uninstalled transformers-4.56.2
Successfully installed transformers-4.57.0


In [30]:
print(train_df.columns.tolist())
from datasets import Dataset

train_dataset = Dataset.from_pandas(train_df)
print(train_dataset)



['input_text', 'target_text']
Dataset({
    features: ['input_text', 'target_text'],
    num_rows: 2239
})


In [42]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import Dataset
import evaluate

# Load base model and tokenizer
model_name = "google/flan-t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize the dataset
def preprocess(example):
    inputs = tokenizer(example["input_text"], truncation=True, padding="max_length", max_length=256)
    labels = tokenizer(example["target_text"], truncation=True, padding="max_length", max_length=64)
    inputs["labels"] = labels["input_ids"]
    return inputs

tokenized_dataset = train_dataset.map(preprocess, batched=True)

# Metrics
metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions.argmax(axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# Training setup
training_args = TrainingArguments(
    output_dir="./flan_t5_finetuned",
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    num_train_epochs=13,
    logging_dir="./logs",
    save_total_limit=1
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)

trainer.train()


Map:   0%|          | 0/2239 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
500,1.6033
1000,0.0368
1500,0.0346
2000,0.0332
2500,0.0325
3000,0.0311
3500,0.0309
4000,0.0305
4500,0.0298
5000,0.0293


TrainOutput(global_step=7280, training_loss=0.13898044123754397, metrics={'train_runtime': 4472.0002, 'train_samples_per_second': 6.509, 'train_steps_per_second': 1.628, 'total_flos': 9965616128851968.0, 'train_loss': 0.13898044123754397, 'epoch': 13.0})

In [39]:
trainer.save_model("./flan_t5_finetuned")


In [40]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_path = "./flan_t5_finetuned"
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)


In [41]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score

preds, refs = [], []

for i, row in tqdm(train_df.iterrows(), total=len(train_df)):
    input_text = row["input_text"]
    true_label = row["target_text"]

    inputs = tokenizer(input_text, return_tensors="pt", truncation=True).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=64)
    pred = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()

    preds.append(pred)
    refs.append(true_label.lower())

accuracy = accuracy_score(refs, preds)
print(f"✅ Fine-tuned Model Accuracy: {accuracy:.4f}")


100%|██████████| 2239/2239 [19:44<00:00,  1.89it/s]

✅ Fine-tuned Model Accuracy: 0.2519





In [43]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score

preds, refs = [], []

for i, row in tqdm(train_df.iterrows(), total=len(train_df)):
    input_text = row["input_text"]
    true_label = row["target_text"]

    inputs = tokenizer(input_text, return_tensors="pt", truncation=True).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=64)
    pred = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()

    preds.append(pred)
    refs.append(true_label.lower())

accuracy = accuracy_score(refs, preds)
print(f"✅ Fine-tuned Model Accuracy: {accuracy:.4f}")

100%|██████████| 2239/2239 [07:11<00:00,  5.19it/s]


✅ Fine-tuned Model Accuracy: 0.3001
