<a href="https://colab.research.google.com/github/IsurikaDilrukshi/Research_LLM_Finetune/blob/main/LLAva_Finetune_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the notebook can read files stored there.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Root folder of the image dataset on the mounted Drive; prepare_llava_dataset()
# treats each sub-folder name as a fault category.
# NOTE(review): the inference cells further down read images from
# ".../Research/Solar_image/images" instead — confirm which Drive path is correct.
dataset_dir = "/content/drive/MyDrive/Research/Solar image data/images"


 Step 3: Install Dependencies and Define Fault Descriptions

In [None]:
!pip install -U "transformers>=4.39.0"
!pip install peft bitsandbytes
!pip install -U "trl>=0.8.3"

Collecting transformers>=4.39.0
  Downloading transformers-4.53.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.53.2-py3-none-any.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m78.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.53.1
    Uninstalling transformers-4.53.1:
      Successfully uninstalled transformers-4.53.1
Successfully installed transformers-4.53.2
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from

In [None]:
# Canned answer text for each fault category. Keys are lower-case category names;
# prepare_llava_dataset() looks them up with the lower-cased dataset sub-folder name
# and uses the value as the assistant's reply for every image in that folder.
FAULT_DESCRIPTIONS = {
    "clean": "This is a clean solar panel in good condition. There are no visible signs of dirt, damage, or obstruction. It should function at optimal efficiency.",
    "dusty": "This solar panel is covered in dust. Dust accumulation blocks sunlight from reaching the cells, which significantly reduces energy output. Regular cleaning is required to maintain performance.",
    "physical damage": "The solar panel shows physical damage, such as cracks or broken glass. This can lead to decreased performance and may even pose safety risks due to short-circuiting or exposure to weather.",
    "bird-drops": "Bird droppings are visible on this panel. These block sunlight and can cause hotspots that damage the cells over time. The panel should be cleaned as soon as possible."
}


Step 4: Create Prompt-Answer Pairs for All Images

In [None]:
import os
from PIL import Image

def prepare_llava_dataset(dataset_dir):
    """Build LLaVA-style chat examples from a folder-per-category image dataset.

    Every sub-folder of ``dataset_dir`` whose lower-cased name is a key of
    ``FAULT_DESCRIPTIONS`` is scanned for ``.jpg``/``.jpeg``/``.png`` files. Each
    readable image becomes one example that pairs a fixed user question with the
    category's canned description. Unknown folders and unreadable images are
    reported with a printed message and skipped.

    Args:
        dataset_dir: Path of the directory that contains one sub-folder per category.

    Returns:
        A list of dicts with two keys: ``"messages"`` (a user turn holding an image
        placeholder plus the question, followed by an assistant turn holding the
        description) and ``"images"`` (a one-element list with the RGB PIL image).
    """
    data = []

    # Sorted listings make the example order independent of the file system, so
    # re-running the notebook produces the same sequence of examples.
    for category in sorted(os.listdir(dataset_dir)):
        category_dir = os.path.join(dataset_dir, category)

        if not os.path.isdir(category_dir):
            continue  # skip if it's not a folder

        # Get the fault description from the dictionary
        category_key = category.lower()
        if category_key not in FAULT_DESCRIPTIONS:
            print(f"⚠️ Skipping unknown category: {category}")
            continue

        description = FAULT_DESCRIPTIONS[category_key]

        for img_name in sorted(os.listdir(category_dir)):
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue  # skip non-image files

            img_path = os.path.join(category_dir, img_name)

            try:
                # The context manager releases the file handle immediately;
                # convert() returns a separate, fully loaded image that outlives it.
                with Image.open(img_path) as raw_image:
                    image = raw_image.convert("RGB")
            except Exception as e:
                # Best effort: one corrupt file must not abort the whole scan.
                print(f"❌ Skipping {img_path} due to error: {e}")
                continue

            # Construct the LLaVA-style prompt-answer format
            example = {
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "image"},
                            {"type": "text", "text": "Describe the fault in this solar panel image in detail. Explain what caused it and how it affects performance"}
                        ]
                    },
                    {
                        "role": "assistant",
                        "content": [
                            {"type": "text", "text": description}
                        ]
                    }
                ],
                "images": [image]  # the loaded PIL image
            }

            data.append(example)

    print(f" Prepared {len(data)} examples for fine-tuning")
    return data

# Generate dataset: build the full list of chat examples from the Drive folder
# configured in `dataset_dir`.
llava_training_data = prepare_llava_dataset(dataset_dir)

 Prepared 218 examples for fine-tuning


You can preview the prompt-answer structure for the first 2-3 samples:

In [None]:
# Show the question and the answer text of the first three prepared examples.
for position, example in enumerate(llava_training_data[:3], start=1):
    print(f"Sample {position}")
    turns = example["messages"]
    # Turn 0 is the user (content: image placeholder, then text); turn 1 is the assistant.
    print("User Prompt:", turns[0]["content"][1]["text"])
    print("Assistant Answer:", turns[1]["content"][0]["text"])
    print()


Sample 1
User Prompt: Describe the fault in this solar panel image in detail. Explain what caused it and how it affects performance
Assistant Answer: Bird droppings are visible on this panel. These block sunlight and can cause hotspots that damage the cells over time. The panel should be cleaned as soon as possible.

Sample 2
User Prompt: Describe the fault in this solar panel image in detail. Explain what caused it and how it affects performance
Assistant Answer: Bird droppings are visible on this panel. These block sunlight and can cause hotspots that damage the cells over time. The panel should be cleaned as soon as possible.

Sample 3
User Prompt: Describe the fault in this solar panel image in detail. Explain what caused it and how it affects performance
Assistant Answer: Bird droppings are visible on this panel. These block sunlight and can cause hotspots that damage the cells over time. The panel should be cleaned as soon as possible.



In [None]:
import random

# Fixed seed so the train/validation/test membership is identical on every run.
SPLIT_SEED = 42

train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15
# The test split receives whatever is left after train and validation, so the three
# ratios have to describe the whole dataset.
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "split ratios must sum to 1"

# Shuffle a copy with a private RNG: re-running this cell yields the same split for
# the same input order, and neither the source list nor the global `random` state
# is modified.
shuffled_examples = list(llava_training_data)
random.Random(SPLIT_SEED).shuffle(shuffled_examples)

n_total = len(shuffled_examples)
n_train = int(train_ratio * n_total)
n_val = int(val_ratio * n_total)

train_data = shuffled_examples[:n_train]
val_data = shuffled_examples[n_train:n_train + n_val]
test_data = shuffled_examples[n_train + n_val:]

# Every example must land in exactly one split.
assert len(train_data) + len(val_data) + len(test_data) == n_total


print(f" Train samples: {len(train_data)}")
print(f" Eval samples: {len(val_data)}")
print(f" Test samples: {len(test_data)}")

 Train samples: 152
 Eval samples: 32
 Test samples: 34


Define the PyTorch Dataset class

Wrap your split lists

In [None]:
from torch.utils.data import Dataset
from datasets import Dataset as hfDataset # Import the Hugging Face Dataset

class LlavaSolarDataset(Dataset):
    """Map-style PyTorch dataset that serves pre-built examples from a list."""

    def __init__(self, data):
        # `data` is a list of dicts with "messages" and "images"; it is stored by
        # reference, nothing is copied or transformed.
        self.data = data

    def __len__(self):
        """Return how many examples the wrapped list holds."""
        sample_count = len(self.data)
        return sample_count

    def __getitem__(self, idx):
        """Return the example stored at position ``idx``, unchanged."""
        sample = self.data[idx]
        return sample

# Wrap every split twice: first as a PyTorch dataset, then as a Hugging Face dataset.
train_dataset, val_dataset, test_dataset = (
    LlavaSolarDataset(split) for split in (train_data, val_data, test_data)
)

#Convert to Hugging Face Dataset format
train_dataset_hf, val_dataset_hf, test_dataset_hf = (
    hfDataset.from_list(split) for split in (train_data, val_data, test_data)
)

Sanity Check a Few Samples

3.1 Install Required Packages

These libraries enable 4-bit quantization, LoRA, and LLaVA support.

 3.2 Import Required Libraries

In [None]:
import torch
from transformers import (
AutoTokenizer,
AutoProcessor,
LlavaForConditionalGeneration,
BitsAndBytesConfig
)

3.3 Set Model ID

In [None]:
# Hugging Face Hub identifier of the base checkpoint; used below to load the model,
# the tokenizer and the processor.
model_id = "llava-hf/llava-1.5-7b-hf" # Or: "llava-hf/llava-v1.6-mistral-7b-hf"

 3.4 Define BitsAndBytesConfig for 4-bit Quantization

In [None]:
# bitsandbytes settings: load weights in 4-bit NF4 with double quantization enabled
# and use float16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

 3.5 Load the Model

In [None]:
# Load the base checkpoint with the 4-bit quantization settings defined above.
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    quantization_config=quant_config,
    torch_dtype=torch.float16,
    device_map="auto",  # Automatically assigns layers to available GPUs/CPUs
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/950 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

3.6 Load Tokenizer and Processor

In [None]:
# Load the tokenizer and the processor published with `model_id`. Both are used
# later: the tokenizer renders chat templates, the processor builds model inputs.
tokenizer = AutoTokenizer.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/674 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


 3.7 (Optional but Important) Define Chat Template for LLaVA

In [None]:
# Jinja chat template in the "USER: ... ASSISTANT: ..." format.
#   * a user turn renders as "USER: " + its items ("<image>" for an image item, the
#     raw text for a text item) + a trailing space;
#   * an assistant turn renders as "ASSISTANT: " + its text + the EOS token;
#   * with add_generation_prompt=True a final "ASSISTANT: " is appended, so that
#     generation starts inside the assistant's answer. With the flag unset or False
#     the rendered text is exactly what the template produced before, so training
#     inputs are unchanged.
LLAVA_CHAT_TEMPLATE = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}"""

# Install the template on the tokenizer and make the processor use that tokenizer.
tokenizer.chat_template = LLAVA_CHAT_TEMPLATE
processor.tokenizer = tokenizer

Step 4: Add Trainable LoRA Adapter (Required for 4-bit Finetuning)

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA hyper-parameters: rank-64 adapters on every module named q/k/v/o_proj.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Prepare the 4-bit model for training first (✅ important for 4-bit models),
# then wrap it with the LoRA adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)

model.print_trainable_parameters()


trainable params: 76,546,048 || all params: 7,139,973,120 || trainable%: 1.0721


 4.2 Attach LoRA Adapter to the Base Model

In [None]:
# Attach the LoRA adapter only when the model is not a PEFT model yet. Calling
# get_peft_model() on an already wrapped model nests one PeftModel/LoraModel pair
# inside another.
from peft import PeftModel

if not isinstance(model, PeftModel):
    model = get_peft_model(model, lora_config)



In [None]:
def print_trainable_parameters(model):
    """Print how many of ``model``'s parameter elements require gradients.

    Args:
        model: Any object exposing ``parameters()`` whose items provide ``numel()``
            and ``requires_grad`` (for example a ``torch.nn.Module``).

    Counts are taken from ``numel()`` of each stored parameter tensor. For a model
    without parameters the percentage is reported as 0.00 instead of raising
    ``ZeroDivisionError``.
    """
    trainable = 0
    total = 0
    # Single pass over the parameters instead of walking them twice.
    for param in model.parameters():
        element_count = param.numel()
        total += element_count
        if param.requires_grad:
            trainable += element_count

    percentage = 100 * trainable / total if total else 0.0
    print(f"✅ Trainable parameters: {trainable} / {total} ({percentage:.2f}%)")

# Report the trainable / total parameter counts of the current `model`.
print_trainable_parameters(model)

✅ Trainable parameters: 76546048 / 3740489728 (2.05%)


In [None]:
class LlavaDataCollator:
    """Turn a list of chat examples into one padded training batch.

    Each example is a dict with ``"messages"`` (chat turns) and ``"images"`` (a list
    whose first element is the image for that example). The messages are rendered to
    text with the tokenizer's chat template, then text and images are passed through
    the processor together. ``labels`` is a copy of ``input_ids`` in which padding
    and image-placeholder positions are set to -100 so they are ignored by the loss.
    """

    def __init__(self, processor):
        self.processor = processor  # includes tokenizer and image processor

    def __call__(self, examples):
        """Collate ``examples`` and return the processor's batch with ``labels`` added."""
        tokenizer = self.processor.tokenizer
        texts = []
        images = []

        for example in examples:
            # Convert messages (chat) to a single text prompt using the chat template.
            text = tokenizer.apply_chat_template(
                example["messages"],
                tokenize=False,
                add_generation_prompt=False,  # We're providing ground truth
            )
            texts.append(text)
            images.append(example["images"][0])  # First image per sample

        # Keyword arguments: the positional order of `images` and `text` is not the
        # same in every transformers release, the keywords are.
        batch = self.processor(
            images=images,
            text=texts,
            return_tensors="pt",
            padding=True,
        )

        # Create labels from input_ids
        labels = batch["input_ids"].clone()

        # Set pad_token_id as -100 in labels to ignore during loss
        if tokenizer.pad_token_id is not None:
            labels[labels == tokenizer.pad_token_id] = -100

        # Image placeholder positions are not text the model should learn to emit;
        # exclude them from the loss as well. Processors that do not expose an
        # `image_token` attribute are left untouched.
        image_token = getattr(self.processor, "image_token", None)
        if image_token is not None:
            image_token_id = tokenizer.convert_tokens_to_ids(image_token)
            if image_token_id is not None:
                labels[labels == image_token_id] = -100

        batch["labels"] = labels

        return batch

In [None]:
from torch.utils.data import DataLoader

# Collator instance shared by the DataLoader and the trainer created below.
data_collator = LlavaDataCollator(processor)

In [None]:
# Batches of two examples from the PyTorch training dataset, collated by `data_collator`.
# NOTE(review): `train_loader` is not passed to the SFTTrainer created further down
# (it receives `train_dataset_hf` and the collator directly) — confirm it is still needed.
train_loader = DataLoader(train_dataset, batch_size=2, collate_fn=data_collator)

Step 5: Set Up Training Configuration (TrainingArguments)

In [None]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    # Output locations: checkpoints, TensorBoard logs
    output_dir="./llava-solar-ft",
    logging_dir="./logs",
    report_to="tensorboard",
    # Batch sizing: 1 sample per device x 1 accumulation step = effective batch of 1
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=1,
    # Optimisation schedule
    learning_rate=2e-4,
    num_train_epochs=10,
    # Cadence, in steps: log every 10, evaluate every 50, checkpoint every 100
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_steps=100,
    save_total_limit=2,  # keep only the 2 most recent checkpoints
    # Memory savers: 16-bit training and gradient checkpointing
    fp16=True,
    gradient_checkpointing=True,
    # Keep the "messages"/"images" columns so the custom collator can read them
    remove_unused_columns=False,
)

In [None]:
from trl import SFTTrainer
from datasets import Dataset # Import Dataset from datasets library

# Supervised fine-tuning driver: the Hugging Face datasets supply the examples and
# the custom collator turns them into model inputs.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset_hf,
    eval_dataset=val_dataset_hf,
)

Tokenizing train dataset:   0%|          | 0/152 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/152 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/32 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/32 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Switch the model to training mode before starting the trainer.
model.train()  # ✅ Ensure model is in training mode


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PeftModelForCausalLM(
      (base_model): LoraModel(
        (model): LlavaForConditionalGeneration(
          (model): LlavaModel(
            (vision_tower): CLIPVisionModel(
              (vision_model): CLIPVisionTransformer(
                (embeddings): CLIPVisionEmbeddings(
                  (patch_embedding): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14), bias=False)
                  (position_embedding): Embedding(577, 1024)
                )
                (pre_layrnorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
                (encoder): CLIPEncoder(
                  (layers): ModuleList(
                    (0-23): 24 x CLIPEncoderLayer(
                      (self_attn): CLIPAttention(
                        (k_proj): lora.Linear4bit(
                          (base_layer): Linear4bit(in_features=1024, out_features=1024, bias=True)
                          (lora_dropout): ModuleDict(
 

In [None]:
import warnings
# Hide warnings whose message starts with "None of the inputs have requires_grad=True".
# NOTE(review): presumably raised by gradient checkpointing during training — confirm
# it is safe to silence rather than a sign that no gradients reach the adapters.
warnings.filterwarnings("ignore", message="None of the inputs have requires_grad=True*")


In [None]:
# Run the fine-tuning loop configured in `training_args`.
# NOTE(review): in the recorded run below, training and validation loss both level
# off near 3.29 from about step 200 onwards — worth investigating before relying on
# the resulting model.
trainer.train()


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss,Validation Loss
50,3.8187,3.735749
100,3.3698,3.362265
150,3.3162,3.318796
200,3.2966,3.298306
250,3.2963,3.294652
300,3.2835,3.292778
350,3.2798,3.292519
400,3.2875,3.291946
450,3.2869,3.292073
500,3.2928,3.2909


In [None]:
# Print the module tree of the model.
# NOTE(review): the recorded output of this cell is "NameError: name 'model' is not
# defined", i.e. it was executed in a kernel where `model` did not exist — re-run the
# model-loading cells first.
print(model)

NameError: name 'model' is not defined

In [None]:
# Dump every entry the trainer recorded in its log history, one per line.
for record in trainer.state.log_history:
    print(record)


In [None]:
print("✅ Running evaluation on test set...")
# `test_dataset` is the LlavaSolarDataset wrapper around the held-out test split;
# the metrics dict returned by the trainer is printed as-is.
test_metrics = trainer.evaluate(test_dataset)
print("📊 Test set metrics:", test_metrics)


In [None]:
# Called without arguments, so the trainer evaluates on the eval dataset it was
# constructed with (`val_dataset_hf`).
metrics = trainer.evaluate()
print(metrics)


Save the model

In [None]:
# Write the trained model (through the trainer) and the processor into the same
# local folder so they can be copied or uploaded together.
trainer.save_model("./llava-solar-finetuned")
processor.save_pretrained("./llava-solar-finetuned")


To keep your model safe, move it to your Google Drive:

In [None]:
# Copy the saved model folder into Google Drive so it survives the Colab session.
!cp -r ./llava-solar-finetuned "/content/drive/MyDrive/Research/Solar_image"


If you need to reload the fine-tuned model later, see the "Load It Later in Any Notebook" section below.

##Test model

In [None]:
from PIL import Image
import torch

# Load image
image_path = "/content/drive/MyDrive/Research/Solar_image/images/dusty/Dust (2).jpg"
image = Image.open(image_path).convert("RGB")
display(image)

# Define prompt
prompt = "What do you see in this solar panel image?"
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": prompt}
        ]
    }
]

# Format input. Keyword arguments keep the processor call independent of the
# positional order of `images`/`text`; the tensors are moved to the device the
# model itself lives on instead of a hard-coded "cuda".
chat_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
inputs = processor(images=image, text=chat_text, return_tensors="pt").to(model.device)

# Generate output
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=100)

# Decode only the newly generated tokens: generate() returns prompt + continuation,
# and decoding everything would echo the system prompt and the question back.
prompt_length = inputs["input_ids"].shape[1]
decoded = processor.tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
# Drop a leading role tag in case the model wrote "ASSISTANT:" itself.
response = decoded.split("ASSISTANT:")[-1].strip()

# ✅ Print both user input and assistant response
print("🗨️ User Prompt:", prompt)
print("🤖 Model Response:", response)


In [None]:
from PIL import Image
import torch

# Load image
image_path = "/content/drive/MyDrive/Research/Solar_image/images/clean/Clean (21).jpg"
image = Image.open(image_path).convert("RGB")
display(image)

# Define prompt
prompt = "Is there any fault in this solar panel?"
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": prompt}
        ]
    }
]

# Format input. Keyword arguments keep the processor call independent of the
# positional order of `images`/`text`; the tensors are moved to the device the
# model itself lives on instead of a hard-coded "cuda".
chat_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
inputs = processor(images=image, text=chat_text, return_tensors="pt").to(model.device)

# Generate output
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=100)

# Decode only the newly generated tokens: generate() returns prompt + continuation,
# and decoding everything would echo the system prompt and the question back.
prompt_length = inputs["input_ids"].shape[1]
decoded = processor.tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
# Drop a leading role tag in case the model wrote "ASSISTANT:" itself.
response = decoded.split("ASSISTANT:")[-1].strip()

# ✅ Print both user input and assistant response
print("🗨️ User Prompt:", prompt)
print("🤖 Model Response:", response)


In [None]:
from PIL import Image
from IPython.display import display, Markdown
import torch

# Load image
image_path = "/content/drive/MyDrive/Research/Solar_image/images/physical damage/crack_083_jpg.rf.946fef3302b7c58ab3c0e3cbaabef6fb.jpg"
image = Image.open(image_path).convert("RGB")

# Display image
display(image)

# Define prompt
prompt = "What is the fault in this solar panel image?"
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": prompt}
        ]
    }
]

# Format input for the model. Keyword arguments keep the processor call independent
# of the positional order of `images`/`text`; the tensors are moved to the device
# the model itself lives on instead of a hard-coded "cuda".
chat_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
inputs = processor(images=image, text=chat_text, return_tensors="pt").to(model.device)

# Generate output
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=100)

# Decode only the newly generated tokens: generate() returns prompt + continuation,
# and decoding everything would echo the system prompt and the question back.
prompt_length = inputs["input_ids"].shape[1]
decoded = processor.tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
# Drop a leading role tag in case the model wrote "ASSISTANT:" itself.
response = decoded.split("ASSISTANT:")[-1].strip()

# Display prompt and response
display(Markdown(f"**🗨️ User Prompt:** {prompt}"))
display(Markdown(f"**🤖 Model Response:** {response}"))


In [None]:
from PIL import Image
from IPython.display import display, Markdown
import torch

# Load image
image_path = "/content/drive/MyDrive/Research/Solar_image/images/physical damage/71_jpg.rf.a6d05c0002d3a5789af082f67ebc5263.jpg"
image = Image.open(image_path).convert("RGB")

# Display image
display(image)

# Define prompt
prompt = "What is the fault in this solar panel image?"
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": prompt}
        ]
    }
]

# Format input for the model. Keyword arguments keep the processor call independent
# of the positional order of `images`/`text`; the tensors are moved to the device
# the model itself lives on instead of a hard-coded "cuda".
chat_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
inputs = processor(images=image, text=chat_text, return_tensors="pt").to(model.device)

# Generate output
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=200)

# Decode only the newly generated tokens: generate() returns prompt + continuation,
# and decoding everything would echo the system prompt and the question back.
prompt_length = inputs["input_ids"].shape[1]
decoded = processor.tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
# Drop a leading role tag in case the model wrote "ASSISTANT:" itself.
response = decoded.split("ASSISTANT:")[-1].strip()

# Display prompt and response
display(Markdown(f"**🗨️ User Prompt:** {prompt}"))
display(Markdown(f"**🤖 Model Response:** {response}"))


##Conversational chat

In [None]:
from PIL import Image
from IPython.display import display, Markdown
import torch

# Load the first image (used only once)
image_path = "/content/drive/MyDrive/Research/Solar_image/images/physical damage/71_jpg.rf.a6d05c0002d3a5789af082f67ebc5263.jpg"
image = Image.open(image_path).convert("RGB")
display(image)

# Define initial conversation
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},  # Only in the first user turn
            {"type": "text", "text": "What is the fault in this solar panel image?"}
        ]
    }
]

# === Conversation Loop ===
# One iteration = one model answer followed by one follow-up question from the user.
# The whole loop must live in a single cell: the follow-up handling (including the
# `break`) is part of the loop body. Type "exit" or "quit" to stop.
while True:
    # Format input with chat history and generate response
    chat_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    inputs = processor(images=image, text=chat_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=200)

    # Decode only the newly generated tokens; otherwise the full chat history
    # would be decoded again on every turn.
    prompt_length = inputs["input_ids"].shape[1]
    decoded = processor.tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    # Drop a leading role tag in case the model wrote "ASSISTANT:" itself.
    response = decoded.split("ASSISTANT:")[-1].strip()

    # Display model response
    display(Markdown(f"**🤖 Model Response:** {response}"))

    # Add model response to conversation
    conversation.append({
        "role": "assistant",
        "content": [{"type": "text", "text": response}]
    })

    # Ask for next user input
    follow_up = input("🗨️ You: ").strip()
    if follow_up.lower() in ["exit", "quit"]:
        break

    # Add new user question (no image after first round)
    conversation.append({
        "role": "user",
        "content": [{"type": "text", "text": follow_up}]
    })


##Save the model to huggingface

➡️ It will ask you to paste your HF token
👉 Get it from: https://huggingface.co/settings/tokens

In [None]:
# Open the interactive Hugging Face login prompt; paste the access token there
# rather than writing it into the notebook.
from huggingface_hub import notebook_login
notebook_login()


Go to https://huggingface.co/new
Set:

Model repo name: llava-solar-finetuned

Visibility: Private (or Public if you want)

Copy your repo name — e.g., your-username/llava-solar-finetuned

Push Your Model

In [None]:
from huggingface_hub import create_repo, upload_folder

# Replace "your-username" with your Hugging Face account or organisation name.
repo_id = "your-username/llava-solar-finetuned"

# Create the repository when it is missing. `exist_ok=True` turns the call into a
# no-op if the repo was already created by hand (as described above) instead of
# raising an error.
create_repo(repo_id, private=True, exist_ok=True)

# Upload LoRA adapter + processor (saved after training)
upload_folder(
    repo_id=repo_id,
    folder_path="./llava-solar-finetuned",
    path_in_repo=".",
    commit_message="Upload fine-tuned LLaVA solar fault model"
)


Load It Later in Any Notebook

In [None]:
from transformers import AutoProcessor
from peft import PeftModel
from transformers import LlavaForConditionalGeneration, BitsAndBytesConfig

# This cell is meant to run in a fresh notebook, so it imports everything it uses:
# `torch` is needed for torch.float16 below.
import torch
from huggingface_hub import snapshot_download

# Load base model
base_model_id = "llava-hf/llava-1.5-7b-hf"
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

model = LlavaForConditionalGeneration.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    quantization_config=quant_config,
    device_map="auto"
)

# Load processor and LoRA adapter from Hugging Face
repo_id = "your-username/llava-solar-finetuned"

snapshot_download(repo_id)  # Optional to force download

processor = AutoProcessor.from_pretrained(repo_id)
model = PeftModel.from_pretrained(model, repo_id)

# The inference and Gradio cells call `tokenizer.apply_chat_template(...)`; expose
# the processor's tokenizer under that name so they work after a reload.
# NOTE(review): assumes the uploaded processor folder carries the custom chat
# template set during training — confirm after the first reload.
tokenizer = processor.tokenizer

print("✅ Model loaded from Hugging Face successfully!")


##Gradio

In [None]:
!pip install gradio --quiet


Define memory + response function

In [None]:
import gradio as gr
from PIL import Image
import torch

# 🧠 Global memory to store current image and chat history
# NOTE(review): module-level state is shared by every caller of solar_fault_chat().
current_image = None
chat_history = []


def _build_conversation(past_turns, user_input):
    """Return the chat message list for ``past_turns`` followed by ``user_input``.

    Only the very first user turn carries the image placeholder, because exactly one
    image is handed to the processor per request.
    """
    conversation = []

    def user_turn(text):
        content = [{"type": "text", "text": text}]
        if not conversation:
            # First turn of the dialogue: this is where the image belongs.
            content.insert(0, {"type": "image"})
        return {"role": "user", "content": content}

    for user_msg, bot_msg in past_turns:
        conversation.append(user_turn(user_msg))
        conversation.append({
            "role": "assistant",
            "content": [{"type": "text", "text": bot_msg}]
        })

    conversation.append(user_turn(user_input))
    return conversation


# 🧠 Define chatbot function
def solar_fault_chat(image, user_input, history):
    """Answer one user question about the uploaded solar panel image.

    Args:
        image: The uploaded image, or ``None`` when nothing has been uploaded.
        user_input: The user's question.
        history: Accepted for interface compatibility; the conversation is tracked
            in the module-level ``chat_history`` instead.

    Returns:
        ``(response, history_pairs)`` where ``history_pairs`` is a list of
        ``(user_message, bot_message)`` tuples to show in the chat window.
    """
    global current_image, chat_history

    # Without an image the prompt's image placeholder cannot be filled; tell the
    # user instead of failing inside the processor. The notice is shown but not
    # stored, so it never becomes part of a later prompt.
    if image is None:
        notice = "Please upload a solar panel image first."
        return notice, chat_history + [(user_input, notice)]

    # If new image is uploaded, reset chat history
    if image != current_image:
        current_image = image
        chat_history = []  # Clear chat history for new image

    conversation = _build_conversation(chat_history, user_input)

    # Format input using tokenizer’s chat template
    chat_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)

    # Prepare inputs for model (keywords avoid depending on positional order;
    # tensors follow the model's device)
    inputs = processor(images=image, text=chat_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=150)

    # Decode only the newly generated tokens, not the prompt that precedes them.
    prompt_length = inputs["input_ids"].shape[1]
    decoded = processor.tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    # Drop a leading role tag in case the model wrote "ASSISTANT:" itself.
    response = decoded.split("ASSISTANT:")[-1].strip()

    # Update history
    chat_history.append((user_input, response))

    return response, chat_history


Create Gradio Interface

In [None]:
# Gradio Blocks Interface for chat + image
def chat_wrapper(img, msg, history):
    """Run one chat turn; return an empty textbox value and the updated chat pairs."""
    _, updated_history = solar_fault_chat(img, msg, history)
    return "", updated_history


with gr.Blocks() as demo:
    gr.Markdown("## ☀️ Solar Panel Fault Diagnosis Chatbot")

    # Top row: image upload next to the chat window.
    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Solar Panel Image")
        chatbot = gr.Chatbot()

    # Bottom row: question box and send button.
    with gr.Row():
        user_input = gr.Textbox(label="Ask your question about the image")
        submit_btn = gr.Button("Send")

    # When button clicked, call solar_fault_chat through the wrapper.
    submit_btn.click(
        fn=chat_wrapper,
        inputs=[image_input, user_input, chatbot],
        outputs=[user_input, chatbot],
    )

demo.launch(share=True)
