<a href="https://colab.research.google.com/github/SinaLab/ImageEvalSharedTask2025/blob/main/ImageValFinetune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1.1 Check GPU and Install Dependencies

In [None]:
# Check GPU
!nvidia-smi

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install LlamaFactory
!git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
%cd LLaMA-Factory
!pip install -e ".[torch,metrics]"

# Install additional dependencies
!pip install transformers>=4.41.0
!pip install accelerate
!pip install peft

Tue Jun 24 07:56:15 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

# 1.2 Verify Installation

In [None]:
import torch

# Query CUDA availability once and reuse the answer for every report line.
cuda_ok = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")

# Name of device 0, or the literal string 'None' when no GPU is visible.
gpu_name = torch.cuda.get_device_name(0) if cuda_ok else 'None'
print(f"GPU: {gpu_name}")

# Total memory of device 0, reported in decimal gigabytes (bytes / 1e9).
if cuda_ok:
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU Memory: {total_bytes / 1e9:.1f} GB")

PyTorch version: 2.6.0+cu124
CUDA available: True
GPU: NVIDIA A100-SXM4-40GB
GPU Memory: 42.5 GB


# Step 2: Prepare Your Dataset

# 2.1 Create Data Structure

In [None]:
import os

# Root folder on Google Drive for this subtask; the images/ subfolder is
# created if it does not exist yet (parents are created as needed).
base_dir = "/content/drive/MyDrive/ImageEval_Subtask2"
images_dir = f"{base_dir}/images"
os.makedirs(images_dir, exist_ok=True)

# Remind the user which files are expected to be in place.
print(f"Base directory: {base_dir}")
for reminder in (
    "Make sure you have:",
    "1. TrainSubtask2.xlsx in the base directory",
    "2. Image files in the images/ folder",
):
    print(reminder)

Base directory: /content/drive/MyDrive/ImageEval_Subtask2
Make sure you have:
1. TrainSubtask2.xlsx in the base directory
2. Image files in the images/ folder


# 2.2 Generate Training Data

In [None]:
import pandas as pd
import json
import os
'''FRom Me '''
# Install required packages
!pip install --quiet gspread gspread_dataframe

# Authenticate Google account
from google.colab import auth
auth.authenticate_user()

import gspread
from gspread_dataframe import get_as_dataframe
from google.auth import default

# Authorize and access the sheet
creds, _ = default()
gc = gspread.authorize(creds)

# Open the Google Sheet by its name
spreadsheet = gc.open("TrainSubtask2")  # Sheet name must match what you see in Drive
worksheet = spreadsheet.sheet1  # or .worksheet("Your Sheet Name") if not the first tab

# Convert to pandas DataFrame
df = get_as_dataframe(worksheet, evaluate_formulas=True)

'''End me '''










# Read Excel file
#excel_file = "/content/drive/MyDrive/ImageVal/Train/TrainSubtask2.xlsx"
#df = pd.read_excel(excel_file)

# Create training data with ABSOLUTE paths
training_data = []

for _, row in df.iterrows():
    if pd.notna(row['File Name']) and pd.notna(row['Description']):
        # Use absolute path to your images
        image_path = f"/content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain/{row['File Name']}.jpg"

        entry = {
            "conversations": [
                {
                    "from": "human",
                    "value": "<image>Describe this image in Arabic."
                },
                {
                    "from": "gpt",
                    "value": str(row['Description'])
                }
            ],
            "images": [
                image_path
            ]
        }
        training_data.append(entry)

# Save with absolute paths
with open("/content/drive/MyDrive/ImageEval_Subtask2/llamafactory_training_data.json", 'w', encoding='utf-8') as f:
    json.dump(training_data, f, ensure_ascii=False, indent=2)

print(f"Created {len(training_data)} training examples with ABSOLUTE paths")
print(f"Example path: {training_data[0]['images'][0]}")

# Verify all paths exist
missing_count = 0
for i, entry in enumerate(training_data):
    image_path = entry['images'][0]
    if not os.path.exists(image_path):
        if missing_count < 5:  # Show first 5 missing
            print(f"Missing: {image_path}")
        missing_count += 1

if missing_count == 0:
    print("✅ All image paths verified!")
else:
    print(f"❌ Found {missing_count} missing images")

Created 2717 training examples with ABSOLUTE paths
Example path: /content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain/S.I.PH01.01.001.jpg
✅ All image paths verified!


# 2.3 Verify Image Files

In [None]:
import os
from PIL import Image

image_dir = "/content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain"
missing_images = []
found_images = []
invalid_images = []

for entry in training_data:
    image_path = entry['images'][0]
    # os.path.join returns image_path unchanged when it is already absolute,
    # so this handles both absolute entries and entries relative to image_dir.
    full_path = os.path.join(image_dir, image_path)

    if not os.path.exists(full_path):
        missing_images.append(image_path)
        continue

    found_images.append(image_path)
    # Verify it's a valid image; the context manager closes the file handle
    # so thousands of images do not leave descriptors open.
    try:
        with Image.open(full_path) as img:
            img.verify()
    except Exception as e:
        invalid_images.append(image_path)
        print(f"Invalid image: {image_path} - {e}")

print(f"Found {len(found_images)} images")
print(f"Missing {len(missing_images)} images")
if invalid_images:
    print(f"Invalid {len(invalid_images)} images")

if missing_images:
    print("Missing files:")
    for missing_path in missing_images[:5]:
        print(f"  - {missing_path}")

Found 2717 images
Missing 0 images


# Step 3: Register Dataset in LlamaFactory

In [None]:
import json
import os

# Input JSON produced by the data-generation step, and the copy written here.
source_json_path = "/content/drive/MyDrive/ImageEval_Subtask2/llamafactory_training_data.json"
output_json_path = "/content/drive/MyDrive/ImageVal/llamafactory_training_data.json"
# Root that relative image paths are resolved against.
image_root = "/content/drive/MyDrive/ImageVal"

# NOTE(review): this cell only rewrites image paths; it does not add an entry
# to LLaMA-Factory's dataset registry. Confirm that the `arabic_captions`
# dataset named in the training config is registered elsewhere.

# Load your current training data
with open(source_json_path, 'r', encoding='utf-8') as f:
    training_data = json.load(f)

print(f"Loaded {len(training_data)} entries")

# Convert relative paths to absolute paths.
# Paths that are already absolute are left untouched: prefixing them again
# produces doubled, non-existent paths and marks every image as missing.
converted_count = 0
for entry in training_data:
    for idx, image_path in enumerate(entry['images']):
        if not os.path.isabs(image_path):
            entry['images'][idx] = os.path.join(image_root, image_path)
            converted_count += 1

# Save updated JSON with absolute paths
os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
with open(output_json_path, 'w', encoding='utf-8') as f:
    json.dump(training_data, f, ensure_ascii=False, indent=2)

print("✅ Updated all paths to absolute paths")
print(f"Converted {converted_count} relative path(s); the rest were already absolute")
if training_data and training_data[0]['images']:
    print(f"Example path: {training_data[0]['images'][0]}")

# Verify the paths work now
print("\nVerifying updated paths...")
missing_count = 0
for i, entry in enumerate(training_data):
    image_path = entry['images'][0]
    exists = os.path.exists(image_path)

    if not exists:
        if missing_count < 3:  # Show first 3 missing
            print(f"❌ Missing: {image_path}")
        missing_count += 1
    elif i < 3:  # Show first 3 found
        print(f"✅ Found: {image_path}")

print(f"\nSummary: {len(training_data) - missing_count} found, {missing_count} missing")

if missing_count == 0:
    print("🎉 All paths fixed! Ready for training.")
else:
    print(f"⚠️ Still have {missing_count} missing files")

Loaded 2717 entries
✅ Updated all paths to absolute paths
Example path: /content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain//content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain/S.I.PH01.01.001.jpg

Verifying updated paths...
❌ Missing: /content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain//content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain/S.I.PH01.01.001.jpg
❌ Missing: /content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain//content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain/S.I.PH01.01.002.jpg
❌ Missing: /content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain//content/drive/MyDrive/ImageEval_Subtask2/ImageEvalTrain/S.I.PH01.01.003.jpg

Summary: 0 found, 2717 missing
⚠️ Still have 2717 missing files


# Step 4: Create Training Configuration

In [None]:
# Destination of the YAML file; this is the path handed to the training CLI.
# Use this if you get OOM errors
conservative_config_path = "/content/qwen_arabic_conservative.yaml"

# Memory-conservative LoRA fine-tuning settings aimed at a T4 GPU (15GB VRAM):
# batch size 1 with 16 gradient-accumulation steps, fp16, gradient
# checkpointing, LoRA rank 8, and a capped image resolution.
conservative_config = """### model
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
image_max_pixels: 131072
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.1
lora_target: all

### dataset
dataset: arabic_captions
template: qwen2_vl
cutoff_len: 1024
overwrite_cache: true
preprocessing_num_workers: 2
dataloader_num_workers: 1

### output
output_dir: /content/drive/MyDrive/ImageVal/qwen2_5vl_arabic_model
logging_steps: 5
save_steps: 25
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 16
learning_rate: 2.0e-5
num_train_epochs: 15.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
gradient_checkpointing: true

### eval
val_size: 0.2
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 10
"""

# Write the YAML text verbatim to disk.
with open(conservative_config_path, 'w') as config_file:
    config_file.write(conservative_config)

print("Conservative config also saved (use if OOM occurs)")

Conservative config also saved (use if OOM occurs)


# Step 5: Start Training

In [None]:
# Switch the working directory to the LLaMA-Factory checkout before launching
# (presumably so the CLI resolves its data registry relative to it - confirm).
%cd /content/LLaMA-Factory

# Start training
# Runs the LLaMA-Factory CLI's `train` command with the YAML config at this path.
!llamafactory-cli train /content/qwen_arabic_conservative.yaml

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 62% 339/544 [00:57<00:34,  5.87it/s][A
 62% 340/544 [00:57<00:34,  5.91it/s][A
 63% 341/544 [00:58<00:34,  5.95it/s][A
 63% 342/544 [00:58<00:33,  5.97it/s][A
 63% 343/544 [00:58<00:34,  5.85it/s][A
 63% 344/544 [00:58<00:34,  5.83it/s][A
 63% 345/544 [00:58<00:33,  5.93it/s][A
 64% 346/544 [00:59<00:33,  5.90it/s][A
 64% 347/544 [00:59<00:33,  5.88it/s][A
 64% 348/544 [00:59<00:32,  5.98it/s][A
 64% 349/544 [00:59<00:32,  6.04it/s][A
 64% 350/544 [00:59<00:32,  5.99it/s][A
 65% 351/544 [00:59<00:32,  5.94it/s][A
 65% 352/544 [01:00<00:32,  5.94it/s][A
 65% 353/544 [01:00<00:31,  5.97it/s][A
 65% 354/544 [01:00<00:31,  5.99it/s][A
 65% 355/544 [01:00<00:31,  5.94it/s][A
 65% 356/544 [01:00<00:31,  5.97it/s][A
 66% 357/544 [01:00<00:31,  5.94it/s][A
 66% 358/544 [01:01<00:31,  5.92it/s][A
 66% 359/544 [01:01<00:31,  5.94it/s][A
 66% 360/544 [01:01<00:30,  5.96it/s][A
 66% 361/544 [01:01<00:30,  6.00i

In [None]:
# Inference: caption every image in the test folder with a fine-tuned checkpoint.
# The model is Qwen2.5-VL, so it must be loaded with the Qwen2.5-VL class; the
# Qwen2-VL class describes a different architecture.
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from PIL import Image
import torch
import os
import json
from tqdm import tqdm
import matplotlib.pyplot as plt


def _generate_caption(model, processor, image_path,
                      prompt="Describe this image in Arabic.", max_new_tokens=128):
    """Generate one caption for the image at `image_path`.

    Args:
        model: loaded vision-language model exposing `generate` and `device`.
        processor: matching processor (chat template, tokenizer, image handling).
        image_path: path of the image file to caption.
        prompt: instruction text sent alongside the image.
        max_new_tokens: upper bound on generated tokens.

    Returns:
        The generated text only (prompt tokens removed), stripped of
        surrounding whitespace.

    Raises:
        Any exception raised while opening the image or running the model;
        the caller decides how to record failures.
    """
    # Load pixel data inside the context manager so the file handle is closed;
    # convert() returns an independent in-memory image.
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt}
            ]
        }
    ]

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[image], return_tensors="pt", padding=True)
    # Follow the model's actual device instead of assuming "cuda";
    # device_map="auto" decides the placement.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=processor.tokenizer.eos_token_id
        )

    # generate() returns prompt + continuation; drop the prompt tokens rather
    # than string-splitting the decoded text on "assistant\n", which breaks
    # whenever the caption itself contains that text.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]
    return processor.decode(generated_ids, skip_special_tokens=True).strip()


# Test with a checkpoint (adjust checkpoint number)
checkpoint_path = "/content/drive/MyDrive/ImageVal/qwen2_5vl_arabic_model/checkpoint-50"

if os.path.exists(checkpoint_path):
    print("Loading model...")
    # Load model and processor
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        checkpoint_path,
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
    processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
    print("✅ Model loaded successfully!")

    # Test folder path
    test_folder = "/content/drive/MyDrive/ImageVal/Test/images"

    if os.path.exists(test_folder):
        # Get all image files; sorted so the processing order (and therefore
        # the output file) is stable across runs.
        image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
        image_files = sorted(
            file for file in os.listdir(test_folder)
            if file.lower().endswith(image_extensions)
        )

        print(f"Found {len(image_files)} images in test folder")

        # Store results
        results = []

        # Process each image
        for i, image_file in enumerate(tqdm(image_files, desc="Generating captions")):
            # Computed outside the try block so the error record below can
            # always reference it.
            image_path = os.path.join(test_folder, image_file)
            try:
                arabic_caption = _generate_caption(model, processor, image_path)

                # Store result
                results.append({
                    "image_file": image_file,
                    "image_path": image_path,
                    "arabic_caption": arabic_caption
                })

                # Print the first 5 captions, then every 10th
                if (i + 1) % 10 == 0 or i < 5:
                    print(f"\n--- Image {i+1}/{len(image_files)}: {image_file} ---")
                    print(f"Arabic Caption: {arabic_caption}")

            except Exception as e:
                # Best effort: record the failure and continue with the next
                # image. The "Error:" prefix is what the summary below counts.
                print(f"❌ Error processing {image_file}: {e}")
                results.append({
                    "image_file": image_file,
                    "image_path": image_path,
                    "arabic_caption": f"Error: {str(e)}"
                })

        # Save results to JSON
        output_file = "/content/drive/MyDrive/ImageVal/generated_arabic_captions.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)

        print(f"\n🎉 Completed! Generated captions for {len(results)} images")
        print(f"Results saved to: {output_file}")

        # Show summary statistics
        successful = sum(1 for r in results if not r['arabic_caption'].startswith('Error:'))
        failed = len(results) - successful
        print(f"Successful: {successful}, Failed: {failed}")

        # Display first 5 results
        print("\n=== First 5 Results ===")
        for i, result in enumerate(results[:5]):
            print(f"\n{i+1}. {result['image_file']}")
            print(f"   Caption: {result['arabic_caption']}")

    else:
        print(f"❌ Test folder not found: {test_folder}")

else:
    # Help the user pick a valid checkpoint by listing what exists on disk.
    print(f"❌ Checkpoint not found: {checkpoint_path}")
    print("Available checkpoints:")
    model_dir = "/content/drive/MyDrive/ImageVal/qwen2_5vl_arabic_model"
    if os.path.exists(model_dir):
        checkpoints = [d for d in os.listdir(model_dir) if d.startswith('checkpoint-')]
        for cp in sorted(checkpoints):
            print(f"  - {cp}")


# Imported here so the export does not depend on an earlier cell having been
# run in the same session (otherwise `pd` raises NameError in a fresh kernel).
import pandas as pd

# Export only when a non-empty `results` list exists; it is absent when the
# checkpoint or the test folder was not found.
if 'results' in locals() and results:
    # Create DataFrame
    df_results = pd.DataFrame(results)

    # Save to CSV (utf-8-sig adds a BOM, which lets Excel detect UTF-8 and
    # display the Arabic captions correctly)
    csv_file = "/content/drive/MyDrive/ImageVal/fine_tune_generated_arabic_captions.csv"
    df_results.to_csv(csv_file, index=False, encoding='utf-8-sig')
    print(f"📊 Results also saved to CSV: {csv_file}")

    # Display summary
    print(f"\nDataFrame shape: {df_results.shape}")
    print(df_results.head())