# Car Damage Assessment - Working GPU Setup

This notebook contains the verified working GPU setup and inference components for car damage assessment, followed by the training pipeline used to fine-tune the model.

In [None]:
# Complete GPU Setup & Verification for Car Damage Assessment
import os
import torch
import gc
from transformers import AutoProcessor
from unsloth import FastVisionModel
from datasets import load_dataset
from PIL import Image

print("🚀 === COMPLETE GPU SETUP & VERIFICATION ===")

# Reaching this line means every import at the top of the cell succeeded.
print("✅ All required packages imported")

print("\n🧪 GPU Status Check:")
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")
if not cuda_available:
    print("❌ CUDA not available!")
    # Raise SystemExit explicitly: exit() is a convenience helper meant for
    # the interactive shell and is not a dependable way to stop this cell.
    raise SystemExit("CUDA not available - aborting GPU setup")

print(f"✅ GPU: {torch.cuda.get_device_name()}")
print(f"✅ GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
print(f"✅ CUDA Version: {torch.version.cuda}")

# Smoke test: allocate two matrices on the GPU and multiply them.
print("\n🧪 Testing basic GPU operations...")
try:
    x = torch.randn(1000, 1000, device='cuda')
    y = torch.randn(1000, 1000, device='cuda')
    z = torch.mm(x, y)
    print("✅ Basic GPU operations working!")
    # Drop the temporaries so they do not hold GPU memory while the model loads.
    del x, y, z
    torch.cuda.empty_cache()
except Exception as e:
    print(f"❌ GPU operations failed: {e}")
    raise SystemExit("GPU smoke test failed - aborting GPU setup") from e

# Load the fine-tuned model and the dataset used for sample images.
# A failure here is reported but does not abort the cell, so `model`,
# `tokenizer` and `dataset` may be left undefined afterwards.
print("\n📦 Loading model and dataset...")
setup_ok = False
try:
    # Load the trained model
    model, tokenizer = FastVisionModel.from_pretrained(
        model_name="Kakyoin03/car-damage-assessment-llama-vision",
        load_in_4bit=True,
        device_map="auto",
    )
    FastVisionModel.for_inference(model)
    print("✅ Model loaded successfully!")

    # Load test dataset
    dataset = load_dataset("Kakyoin03/car_damage_detection_dataset", split="train")
    print(f"✅ Dataset loaded: {len(dataset)} samples")

    print(f"✅ Model type: {type(model)}")
    print(f"✅ Model device: {next(model.parameters()).device}")
    print(f"✅ Tokenizer loaded: {type(tokenizer)}")
    setup_ok = True

except Exception as e:
    print(f"❌ Model/Dataset loading failed: {e}")
    print("🔄 This is expected if models aren't deployed yet")

# Only announce readiness when the model and dataset really loaded.
if setup_ok:
    print("\n🎉 EVERYTHING READY FOR GPU INFERENCE!")
    print("✅ You can now run the inference code below.")
else:
    print("\n⚠️ GPU is working, but the model/dataset did not load - inference is not ready yet.")
print("=" * 56)

In [None]:
# Simple Working Inference Function
def _resolve_pad_token_id(tokenizer):
    """Return the first non-None pad/eos token id found on the tokenizer.

    Looks on the object itself and, if present, on its ``tokenizer``
    attribute (for processor objects that wrap a tokenizer). Returns None
    when nothing usable is found.
    """
    holders = (tokenizer, getattr(tokenizer, "tokenizer", None))
    for attribute in ("pad_token_id", "eos_token_id"):
        for holder in holders:
            token_id = getattr(holder, attribute, None)
            if token_id is not None:
                return token_id
    return None


def analyze_car_damage_simple(image_path_or_pil, model, tokenizer,
                              instruction=None, max_new_tokens=150):
    """
    Describe the car damage visible in an image using a vision-language model.

    Args:
        image_path_or_pil: Path to an image file (``str`` or ``os.PathLike``)
            or an already opened PIL image.
        model: Model whose ``generate`` method accepts the processed inputs.
        tokenizer: Processor/tokenizer for ``model``. It must provide
            ``apply_chat_template`` and ``decode`` and be callable as
            ``tokenizer(image, text, ...)``.
        instruction: Optional prompt text. When None, the built-in damage
            assessment prompt is used.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated assessment text. If any step raises, the exception is
        swallowed and a string starting with ``"Analysis failed: "`` is
        returned instead.
    """
    try:
        # Clear GPU memory
        torch.cuda.empty_cache()

        # Load and prepare image; the context manager closes the file handle
        # once the RGB copy has been made.
        if isinstance(image_path_or_pil, (str, os.PathLike)):
            with Image.open(image_path_or_pil) as source_image:
                image = source_image.convert("RGB")
        else:
            image = image_path_or_pil.convert("RGB")

        # Resize if too large (thumbnail() shrinks in place, keeping aspect ratio)
        max_size = (1024, 1024)
        if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
            image.thumbnail(max_size, Image.Resampling.LANCZOS)

        # Default prompt, kept byte-for-byte (including its embedded
        # indentation) so default behaviour does not change.
        default_instruction = """You are a car damage assessment expert. 
        Analyze this image and describe:
        1. What parts of the car are damaged
        2. Type of damage (scratch, dent, crack, etc.)
        3. Severity level (minor, moderate, major)
        Be concise and accurate."""
        prompt_text = default_instruction if instruction is None else instruction

        # Prepare input
        messages = [
            {"role": "user", "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt_text}
            ]}
        ]

        # Send the inputs to wherever the model lives; fall back to "cuda"
        # when the model does not expose a device.
        device = getattr(model, "device", None)
        if device is None:
            device = "cuda"

        # Tokenize
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to(device)

        # NOTE(review): use_cache=False is kept from the original code;
        # presumably a deliberate workaround - confirm before enabling it.
        generation_kwargs = {
            "max_new_tokens": max_new_tokens,
            "use_cache": False,
            "temperature": 0.8,
            "top_p": 0.9,
            "do_sample": True,
        }
        # hasattr() alone is not enough: the attribute can exist and be None.
        pad_token_id = _resolve_pad_token_id(tokenizer)
        if pad_token_id is not None:
            generation_kwargs["pad_token_id"] = pad_token_id

        # Generate
        with torch.no_grad():
            output = model.generate(**inputs, **generation_kwargs)

        # Decode only the newly generated tokens. Splitting the full text on
        # the word "assistant" would cut off any answer containing that word.
        prompt_length = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

        return response.strip()

    except Exception as e:
        return f"Analysis failed: {str(e)}"

    finally:
        # Clean up on both the success and the failure path.
        torch.cuda.empty_cache()

print("✅ Simple inference function defined!")
print("📝 Usage: result = analyze_car_damage_simple(image, model, tokenizer)")

# Optional smoke test: only runs when the setup cell has already created
# `model` and `tokenizer`; any error is reported instead of stopping the cell.
try:
    if 'model' not in globals() or 'tokenizer' not in globals():
        print("ℹ️ Model not loaded yet - run the setup cell first")
    else:
        print("✅ Model and tokenizer are ready for testing!")

        # Use the first dataset image as the sample, when a dataset exists.
        has_samples = 'dataset' in globals() and len(dataset) > 0
        if has_samples:
            print("\n🧪 Testing with sample image...")
            test_image = dataset[0]["image"]
            result = analyze_car_damage_simple(test_image, model, tokenizer)
            # Show only the first 100 characters of the answer.
            print(f"📊 Sample result: {result[:100]}...")
except Exception as e:
    print(f"ℹ️ Test skipped: {e}")

## Usage Examples

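Once the setup cell has loaded `model`, `tokenizer`, and `dataset`, call the inference function as shown below. It returns the assessment as a string; if anything goes wrong, it returns a message beginning with `Analysis failed:` instead of raising an exception.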

```python
# Analyze an image from file
result = analyze_car_damage_simple("path/to/image.jpg", model, tokenizer)
print(result)

# Analyze an image from dataset
test_image = dataset[0]["image"]
result = analyze_car_damage_simple(test_image, model, tokenizer)
print(result)
```

## What This Notebook Provides

✅ **Complete GPU Setup** - Verifies CUDA, GPU memory, and basic operations  
✅ **Model Loading** - Loads your trained car damage assessment model  
✅ **Working Inference** - Simple, reliable damage analysis function  
✅ **Error Handling** - Robust error handling and fallbacks  
✅ **Memory Management** - Proper GPU memory cleanup  

**The sections above are the clean, minimal inference path with only working components; the training pipeline follows below.**

# Training Section

The following cells contain the complete training pipeline for fine-tuning the Llama Vision model on car damage assessment.

In [None]:
# Training Setup and Configuration
from unsloth import FastVisionModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from datasets import load_dataset

print("🔧 === TRAINING SETUP ===")

# Settings shared by the model loader below and by the trainer set up later.
max_seq_length = 2048  # maximum sequence length handed to the loader and trainer
dtype = None           # None leaves dtype selection to the loader
load_in_4bit = True    # load 4-bit quantized weights to reduce memory usage

# Load the base model that will be fine-tuned.
print("📦 Loading base model for training...")
base_model_options = {
    "model_name": "unsloth/Llama-3.2-11B-Vision-Instruct",
    "max_seq_length": max_seq_length,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
}
model, tokenizer = FastVisionModel.from_pretrained(**base_model_options)

# Wrap the base model with LoRA adapters so only adapter weights are trained.
print("🔗 Adding LoRA adapters...")
lora_options = {
    "r": 16,  # LoRA rank
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "embed_tokens", "lm_head",
    ],
    "lora_alpha": 16,
    "lora_dropout": 0,
    "bias": "none",
    "use_gradient_checkpointing": "unsloth",
    "random_state": 3407,
    "use_rslora": False,
    "loftq_config": None,
}
model = FastVisionModel.get_peft_model(model, **lora_options)

print("✅ Model prepared for training!")

# Count parameters in one pass: everything, and the subset that needs gradients.
trainable_count = 0
total_count = 0
for parameter in model.parameters():
    parameter_size = parameter.numel()
    total_count += parameter_size
    if parameter.requires_grad:
        trainable_count += parameter_size
print(f"📊 Trainable parameters: {trainable_count:,}")
print(f"📊 Total parameters: {total_count:,}")

In [None]:
# Dataset Preparation and Formatting
print("📚 === DATASET PREPARATION ===")

# Fetch the "train" split of the car damage dataset.
dataset = load_dataset("Kakyoin03/car_damage_detection_dataset", split="train")
sample_count = len(dataset)
print(f"✅ Dataset loaded: {sample_count} samples")

# End-of-sequence marker appended to every formatted training example.
# NOTE(review): assumes the object returned by the loader exposes
# `eos_token` - confirm for the processor type in use.
EOS_TOKEN = tokenizer.eos_token

# Alpaca-style template with three positional slots:
# instruction, input and response (in that order).
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)

def formatting_prompts_func(examples):
    """Turn a batch of dataset rows into Alpaca-formatted training text.

    Args:
        examples: Batch mapping of column name to list of values. The
            "image" column is required. "instruction" and "text" are
            optional; "output" is used for the response when present,
            otherwise "label".

    Returns:
        A dict with "text" (one formatted prompt per row, terminated by
        EOS_TOKEN) and "image" (the batch's images, unchanged).
    """
    images = examples["image"]
    batch_size = len(images)

    # Fall back to a fixed instruction / empty input when the columns are absent.
    if "instruction" in examples:
        instructions = examples["instruction"]
    else:
        instructions = ["Analyze this car damage image and provide a detailed assessment."] * batch_size

    if "text" in examples:
        inputs = examples["text"]
    else:
        inputs = [""] * batch_size

    # Prefer an explicit "output" column; otherwise the label is the target.
    if "output" in examples:
        outputs = examples["output"]
    else:
        outputs = examples["label"]

    # Images take part in the zip only to bound the iteration to the batch.
    texts = [
        alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN
        for instruction, input_text, output, _ in zip(instructions, inputs, outputs, images)
    ]

    return {"text": texts, "image": images}

# Add the formatted "text" column to every row, one batch at a time.
dataset = dataset.map(formatting_prompts_func, batched=True)

# 80/20 split: the validation size is whatever is left after taking the
# truncated 80% share for training.
total_examples = len(dataset)
train_size = int(0.8 * total_examples)
validation_size = total_examples - train_size
dataset = dataset.train_test_split(test_size=validation_size, seed=42)
train_dataset, eval_dataset = dataset["train"], dataset["test"]

print(f"✅ Training samples: {len(train_dataset)}")
print(f"✅ Validation samples: {len(eval_dataset)}")

# Peek at the first training row to confirm the formatting worked.
first_example = train_dataset[0]
print(f"📝 Sample formatted text: {first_example['text'][:200]}...")
print(f"🖼️ Sample image type: {type(first_example['image'])}")

In [None]:
# Training Configuration and Trainer Setup
print("⚙️ === TRAINING CONFIGURATION ===")

# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size = 2,        # Adjust based on GPU memory
    per_device_eval_batch_size = 2,         # Adjust based on GPU memory
    gradient_accumulation_steps = 4,        # Effective batch size = 2 * 4 = 8
    warmup_steps = 5,
    num_train_epochs = 3,                   # Ignored while max_steps is set (see below)
    max_steps = 1000,                       # A positive max_steps overrides num_train_epochs
    learning_rate = 2e-4,
    # Use bf16 when the GPU supports it, otherwise fall back to fp16.
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    logging_steps = 10,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "./car_damage_model",      # Save checkpoints here
    save_steps = 100,
    save_total_limit = 3,                   # Keep only 3 checkpoints
    # NOTE(review): newer transformers releases name this argument
    # `eval_strategy`; confirm which spelling the installed version accepts.
    evaluation_strategy = "steps",
    eval_steps = 100,                       # Kept equal to save_steps so the best checkpoint can be restored
    load_best_model_at_end = True,
    metric_for_best_model = "eval_loss",
    greater_is_better = False,              # Lower eval_loss is better
    # The string "none" is the documented way to disable all logging
    # integrations (wandb etc.); Python None does not disable them.
    report_to = "none",
    run_name = "car_damage_llama_vision",
)

# Create trainer
# NOTE(review): the datasets carry raw images next to the "text" column and
# dataset preparation is skipped; no vision data collator is passed here -
# confirm that batches are collated as intended for the vision model.
print("🎯 Setting up trainer...")
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    dataset_text_field = "text",
    dataset_kwargs = {"skip_prepare_dataset": True},
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,                        # Sequence packing is left disabled
    args = training_args,
)

print("✅ Trainer configured!")
print(f"📊 GPU Memory before training: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
print("🚀 Ready to start training!")

In [None]:
# Start Training
print("🚀 === STARTING TRAINING ===")
print("⚠️ This will take several hours depending on your dataset size and GPU!")


def _bytes_to_gb(num_bytes):
    """Convert a byte count to gigabytes (1024-based), rounded to 3 decimals."""
    return round(num_bytes / 1024 / 1024 / 1024, 3)


# Baseline: device capacity and the peak reserved memory before training.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = _bytes_to_gb(torch.cuda.max_memory_reserved())
max_memory = _bytes_to_gb(gpu_stats.total_memory)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved before training.")

# Run the training loop; the returned object carries the loss, step count and metrics.
trainer_stats = trainer.train()

# Peak reserved memory after training, and the share attributable to training
# itself (peak minus the pre-training baseline).
used_memory = _bytes_to_gb(torch.cuda.max_memory_reserved())
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

print("🎉 Training completed!")
print(f"📊 Peak reserved memory = {used_memory} GB.")
print(f"📊 Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"📊 Peak reserved memory % of max memory = {used_percentage} %.")
print(f"📊 Peak reserved memory for training % of max memory = {lora_percentage} %.")

# Summary of the run as reported by the trainer.
final_loss = trainer_stats.training_loss
steps_done = trainer_stats.global_step
runtime_seconds = trainer_stats.metrics['train_runtime']
print("\n📈 Training Results:")
print(f"✅ Final training loss: {final_loss:.4f}")
print(f"✅ Training steps completed: {steps_done}")
print(f"✅ Training time: {runtime_seconds:.2f} seconds")

In [None]:
# Save Model Locally and Deploy to HuggingFace
print("💾 === SAVING AND DEPLOYING MODEL ===")

# Save model locally
local_save_path = "./car_damage_assessment_model"
print(f"💾 Saving model locally to: {local_save_path}")

model.save_pretrained(local_save_path) # Local saving
tokenizer.save_pretrained(local_save_path)

# Also write a merged 16-bit copy next to the first save directory.
print("🔄 Converting to 16bit for faster inference...")
model.save_pretrained_merged(
    local_save_path + "_16bit",
    tokenizer,
    save_method = "merged_16bit"
)

# Deploy to HuggingFace Hub
print("🚀 Deploying to HuggingFace Hub...")
hf_model_name = "Kakyoin03/car-damage-assessment-llama-vision"

# Repositories that were actually uploaded; drives the final summary so it
# never claims a deployment that did not happen.
deployed_repos = []

try:
    # Push to HuggingFace
    model.push_to_hub(
        hf_model_name,
        token = None,  # You'll need to login with `huggingface-cli login` first
    )
    tokenizer.push_to_hub(
        hf_model_name,
        token = None,
    )
    deployed_repos.append(hf_model_name)
    print(f"✅ Model successfully deployed to: https://huggingface.co/{hf_model_name}")

    # Also push 16bit version
    model.push_to_hub_merged(
        hf_model_name + "-16bit",
        tokenizer,
        save_method = "merged_16bit",
        token = None,
    )
    deployed_repos.append(hf_model_name + "-16bit")
    print(f"✅ 16bit model deployed to: https://huggingface.co/{hf_model_name}-16bit")

except Exception as e:
    # Best effort: the local copies saved above remain usable.
    print(f"⚠️ HuggingFace deployment failed: {e}")
    print("💡 Make sure you're logged in with: huggingface-cli login")
    print(f"📁 Model saved locally at: {local_save_path}")

# Final summary reflects what really happened above.
if deployed_repos:
    print("\n🎉 Training and deployment complete!")
else:
    print("\n🎉 Training complete - model saved locally only (Hub upload did not succeed).")
print(f"📁 Local model path: {local_save_path}")
for deployed_repo in deployed_repos:
    print(f"🔗 HuggingFace model: {deployed_repo}")
print("✅ You can now use this model for inference!")

In [None]:
# Test Trained Model
print("🧪 === TESTING TRAINED MODEL ===")

# Switch the model that was just trained into Unsloth's inference mode
# before any text is generated with it.
FastVisionModel.for_inference(model) # in-place call; the return value is not used

# Test function with the newly trained model
def test_trained_model(image, instruction="Analyze this car damage and provide assessment"):
    """Generate a damage assessment for one image with the trained model.

    Uses the module-level ``model`` and ``tokenizer`` created by the
    training cells.

    Args:
        image: Image passed to the processor and embedded in the chat message.
        instruction: Text prompt sent alongside the image.

    Returns:
        The generated text. If any step raises, the exception is swallowed
        and a string starting with ``"Test failed: "`` is returned instead.
    """
    try:
        # Prepare messages
        messages = [
            {"role": "user", "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": instruction}
            ]}
        ]

        # Send the inputs to wherever the model lives; fall back to "cuda"
        # when the model does not expose a device.
        device = getattr(model, "device", None)
        if device is None:
            device = "cuda"

        # Tokenize
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to(device)

        # Generate
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=200,
                use_cache=True,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
            )

        # Decode only the newly generated tokens. Splitting the full text on
        # the word "assistant" would cut off any answer containing that word.
        prompt_length = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

        return response.strip()

    except Exception as e:
        return f"Test failed: {str(e)}"

# Run the trained model on up to three validation images and show a preview
# (first 150 characters) of each answer.
print("🧪 Testing with validation samples...")
test_results = []

samples_to_test = min(3, len(eval_dataset))
for test_number in range(1, samples_to_test + 1):
    test_image = eval_dataset[test_number - 1]["image"]
    result = test_trained_model(test_image)
    test_results.append(result)
    print(f"\n📊 Test {test_number} Result:")
    print(f"🔍 {result[:150]}...")

print("\n✅ Training validation complete!")
print(f"📊 Tested {len(test_results)} samples")
print("🎉 Your car damage assessment model is ready!")