# 🏋️ Forma Fitness - AI Chatbot Training

This notebook trains a bilingual (Arabic/English) fitness chatbot for the Forma app.

**What it does:**
- Fine-tunes Qwen2.5-3B-Instruct (supports Arabic!) with LoRA adapters
- Uses your 8,140 fitness Q&A samples
- Exports to GGUF format for Ollama
- Saves to Google Drive

**Time:** ~30-45 minutes on T4, ~15-20 minutes on A100

---
## Step 1: Check GPU

In [None]:
# Check what GPU we got
!nvidia-smi --query-gpu=name,memory.total --format=csv

import torch
print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

## Step 2: Install Dependencies

In [None]:
%%capture
# Install Unsloth for 2-3x faster training
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps trl peft accelerate bitsandbytes xformers
!pip install datasets huggingface_hub

print("‚úÖ Dependencies installed!")

## Step 3: Download Training Data

In [None]:
import json
import urllib.request

# Raw-file URL of the balanced training set in the Forma GitHub repo
DATA_URL = "https://raw.githubusercontent.com/Abdellatifemara/Forma/main/apps/api/training-data/train_balanced.json"

print("Downloading training data...")
urllib.request.urlretrieve(DATA_URL, "train_data.json")

# Parse the downloaded file; the tally below iterates it as a sequence of dicts.
with open("train_data.json", "r", encoding="utf-8") as fh:
    data = json.loads(fh.read())

print(f"‚úÖ Loaded {len(data)} training samples!")
print("\nSample categories:")

# Count samples per 'category', filing samples without that key under 'unknown'.
categories = {}
for sample in data:
    label = sample.get('category', 'unknown')
    if label in categories:
        categories[label] += 1
    else:
        categories[label] = 1

# Report the ten largest categories; the sort is stable, so ties keep
# first-seen order.
top_categories = sorted(categories.items(), key=lambda pair: pair[1], reverse=True)[:10]
for label, total in top_categories:
    print(f"  {label}: {total}")

## Step 4: Load Model with Unsloth

In [None]:
from unsloth import FastLanguageModel

# Model config
MODEL_NAME = "unsloth/Qwen2.5-3B-Instruct-bnb-4bit"  # Supports Arabic!
MAX_SEQ_LENGTH = 2048
LORA_R = 32  # Higher = more capacity

print(f"Loading {MODEL_NAME}...")

# Load the 4-bit checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    load_in_4bit=True,
    dtype=None,  # Auto-detect
    max_seq_length=MAX_SEQ_LENGTH,
)

# Projection layers that receive LoRA adapters, listed by group.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Wrap the base model with LoRA adapters (alpha is set equal to the rank).
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=attention_projections + mlp_projections,
    r=LORA_R,
    lora_alpha=LORA_R,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

print("‚úÖ Model loaded with LoRA!")

## Step 5: Prepare Dataset

In [None]:
from datasets import Dataset

# System prompt for Forma, written one line per entry and joined with "\n"
# (no trailing newline). It is reused for training samples and for inference.
SYSTEM_PROMPT = "\n".join([
    "You are Forma AI, a bilingual fitness assistant for Egypt.",
    "",
    "You provide accurate, science-based advice on:",
    "- Exercise technique and programming",
    "- Nutrition and meal planning",
    "- Supplements (evidence-based only)",
    "",
    "You understand:",
    "- Arabic (Egyptian dialect)",
    "- Franco Arabic (3aml eh, ezayak)",
    "- English",
    "",
    "You know Egyptian foods, gyms, and local fitness culture.",
    "Always prioritize user safety. Recommend doctors for injuries/medical issues.",
])

def format_chat(item):
    """Render one training sample as a single chat-formatted string.

    The user prompt is taken from ``item['instruction']``, falling back to
    ``item['input']``; the answer is taken from ``item['output']``, falling
    back to ``item['response']``. Each falls back to an empty string when
    neither key holds a usable value.

    Args:
        item: Mapping for one sample (for example a dataset row).

    Returns:
        A dict with one key, ``"text"``, holding the system/user/assistant
        conversation rendered by ``tokenizer.apply_chat_template`` as text.
    """
    # Use `or` chains instead of nested .get(key, default): a default only
    # applies when the key is absent, so a key that is present but holds None
    # (or "") would otherwise be passed into the chat template as-is.
    # NOTE(review): rows coming from a datasets.Dataset may carry None for
    # fields a sample lacked -- confirm against the training file.
    instruction = item.get('instruction') or item.get('input') or ''
    output = item.get('output') or item.get('response') or ''

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": instruction},
        {"role": "assistant", "content": output},
    ]

    # tokenize=False returns the rendered conversation as a string, not ids.
    text = tokenizer.apply_chat_template(messages, tokenize=False)
    return {"text": text}

# Build the dataset from the loaded samples and run format_chat over each row,
# which supplies the "text" field read below.
dataset = Dataset.from_list(data).map(format_chat)

print(f"‚úÖ Dataset ready: {len(dataset)} samples")
print("\nExample:")
# Preview the first 500 characters of the first rendered conversation.
first_text = dataset[0]['text']
print(first_text[:500] + "...")

## Step 6: Train! 🚀

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Training config
MAX_STEPS = 2000  # Increase for better quality
BATCH_SIZE = 2
GRAD_ACCUM = 4  # Effective batch = 8
LEARNING_RATE = 2e-4

effective_batch = BATCH_SIZE * GRAD_ACCUM
print("Starting training...")
print(f"  Steps: {MAX_STEPS}")
print(f"  Batch size: {BATCH_SIZE} x {GRAD_ACCUM} = {effective_batch}")
print(f"  Learning rate: {LEARNING_RATE}")
print("\n" + "="*50)

# Pick the mixed-precision mode once: bf16 when the GPU reports support,
# otherwise fp16.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="forma-fitness-model",
    max_steps=MAX_STEPS,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=25,
    save_steps=500,
    seed=42,
    report_to="none",
)

# Supervised fine-tuning over the "text" column of the prepared dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    args=training_args,
)

# Train!
stats = trainer.train()

# train_runtime is divided by 60 to report minutes.
elapsed_minutes = stats.metrics['train_runtime'] / 60
print("\n" + "="*50)
print("‚úÖ Training complete!")
print(f"   Final loss: {stats.training_loss:.4f}")
print(f"   Time: {elapsed_minutes:.1f} minutes")

## Step 7: Test the Model

In [None]:
# Prepare the fine-tuned model for generation (Unsloth helper).
FastLanguageModel.for_inference(model)

test_prompts = [
    "How many sets should I do for muscle growth?",
    "What Egyptian foods are high in protein?",
    "I have a shoulder injury, what exercises can I do?",
    "What is Forma?",
]

# Longest answer preview printed per prompt, in characters.
MAX_PREVIEW_CHARS = 300

print("Testing model...\n")
print("="*50)

for prompt in test_prompts:
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]

    # add_generation_prompt=True ends the prompt with the assistant header so
    # the model continues with its reply.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=150,
        use_cache=True,
        temperature=0.7,
        do_sample=True,
    )

    # The generated sequence starts with the prompt tokens. Slice them off by
    # length and decode only the continuation, instead of decoding everything
    # and string-splitting on chat markers (which printed the whole prompt,
    # system message included, whenever the marker was not found).
    new_tokens = outputs[0][inputs.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # Only add an ellipsis when the answer was actually cut short.
    preview = response[:MAX_PREVIEW_CHARS]
    if len(response) > MAX_PREVIEW_CHARS:
        preview += "..."

    print(f"Q: {prompt}")
    print(f"A: {preview}")
    print("-"*50)

## Step 8: Save to Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Save LoRA model
SAVE_PATH = "/content/drive/MyDrive/Forma-AI-Model"
!mkdir -p {SAVE_PATH}

print("Saving LoRA model...")
model.save_pretrained(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

print(f"‚úÖ Model saved to: {SAVE_PATH}")

## Step 9: Export to GGUF (for Ollama)

In [None]:
# Convert to GGUF format for Ollama/llama.cpp
GGUF_PATH = f"{SAVE_PATH}/gguf"
!mkdir -p {GGUF_PATH}

print("Converting to GGUF (q4_k_m quantization)...")
print("This may take 5-10 minutes...\n")

model.save_pretrained_gguf(
    GGUF_PATH,
    tokenizer,
    quantization_method="q4_k_m"  # Good balance of speed/quality
)

print(f"\n‚úÖ GGUF saved to: {GGUF_PATH}")
print("\nTo use with Ollama:")
print("1. Download the .gguf file from Google Drive")
print("2. Create a Modelfile with: FROM ./your-model.gguf")
print("3. Run: ollama create forma-fitness -f Modelfile")
print("4. Test: ollama run forma-fitness")

## 🎉 Done!

Your model is now:
1. **Saved as LoRA** in Google Drive (`Forma-AI-Model/`)
2. **Exported as GGUF** for Ollama (`Forma-AI-Model/gguf/`)

### Next Steps:
1. Download the `.gguf` file from Google Drive
2. Install [Ollama](https://ollama.ai)
3. Create a `Modelfile` containing `FROM ./your-model.gguf`, then create the model: `ollama create forma-fitness -f Modelfile`
4. Integrate with your Forma app's `/api/chat-offline` endpoint