# Forma Fitness - AI Chatbot Training

**IMPORTANT: Run the cells ONE BY ONE, not with "Run All"!** Step 2 requires a runtime restart before you continue.

This trains a bilingual (Arabic/English) fitness chatbot.

**Time:** ~2 hours on T4 GPU for good quality

---
## Step 1: Check GPU (must be T4 or better)

In [None]:
# Check GPU - MUST show T4 or A100
!nvidia-smi --query-gpu=name,memory.total --format=csv

import torch
if not torch.cuda.is_available():
    print("ERROR: No GPU! Go to Runtime > Change runtime type > T4 GPU")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    print("Ready to train!")

## Step 2: Install Dependencies

**After this cell completes, you MUST restart the runtime!**
- Go to Runtime > Restart runtime
- Then continue from Step 3

In [None]:
# Install packages - watch for errors!
!pip install -q transformers==4.46.0 datasets accelerate peft trl bitsandbytes
!pip install -q sentencepiece protobuf

print("")
print("=" * 50)
print("INSTALLATION COMPLETE!")
print("=" * 50)
print("")
print("NOW: Go to Runtime > Restart runtime")
print("THEN: Continue from Step 3 (skip this cell)")
print("=" * 50)

## Step 3: Download Training Data

In [None]:
import json
import urllib.request
from collections import Counter

# Download training data from GitHub
DATA_URL = "https://raw.githubusercontent.com/Abdellatifemara/Forma/main/apps/api/training-data/train_merged.json"

print("Downloading training data...")
urllib.request.urlretrieve(DATA_URL, "train_data.json")

# The file is read back as UTF-8 explicitly so Arabic text decodes the same
# way regardless of the platform's default encoding.
with open("train_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

print(f"Loaded {len(data)} training samples!")

# Show the eight most frequent categories. Samples without a 'category' key
# are counted as 'fitness'. Counter.most_common keeps first-seen order among
# equal counts, matching a stable sort by descending count.
categories = Counter(item.get('category', 'fitness') for item in data)
print("\nCategories:")
for cat, count in categories.most_common(8):
    print(f"  {cat}: {count}")

## Step 4: Load Model

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Model that supports Arabic
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

print(f"Loading {MODEL_NAME}...")
print("This takes 2-3 minutes...")

# 4-bit quantization for memory efficiency: NF4 weights with double
# quantization, computing in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Keep the tokenizer's own pad token when it defines one (the published
# Qwen2.5 tokenizer config is expected to - verify against the model card).
# Reusing EOS as the pad token lets a padding-aware collator mask the
# end-of-turn token out of the loss, so the model is never trained to stop.
# Fall back to EOS only when no pad token exists at all.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# Prepare the quantized model for training
model = prepare_model_for_kbit_training(model)

# Add LoRA adapters on the attention and MLP projection layers
lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

# Report how many parameters will actually receive gradients.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"\nModel loaded!")
print(f"Trainable parameters: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")

## Step 5: Prepare Dataset

In [None]:
from datasets import Dataset

# System prompt shared by training and inference: format_chat() puts it at the
# start of every training sample, and the Step 7 test loop sends it with every
# test question. Its text is part of the model's input, so any edit here
# changes what the model is trained and evaluated on.
SYSTEM_PROMPT = """You are Forma AI, a bilingual fitness assistant for Egypt.

You provide accurate, science-based advice on:
- Exercise technique and programming
- Nutrition and meal planning (Egyptian foods, fast food, restaurants)
- Supplements (evidence-based only)

You understand:
- Arabic (Egyptian dialect)
- Franco Arabic (3aml eh, ezayak)
- English

You know Egyptian foods, gyms, restaurants, and local fitness culture.
Always prioritize user safety. Recommend doctors for injuries/medical issues."""

# Filter out bad samples (missing, null, empty, or whitespace-only values)
def _as_text(value):
    """Return *value* as a string, or '' when it carries no usable text.

    Falsy values (None, '', 0, empty containers) and strings made only of
    whitespace all map to '' so the caller can use a plain truthiness test.
    """
    if not value:
        return ""
    text = str(value)
    return text if text.strip() else ""


clean_data = []
for item in data:
    # Fall back to the alternate key names ('input' / 'response') only when
    # the primary key is absent from the sample.
    instruction = _as_text(item.get('instruction', item.get('input', '')))
    output = _as_text(item.get('output', item.get('response', '')))

    # A sample is only trainable when both sides of the exchange have text.
    if not (instruction and output):
        continue

    clean_data.append({
        'instruction': instruction,
        'output': output,
        'category': item.get('category', 'fitness'),
    })

print(f"Original samples: {len(data)}")
print(f"Clean samples: {len(clean_data)}")
print(f"Removed {len(data) - len(clean_data)} bad samples")

def format_chat(item):
    """Render one cleaned sample as a single chat-formatted training string.

    Args:
        item: Mapping with an 'instruction' entry (used as the user turn) and
            an 'output' entry (used as the assistant turn).

    Returns:
        A dict with one key, "text", holding the untokenized result of the
        tokenizer's chat template applied to the system, user and assistant
        turns, in that order.
    """
    turns = (
        ("system", SYSTEM_PROMPT),
        ("user", item['instruction']),
        ("assistant", item['output']),
    )
    conversation = [{"role": role, "content": content} for role, content in turns]
    return {"text": tokenizer.apply_chat_template(conversation, tokenize=False)}

# Build the dataset and add the chat-formatted "text" column to every row.
dataset = Dataset.from_list(clean_data)
dataset = dataset.map(format_chat)

print(f"\nDataset ready: {len(dataset)} samples")

# Preview one formatted sample. Row 100 is shown when it exists; a smaller
# dataset falls back to its last row instead of raising IndexError.
if len(dataset) > 0:
    example_idx = min(100, len(dataset) - 1)
    print("\nExample:")
    print(dataset[example_idx]['text'][:400] + "...")
else:
    print("\nNo samples left after cleaning - check the training data!")

## Step 6: TRAIN! (This takes about 2-3 hours on a T4)

**Watch the loss value go down:**
- Starting: ~2.5
- After 1000 steps: ~1.0
- After 3000 steps: ~0.5
- Final: ~0.3 (good!)

In [None]:
from trl import SFTTrainer, SFTConfig
import time

# Training settings
MAX_STEPS = 5000      # Good quality - takes ~2 hours
BATCH_SIZE = 2
GRAD_ACCUM = 4        # effective batch size = BATCH_SIZE * GRAD_ACCUM
LEARNING_RATE = 2e-4

print("=" * 60)
print("TRAINING STARTED")
print("=" * 60)
print(f"Steps: {MAX_STEPS}")
# Rough estimate that assumes 1.5 to 2 seconds per optimizer step.
print(f"Estimated time: {MAX_STEPS * 1.5 / 60:.0f} - {MAX_STEPS * 2 / 60:.0f} minutes")
print("")
print("Watch the 'loss' value - it should decrease over time!")
print("Good final loss: < 0.5")
print("=" * 60)

start_time = time.time()

trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=dataset,
    args=SFTConfig(
        output_dir="./forma-model",
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRAD_ACCUM,
        warmup_steps=100,
        max_steps=MAX_STEPS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=50,
        save_steps=1000,
        save_total_limit=2,
        optim="paged_adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        seed=42,
        report_to="none",
        dataset_text_field="text",
        max_seq_length=2048,
    ),
)

# Train!
stats = trainer.train()

# stats.training_loss is the average over the whole run, so it still carries
# the high losses from the first steps. The last "loss" entry the trainer
# logged reflects where training actually ended; fall back to the average
# only if nothing was logged.
logged_losses = [entry["loss"] for entry in trainer.state.log_history if "loss" in entry]
final_loss = logged_losses[-1] if logged_losses else stats.training_loss

duration = (time.time() - start_time) / 60
print("")
print("=" * 60)
print("TRAINING COMPLETE!")
print("=" * 60)
print(f"Time: {duration:.1f} minutes")
print(f"Final loss: {final_loss:.4f}")
print(f"Average loss over the run: {stats.training_loss:.4f}")
if final_loss < 0.5:
    print("Loss is good! Model learned well.")
elif final_loss < 1.0:
    print("Loss is okay. Model learned the basics.")
else:
    print("Loss is high. Consider training more steps.")

## Step 7: Test the Model

In [None]:
# Test the trained model
model.eval()

test_prompts = [
    "How many sets should I do for muscle growth?",
    "How many calories in Koshari?",
    "kam calorie fel foul?",
    "How many calories in a Big Mac from McDonald's Egypt?",
    "What Egyptian foods are high in protein?",
]

print("Testing model...\n")
print("=" * 60)

for prompt in test_prompts:
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")
    prompt_len = inputs.shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            # Single unpadded sequence, so every position is a real token.
            attention_mask=torch.ones_like(inputs),
            max_new_tokens=150,
            temperature=0.7,
            do_sample=True,
        )

    # The generated sequence starts with the prompt tokens. Decode only what
    # comes after them rather than splitting the decoded text on the word
    # "assistant": that word appears in the system prompt and may appear in
    # the answer itself, which would cut the answer short.
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    print(f"Q: {prompt}")
    print(f"A: {response[:300]}")
    print("-" * 60)

## Step 8: Save to Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

SAVE_PATH = "/content/drive/MyDrive/Forma-AI-Model"
!mkdir -p {SAVE_PATH}

print("Saving model to Google Drive...")
model.save_pretrained(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

print(f"Model saved to: {SAVE_PATH}")

## Step 9: Export to GGUF (for Ollama)

This converts the model to GGUF format for use with Ollama.

In [None]:
# Install llama.cpp for GGUF conversion
!pip install -q llama-cpp-python

# First, merge LoRA weights with base model
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()

MERGED_PATH = "/content/forma-merged"
merged_model.save_pretrained(MERGED_PATH)
tokenizer.save_pretrained(MERGED_PATH)

print("Model merged. Now converting to GGUF...")
print("This may take 10-15 minutes...")

In [None]:
# Clone llama.cpp and convert
!git clone --depth 1 https://github.com/ggerganov/llama.cpp
!pip install -q -r llama.cpp/requirements.txt

# Convert to GGUF
!python llama.cpp/convert_hf_to_gguf.py /content/forma-merged --outfile /content/forma-fitness.gguf --outtype q4_k_m

# Copy to Google Drive
!cp /content/forma-fitness.gguf "/content/drive/MyDrive/Forma-AI-Model/forma-fitness.gguf"

print("")
print("=" * 60)
print("DONE!")
print("=" * 60)
print("")
print("Your GGUF model is saved at:")
print("  Google Drive > Forma-AI-Model > forma-fitness.gguf")
print("")
print("To use with Ollama:")
print("1. Download forma-fitness.gguf from Google Drive")
print("2. Create a Modelfile with: FROM ./forma-fitness.gguf")
print("3. Run: ollama create forma-fitness -f Modelfile")
print("4. Test: ollama run forma-fitness")