# Llama 2 7B Fine-tuning for Therapy AI

This notebook provides a complete pipeline for fine-tuning Llama 2 7B on therapy conversation data using Google Colab.

## Prerequisites
- Hugging Face account with Llama 2 access
- Google Colab Pro (recommended for GPU memory)
- Training data in JSONL format (one JSON object per line, with `instruction`/`output` or `input`/`output` fields)

## Step 1: Setup Environment

In [None]:
# Install required packages: transformers/torch/accelerate for the model and
# training loop, peft + bitsandbytes for 4-bit LoRA, datasets and trl for the
# SFT trainer, and huggingface_hub for authentication and uploads.
# NOTE(review): no versions are pinned. The SFTTrainer call in Step 6 passes
# dataset_text_field / max_seq_length / tokenizer directly, which newer TRL
# releases are understood to have moved to SFTConfig — pin known-good versions
# if that call raises a TypeError (TODO confirm against the TRL changelog).
!pip install transformers torch accelerate peft bitsandbytes datasets trl huggingface_hub

In [None]:
# Import libraries
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import datasets
from huggingface_hub import login
import json

In [None]:
# Login to Hugging Face.
# notebook_login() is called with no arguments and is expected to prompt for an
# access token interactively. Authentication is needed both to download the
# base checkpoint (Prerequisites lists Llama 2 access) and to push the results
# to the Hub in Steps 7 and 9.
from huggingface_hub import notebook_login
notebook_login()

## Step 2: Load and Prepare Data

In [None]:
# Load training data
def load_training_data(file_path):
    """Read a JSON Lines file into a list of parsed records.

    Blank and whitespace-only lines are skipped. The file is decoded as
    UTF-8; a leading byte-order mark (as written by some editors) is
    tolerated instead of breaking the first record.

    Args:
        file_path: Path to the ``.jsonl`` file, one JSON value per line.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a line is not valid JSON. The message names
            the file and the 1-based line number of the offending line.
    """
    data = []
    # 'utf-8-sig' decodes plain UTF-8 identically but strips a leading BOM,
    # which json.loads would otherwise reject on the first line.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for line_number, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Re-raise the same exception type so existing handlers still
                # match, but say which line of which file is malformed.
                raise json.JSONDecodeError(
                    f"{err.msg} (in {file_path}, line {line_number})",
                    err.doc,
                    err.pos,
                ) from err
    return data

# Upload your training data file
from google.colab import files
uploaded = files.upload()

# files.upload() returns a dict keyed by the uploaded file names. Use the
# expected name when it is present (or when nothing was uploaded and the file
# may already be on disk); otherwise fall back to the first uploaded file so a
# differently named upload does not end in FileNotFoundError.
default_data_file = 'therapy_training_data.jsonl'
if default_data_file in uploaded or not uploaded:
    data_file = default_data_file
else:
    data_file = next(iter(uploaded))

training_data = load_training_data(data_file)
print(f"Loaded {len(training_data)} training examples")

In [None]:
# Format data for training
def format_instruction(example):
    """Render one training record as a single prompt/response string.

    Records holding ``instruction`` and ``output`` use the
    ``### Instruction`` / ``### Response`` layout; records holding ``input``
    and ``output`` use ``### Human`` / ``### Assistant``. When both
    ``instruction`` and ``input`` are present the instruction layout wins.
    Any record without a usable key pair is returned as ``str(example)``.
    """
    # Every supported layout needs an answer; bail out early without one.
    if 'output' not in example:
        return str(example)

    if 'instruction' in example:
        return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"

    if 'input' in example:
        return f"### Human:\n{example['input']}\n\n### Assistant:\n{example['output']}"

    # An answer with no recognised prompt field: fall back to the raw repr.
    return str(example)

formatted_data = [format_instruction(item) for item in training_data]
print("Sample formatted data:")
if formatted_data:
    # Preview at most 500 characters, and only mark the sample as truncated
    # when something was actually cut off.
    sample = formatted_data[0]
    print(sample[:500] + ("..." if len(sample) > 500 else ""))
else:
    # An empty data file would otherwise surface as an IndexError here.
    print("(no training examples were loaded)")

## Step 3: Load Model and Tokenizer

In [None]:
# Model configuration
model_name = "meta-llama/Llama-2-7b-hf"  # base checkpoint to fine-tune
new_model = "therapy-llama2-7b"  # local directory / Hub repo name for the adapter

# Quantization configuration: load the base weights in 4-bit NF4 with double
# quantization, using float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load tokenizer.
# trust_remote_code is deliberately not enabled: the Llama architecture ships
# with transformers, and the flag would let Python code stored in whatever
# repository model_name points at run on this machine.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
# Load model with the 4-bit quantization config; device_map="auto" leaves
# weight placement to the library.
# trust_remote_code is deliberately not enabled: the Llama architecture ships
# with transformers, and the flag would let Python code stored in whatever
# repository model_name points at run on this machine.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

## Step 4: Configure LoRA

In [None]:
# LoRA configuration, passed to SFTTrainer as peft_config in Step 6.
peft_config = LoraConfig(
    lora_alpha=16,  # LoRA scaling factor
    lora_dropout=0.1,  # dropout applied inside the LoRA layers
    r=64,  # rank of the adapter matrices
    bias="none",  # no bias parameters are trained
    task_type="CAUSAL_LM",
    # Module names that receive adapters. Presumably the attention and MLP
    # projections of the Llama implementation — TODO confirm against the
    # loaded model's module names.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)

## Step 5: Training Configuration

In [None]:
# Training arguments, passed to SFTTrainer as args in Step 6.
training_arguments = TrainingArguments(
    output_dir="./results",  # checkpoints and logs are written here
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,  # no accumulation: one optimizer step per batch
    optim="paged_adamw_32bit",
    save_steps=25,  # checkpoint interval, in steps
    logging_steps=25,  # logging interval, in steps
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,  # both mixed-precision modes are switched off
    bf16=False,
    max_grad_norm=0.3,  # gradient clipping threshold
    max_steps=-1,  # negative: training length is governed by num_train_epochs
    warmup_ratio=0.03,
    group_by_length=True,  # batch samples of similar length together
    # NOTE(review): the plain "constant" schedule is understood to have no
    # warmup phase, so warmup_ratio above may be ignored — confirm, or use
    # "constant_with_warmup" if warmup is intended.
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

## Step 6: Initialize Trainer and Train

In [None]:
# Convert data to dataset: a single "text" column holding the strings built
# by format_instruction in Step 2.
from datasets import Dataset
dataset = Dataset.from_dict({"text": formatted_data})

# Initialize trainer with the quantized base model, the LoRA config from
# Step 4 and the training arguments from Step 5.
# NOTE(review): dataset_text_field, max_seq_length, tokenizer and packing are
# given to SFTTrainer directly; newer TRL releases are understood to expect
# them on SFTConfig instead — TODO confirm for the installed version.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",  # the column created above
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)

# Train the model
trainer.train()

## Step 7: Save and Upload Model

In [None]:
# Save the model (the trainer's PEFT-wrapped model) and the tokenizer side by
# side in the `new_model` directory.
trainer.model.save_pretrained(new_model)
# Use the module-level tokenizer — the same object that was handed to
# SFTTrainer — as the merge step in Step 9 does. This avoids depending on the
# trainer.tokenizer attribute, which newer transformers releases are
# understood to deprecate.
tokenizer.save_pretrained(new_model)

In [None]:
# Push to Hugging Face Hub under the `new_model` repository name.
trainer.model.push_to_hub(new_model, use_temp_dir=False)
# Use the module-level tokenizer — the same object that was handed to
# SFTTrainer — as the merge step in Step 9 does. This avoids depending on the
# trainer.tokenizer attribute, which newer transformers releases are
# understood to deprecate.
tokenizer.push_to_hub(new_model, use_temp_dir=False)

## Step 8: Test the Model

In [None]:
# Test inference
def generate_response(prompt):
    """Generate a sampled completion for *prompt* with the fine-tuned model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        prompt: Prompt text, formatted the same way as the training samples.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed. The whole sequence is decoded, not only the new tokens.
    """
    # Training leaves the model in train mode; switch to eval so dropout
    # (including the LoRA dropout) is not applied while generating.
    model.eval()
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,  # temperature only takes effect when sampling is on
        temperature=0.7,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test prompt: uses the "### Instruction" / "### Response" layout produced by
# format_instruction and ends right after the response header, so the model
# continues with the answer.
test_prompt = "### Instruction:\nI'm feeling anxious about my upcoming exams. Can you help me?\n\n### Response:\n"
response = generate_response(test_prompt)
print("Test Response:")
print(response)

## Step 9: Merge LoRA Weights (Optional)

In [None]:
# Merge LoRA weights with base model.
# The base checkpoint is reloaded in float16 without the 4-bit quantization
# config used for training.
# NOTE(review): `model` and `trainer` from the earlier steps are still
# referenced at this point, so this second copy is loaded alongside them —
# this may exceed GPU memory on smaller Colab GPUs; confirm, or free the
# training objects / restart the runtime first.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Attach the adapter saved under `new_model` in Step 7, then merge it into the
# base model and unload the adapter wrapper.
merged_model = PeftModel.from_pretrained(base_model, new_model)
merged_model = merged_model.merge_and_unload()

# Save merged model together with the tokenizer
merged_model.save_pretrained("merged_therapy_llama2")
tokenizer.save_pretrained("merged_therapy_llama2")

# Push merged model and tokenizer to a separate Hub repository
merged_model.push_to_hub("therapy-llama2-7b-merged")
tokenizer.push_to_hub("therapy-llama2-7b-merged")