## Step 1: Setup Environment

In [None]:
# Run the project's Kaggle setup script in a subprocess using the IPython `!` shell escape.
# NOTE(review): presumably this installs dependencies / prepares the runtime — confirm in kaggle_setup.py.
!python kaggle_setup.py

## Step 2: Configure HuggingFace Token (Optional)

Run this cell if you use private or gated models, or otherwise need to authenticate with the Hugging Face Hub:

In [None]:
import os
from huggingface_hub import login

# Token lookup order:
#   1. Kaggle Secrets (recommended): add a secret named HF_TOKEN to this notebook.
#   2. The HF_TOKEN environment variable (fallback, e.g. when running outside Kaggle).
# Never paste a token literal into a notebook that may be shared or committed.
hf_token = None
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    hf_token = user_secrets.get_secret("HF_TOKEN")
except Exception as exc:
    # This step is optional, so report why the secret could not be read
    # (module missing, secret not attached, ...) and fall back instead of aborting.
    print(f"Kaggle secret HF_TOKEN unavailable ({type(exc).__name__}: {exc}); "
          "trying the HF_TOKEN environment variable")

if not hf_token:
    hf_token = os.environ.get("HF_TOKEN")

if hf_token:
    login(token=hf_token)
    print("✓ Logged in to HuggingFace")
else:
    print("No HF_TOKEN found - skipping login; private or gated models will not be accessible")

## Step 3: Configure Merging Parameters

In [None]:
import torch
from llama_merge import LLaMAMerger

# Configuration: every value a reader might tune lives in this cell.
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"

# Your fine-tuned models (local paths or HF IDs)
FINETUNED_MODELS = [
    "llama-3.2-1b-mental-health-counselor",
    # Add more models here
]

# Calibration datasets (one per model, in the same order as FINETUNED_MODELS)
DATASETS = [
    "Amod/mental_health_counseling_conversations",
    # Add corresponding datasets
]

# Parameters
OUTPUT_DIR = "./merged_models"
CACHE_DIR = "./merge_cache"
DENSITY = 0.2  # Keep top 20% of weights
NUM_CALIBRATION_SAMPLES = 128
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Fail fast on inconsistent settings so mistakes surface here rather than
# part-way through the (expensive) merge.
if len(FINETUNED_MODELS) != len(DATASETS):
    raise ValueError(
        f"Expected one calibration dataset per fine-tuned model, got "
        f"{len(FINETUNED_MODELS)} model(s) and {len(DATASETS)} dataset(s)"
    )
if not 0.0 < DENSITY <= 1.0:
    raise ValueError(f"DENSITY must be in (0, 1], got {DENSITY}")
if NUM_CALIBRATION_SAMPLES <= 0:
    raise ValueError(
        f"NUM_CALIBRATION_SAMPLES must be positive, got {NUM_CALIBRATION_SAMPLES}"
    )

print(f"Device: {DEVICE}")
print(f"Base model: {BASE_MODEL}")
print(f"Fine-tuned models: {len(FINETUNED_MODELS)}")
print(f"Density: {DENSITY}")

## Step 4: Initialize Merger

In [None]:
# Build the merger from the values defined in the configuration cell.
merger = LLaMAMerger(
    base_model_path=BASE_MODEL,
    finetuned_model_paths=FINETUNED_MODELS,
    dataset_names=DATASETS,
    output_dir=OUTPUT_DIR,
    cache_dir=CACHE_DIR,
    density=DENSITY,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    device=DEVICE,
)

# Status glyph repaired: it was a mis-decoded UTF-8 check mark.
print("✓ Merger initialized")

## Step 5: Run All Methods and Compare

In [None]:
# Run every merge method and compare them. According to the original notes
# (the implementation lives in llama_merge and is not visible here), this will:
# 1. Compute task vectors (if not cached)
# 2. Compute Hessians (if not cached)
# 3. Merge with all three methods
# 4. Evaluate and compare

results = merger.merge_all_methods()

print("\n" + "="*80)
print("RESULTS SUMMARY")
print("="*80)

if not results:
    # min() over an empty mapping raises ValueError; report the situation instead.
    best_method = None
    print("\nNo results were returned by merge_all_methods()")
else:
    for method, metrics in results.items():
        print(f"\n{method}:")
        print(f"  Perplexity: {metrics['perplexity']:.4f}")
        print(f"  Time: {metrics['time']:.2f}s")

    # Lower perplexity is better, so the best method is the one with the minimum.
    best_method = min(results, key=lambda k: results[k]['perplexity'])
    print(f"\n🏆 Best method: {best_method}")
    print(f"   Perplexity: {results[best_method]['perplexity']:.4f}")

## Step 6: Save Best Model

In [None]:
# The merged models are already saved in OUTPUT_DIR by the merge step;
# reload one from disk so it can be tested below.

from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the best model (usually TIES-SparseGPT).
# NOTE(review): this path is hard-coded rather than derived from `best_method`;
# adjust it if a different method won the comparison above.
best_model_path = f"{OUTPUT_DIR}/ties_sparsegpt_merged"

model = AutoModelForCausalLM.from_pretrained(best_model_path)
# from_pretrained places the weights on CPU by default; move them to the configured
# device so the model and the tokenized inputs used for generation are co-located.
model = model.to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

print(f"✓ Loaded best model from {best_model_path}")

## Step 7: Test the Merged Model

In [None]:
# Smoke-test the merged model with a single sample prompt.
prompt = "I've been feeling anxious lately. What should I do?"

# Send the input tensors to the device the model actually lives on. Using the
# global DEVICE here fails with a device-mismatch error whenever the model was
# loaded on CPU while DEVICE is "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=200,
    temperature=0.7,
    do_sample=True,  # sampling is on, so the response differs from run to run
    top_p=0.9,
)

# outputs[0] holds the prompt tokens followed by the generated continuation.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Prompt:", prompt)
print("\nResponse:", response)

## Optional: Run Individual Methods

If you want to run just one method:

In [None]:
# Run only the SparseGPT variant of the TIES merge and save it to its own directory.
sparsegpt_model = merger.merge_with_ties(use_sparsegpt=True)
sparsegpt_model.save_pretrained(f"{OUTPUT_DIR}/sparsegpt_only")

# Status glyph repaired: it was a mis-decoded UTF-8 check mark.
print("✓ SparseGPT model saved")

## Optional: Upload to HuggingFace Hub

In [None]:
# Upload the loaded model and tokenizer to the HuggingFace Hub.
from huggingface_hub import HfApi

# Replace the placeholder namespace with your own Hub username or organisation.
repo_name = "your-username/merged-mental-health-counselor"

if repo_name.startswith("your-username/"):
    # Pushing to the placeholder repo would only fail with an opaque Hub error,
    # so skip this optional step until a real repo id has been set.
    print("Skipping upload: set repo_name to '<your-hf-username>/<repo>' first")
else:
    model.push_to_hub(repo_name)
    tokenizer.push_to_hub(repo_name)

    print(f"✓ Model uploaded to {repo_name}")

## Memory Monitoring (Optional)

In [None]:
# Report CUDA memory usage in GB (1e9 bytes); nothing is printed on CPU-only runtimes.
if torch.cuda.is_available():
    memory_probes = (
        ("Allocated", torch.cuda.memory_allocated),
        ("Reserved", torch.cuda.memory_reserved),
    )
    for label, probe in memory_probes:
        print(f"GPU Memory {label}: {probe() / 1e9:.2f} GB")

    # Clear cache if needed
    # torch.cuda.empty_cache()