In [None]:
# Clone repository and install dependencies
# WARNING: `rm -rf LST` removes any existing checkout, including everything
# stored under LST/ (e.g. LST/results and LST/logs from earlier runs).
# Copy results elsewhere before re-running this cell.
!rm -rf LST
!git clone https://github.com/Js-Hwang1/LST.git
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): versions are unpinned — pin them once known-good versions are confirmed.
%pip install -q rouge fuzzywuzzy python-Levenshtein

In [None]:
import os
import shlex
import subprocess
import time

# --- Credentials -----------------------------------------------------------
# Paste your Hugging Face token below, or leave the placeholder untouched and
# export HF_TOKEN in the environment beforehand. The placeholder is never
# written to the environment, so an already-configured token is not clobbered.
HF_TOKEN_PLACEHOLDER = "YOUR_HF_TOKEN_HERE"
HF_TOKEN = "YOUR_HF_TOKEN_HERE"  # Replace with your token
if HF_TOKEN != HF_TOKEN_PLACEHOLDER:
    os.environ["HF_TOKEN"] = HF_TOKEN
elif not os.environ.get("HF_TOKEN"):
    print("WARNING: HF_TOKEN is not set; downloads that require authentication will fail.")

# --- Configuration ---------------------------------------------------------
MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
TASKS = "qasper,multifieldqa_en,hotpotqa,2wikimqa,musique,gov_report,multi_news,trec,triviaqa,samsum,lcc,repobench-p"
NUM_SAMPLES = 100
# Path is resolved relative to the LST checkout (jobs run after `cd LST`).
# NOTE(review): the path mentions "tinyllama" while MODEL is Mistral-7B — confirm they match.
CHECKPOINT = "checkpoints/tinyllama_sidecar/final.pt"  # Update path if needed

REPO_DIR = "LST"
METHODS = ["dense", "h2o", "pyramidkv", "lst"]  # one background job per method

# Create results and logs directories
os.makedirs(os.path.join(REPO_DIR, "results"), exist_ok=True)
os.makedirs(os.path.join(REPO_DIR, "logs"), exist_ok=True)

# Base command template shared by every job; values are shell-quoted so that
# paths or names containing spaces/special characters survive `shell=True`.
BASE_CMD = (
    f"cd {shlex.quote(REPO_DIR)} && nohup python scripts/benchmark/eval_longbench.py"
    f" --model_name {shlex.quote(MODEL)}"
    f" --tasks {shlex.quote(TASKS)}"
    f" --num_samples {shlex.quote(str(NUM_SAMPLES))}"
)


def build_job_command(method):
    """Return the shell command that starts the evaluation of `method` in the background.

    The job writes results to results/longbench_<method>.json and its combined
    stdout/stderr to logs/<method>.log (both relative to the LST checkout).
    Only the "lst" method receives the --checkpoint argument.
    """
    checkpoint_arg = f" --checkpoint {shlex.quote(CHECKPOINT)}" if method == "lst" else ""
    return (
        f"{BASE_CMD} --methods {method}{checkpoint_arg}"
        f" --output_file results/longbench_{method}.json"
        f" > logs/{method}.log 2>&1 &"
    )


# Surface a missing checkpoint now; otherwise the failure only shows up in the lst log.
checkpoint_path = os.path.join(REPO_DIR, CHECKPOINT)
if "lst" in METHODS and not os.path.exists(checkpoint_path):
    print(f"WARNING: checkpoint not found at {checkpoint_path}; the lst job will likely fail (check LST/logs/lst.log).")

# NOTE(review): all jobs start at once and each loads its own copy of MODEL —
# verify the available GPU memory is sufficient for that many concurrent jobs.
print(f"🚀 Launching {len(METHODS)} parallel evaluation jobs...")
print("=" * 60)

for method in METHODS:
    launcher = subprocess.Popen(build_job_command(method), shell=True)
    # The trailing `&` backgrounds the job, so the launcher shell exits right
    # away; waiting on it just reaps that shell instead of leaving a zombie.
    launcher.wait()
    print(f"Started: {method}")

# Short pause before reporting, presumably so the monitoring cells have output to show.
time.sleep(5)
print("=" * 60)
print("✅ All jobs launched! Run the next cells to monitor.")

In [None]:
# Check GPU memory usage
# Prints a one-off snapshot; re-run the cell to refresh it.
!nvidia-smi

In [None]:
# Check running processes
# Lists the `ps aux` lines that mention eval_longbench; the trailing
# `grep -v grep` drops the grep command itself from the listing.
# No output means no matching process was found.
!ps aux | grep eval_longbench | grep -v grep

In [None]:
# Monitor all logs in real-time (interrupt kernel to stop)
# This will show interleaved output from all 4 jobs
# `tail -f` keeps following the files, so this cell blocks (and later cells do
# not run) until it is interrupted. The *.log glob is expanded when the command
# starts, so only log files that already exist are followed.
!tail -f LST/logs/*.log

In [None]:
# Show the last 20 lines of each job log (re-run this cell to refresh the view)
log_sections = [
    ("DENSE LOG:", "dense"),
    ("H2O LOG:", "h2o"),
    ("PYRAMIDKV LOG:", "pyramidkv"),
    ("LST LOG:", "lst"),
]
separator = "=" * 60

for position, (title, log_name) in enumerate(log_sections):
    # Every banner except the first is preceded by a blank line.
    print(separator if position == 0 else "\n" + separator)
    print(title)
    print(separator)
    !tail -20 LST/logs/{log_name}.log

In [None]:
# View results once jobs complete
# Safe to run while jobs are still in progress: methods whose result file is
# missing or not yet fully written are reported and skipped.
import json
import os

results_dir = "LST/results"
methods = ["dense", "h2o", "pyramidkv", "lst"]


def results_for(data, method):
    """Return the per-task dict stored at data["results"][method], or {} if absent/malformed."""
    section = data.get("results") if isinstance(data, dict) else None
    found = section.get(method) if isinstance(section, dict) else None
    return found if isinstance(found, dict) else {}


def extract_score(entry):
    """Return the numeric score held by a result entry, or None when there is none.

    An entry may be a bare number, a dict with a "score" key, or a dict whose
    "score" is itself such a dict; nested dicts are unwrapped until a leaf is reached.
    """
    while isinstance(entry, dict):
        entry = entry.get("score")
    # bool is a subclass of int but is never a meaningful score
    if isinstance(entry, bool) or not isinstance(entry, (int, float)):
        return None
    return float(entry)


def collect_tasks(all_results):
    """Return every task name seen in any method's results, in first-seen order.

    The "average" key is a summary value, not a task, and is excluded.
    """
    tasks = []
    for results in all_results.values():
        for name in results:
            if name != "average" and name not in tasks:
                tasks.append(name)
    return tasks


def format_cell(value):
    """Right-align a score in a 12-character column; missing scores show as n/a."""
    return f"{'n/a':>12}" if value is None else f"{value:>12.2f}"


def build_table(all_results):
    """Return the comparison table (header, rule, task rows, rule, average row) as a list of lines."""
    rule = "-" * (20 + 12 * len(all_results))
    lines = [f"{'Task':<20}" + "".join(f"{method:>12}" for method in all_results), rule]
    for task in collect_tasks(all_results):
        cells = "".join(
            format_cell(extract_score(results.get(task))) for results in all_results.values()
        )
        lines.append(f"{task:<20}" + cells)
    lines.append(rule)
    averages = "".join(
        format_cell(extract_score(results.get("average"))) for results in all_results.values()
    )
    lines.append(f"{'AVERAGE':<20}" + averages)
    return lines


print("=" * 80)
print("LONGBENCH RESULTS COMPARISON")
print("=" * 80)

all_results = {}
for method in methods:
    path = f"{results_dir}/longbench_{method}.json"
    if not os.path.exists(path):
        print(f"⏳ {method}: not ready yet")
        continue
    try:
        with open(path) as f:
            data = json.load(f)
    except json.JSONDecodeError:
        # The file exists but does not parse as JSON (e.g. still being written).
        print(f"⏳ {method}: results file is incomplete or invalid, try again later")
        continue
    all_results[method] = results_for(data, method)
    print(f"✅ {method}: loaded")

if all_results:
    print("\n" + "\n".join(build_table(all_results)))
    print("=" * 80)

In [None]:
# Disconnect from Colab (run when all jobs complete)
# Disconnecting while evaluation jobs are still running would discard their
# work, so the cell refuses to do so unless FORCE_DISCONNECT is set to True.
import subprocess

FORCE_DISCONNECT = False  # set to True to disconnect even if jobs are still running

try:
    # pgrep exits with status 0 when at least one process command line matches.
    jobs_running = subprocess.run(
        ["pgrep", "-f", "eval_longbench"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    ).returncode == 0
except OSError:
    # pgrep is unavailable, so the check cannot be made; proceed as requested.
    jobs_running = False

if jobs_running and not FORCE_DISCONNECT:
    print("Evaluation jobs are still running; not disconnecting.")
    print("Re-run this cell once they finish, or set FORCE_DISCONNECT = True.")
else:
    try:
        from google.colab import runtime
    except ImportError:
        print("google.colab is not available; nothing to disconnect.")
    else:
        runtime.unassign()