In [None]:
import sys
sys.path.append('/content/t4opt')

from eval.perplexity import PerplexityEvaluator
from eval.benchmarks import BenchmarkRunner
from eval.speed_test import SpeedTester
from utils.memory import MemoryManager

# Model path (adjust based on your model). This one value is passed as
# `model_path=` to PerplexityEvaluator, BenchmarkRunner and SpeedTester in the
# cells below, so changing it here switches the model for every evaluation.
model_path = "./checkpoints/phi-2-qlora"  # or quantized model path


In [None]:
# Perplexity evaluation: run the evaluator on the checkpoint at `model_path`
# with max_samples=50, then print the headline numbers from the result dict.
perplexity_evaluator = PerplexityEvaluator(model_path=model_path)
perplexity_result = perplexity_evaluator.evaluate(max_samples=50)

# (label, result key, format spec) for each reported metric; an empty spec
# renders the value with its default formatting.
perplexity_report_fields = (
    ("Perplexity", "perplexity", ".4f"),
    ("Average Loss", "average_loss", ".4f"),
    ("Total Tokens", "total_tokens", ""),
)
for label, field, spec in perplexity_report_fields:
    # Keys are looked up one at a time, in order, right before printing.
    print(f"{label}: {perplexity_result[field]:{spec}}")


In [None]:
# Run benchmarks: execute the "mmlu" and "generation" suites against the
# checkpoint at `model_path` and print every metric of each suite.
benchmark_runner = BenchmarkRunner(model_path=model_path)
benchmark_results = benchmark_runner.run(benchmarks=["mmlu", "generation"])

print("Benchmark Results:")
for benchmark, results in benchmark_results.items():
    print(f"\n{benchmark.upper()}:")
    if not isinstance(results, dict):
        # A non-dict entry used to be dropped silently, leaving an empty
        # section under the header; show the raw value instead.
        print(f"  {results}")
        continue
    for key, value in results.items():
        if key == "generations":  # Skip full generation text
            continue
        print(f"  {key}: {value}")


In [None]:
# Speed test: measure latency of the checkpoint at `model_path` with
# num_runs=10 and print the summary fields of the returned dict.
speed_tester = SpeedTester(model_path=model_path)
latency_results = speed_tester.test_latency(num_runs=10)

# (output template, result key) pairs, printed in this order.
latency_report_lines = (
    ("  Average Latency: {:.2f} ms", "avg_latency_ms"),
    ("  Tokens/Second: {:.2f}", "avg_tokens_per_second"),
    ("  Device: {}", "device"),
)

print("Latency Results:")
for template, field in latency_report_lines:
    # Each key is read only when its line is about to be printed.
    print(template.format(latency_results[field]))


In [None]:
# Generate evaluation report: hand the perplexity and benchmark numbers
# collected by the earlier cells to EvalAgent's "generate_report" task.
from agents.evaluator import EvalAgent


def _benchmark_metric(all_results, benchmark, metric, default=0):
    """Return ``all_results[benchmark][metric]``, or ``default`` if unavailable.

    Args:
        all_results: Mapping of benchmark name to that benchmark's results.
        benchmark: Name of the benchmark to look up (e.g. "mmlu").
        metric: Key of the metric inside the benchmark's result dict.
        default: Value returned when the benchmark is missing, its entry is
            not a dict, or the metric key is absent.

    Returns:
        The stored metric value, or ``default``.
    """
    entry = all_results.get(benchmark)
    # The benchmark-printing cell already treats non-dict entries as possible;
    # calling .get() on such an entry (or on None) would raise AttributeError.
    if not isinstance(entry, dict):
        return default
    return entry.get(metric, default)


eval_agent = EvalAgent()
report_result = eval_agent.execute(
    task="generate_report",
    context={
        "results": {
            "perplexity": perplexity_result["perplexity"],
            "benchmarks": {
                "mmlu_accuracy": _benchmark_metric(
                    benchmark_results, "mmlu", "accuracy"
                ),
                "generation_avg_length": _benchmark_metric(
                    benchmark_results, "generation", "avg_generation_length"
                ),
            },
        },
        "output_path": "./eval_report.txt",
    },
)

print("Evaluation report generated!")
# The path is read back from the agent's result rather than from the literal
# passed in above.
print(f"Report saved to: {report_result.result['output_path']}")
