# Agent Colab: Research Paper Entity Extraction Benchmark

This notebook runs **Gemini 3 Pro Preview** as an autonomous agent on the benchmark task.
It uses `google.colab.ai` for native Colab Pro AI integration; no API keys are required.

In [None]:
%pip install -q pandas networkx python-Levenshtein

In [None]:
%pip install -q git+https://github.com/EhsanKA/agentic_task.git

from google.colab import ai
import json

# Query the Colab AI module for the models it reports; the result is printed
# here and later passed to select_model() to choose MODEL_NAME.
available_models = ai.list_models()
print("Available models:", available_models)

In [None]:
from benchmark.data.loader import setup_data
from benchmark.evaluation.prompt import BENCHMARK_PROMPT
from benchmark.evaluation.agent import (
    select_model, build_agent_context, execute_agent_code, extract_variables
)

# setup_data() returns four values, unpacked here into the three raw datasets
# handed to the agent plus DATA_DIR (presumably the data location -- confirm
# against benchmark.data.loader).
papers_raw, citations_raw, affiliations_raw, DATA_DIR = setup_data()
# Choose the model to query from the list returned by ai.list_models().
MODEL_NAME = select_model(available_models)

In [None]:
# Build the value sent as the prompt from the benchmark task text and the
# three raw datasets.
context = build_agent_context(BENCHMARK_PROMPT, papers_raw, citations_raw, affiliations_raw)

print("Sending task to agent...")
# One text-generation request to the selected model through google.colab.ai.
agent_response = ai.generate_text(prompt=context, model_name=MODEL_NAME)
print("Response received.")
# Preview only the first 2000 characters (assumes the response is a string);
# the complete response stays in agent_response for the execution cell.
print(agent_response[:2000])

In [None]:
# Execute the agent's response against the three raw datasets.
exec_result = execute_agent_code(agent_response, papers_raw, citations_raw, affiliations_raw)

# A falsy exec_result yields an empty mapping, so the cells below still run
# and report what is missing (presumably falsy means nothing usable was
# produced -- confirm against execute_agent_code).
results = extract_variables(exec_result) if exec_result else {}
# Publish the extracted variables as notebook globals in a single step.
# A top-level `for k, v in ...` loop would make k and v globals themselves:
# an agent variable named "k" or "v" would be overwritten by a later
# iteration, and the loop variables would linger in the namespace.
globals().update(results)

In [None]:
# Show the two outputs the agent is expected to have produced. Each one is
# printed in its own try block so that a missing validation_results does not
# prevent final_report from being displayed (and vice versa).
# default=str is applied to both dumps so values json cannot encode natively
# are stringified instead of raising TypeError.
try:
    print(json.dumps(validation_results, indent=2, default=str))
except NameError as e:
    print(f"Missing: {e}")

try:
    print(json.dumps(final_report, indent=2, default=str))
except NameError as e:
    print(f"Missing: {e}")

In [None]:
from benchmark.evaluation.tests import set_context, run_all_tests

# Hand the variables extracted from the agent run to the test module before
# running its tests.
set_context(results)
# test_result is read by the summary cell (testsRun, failures, errors,
# wasSuccessful()).
test_result = run_all_tests()

In [None]:
# Final summary banner: model name, test counts, and an overall verdict,
# framed by two 60-character rules.
print("=" * 60)
print(f"Model: {MODEL_NAME}")
print(
    f"Tests: {test_result.testsRun} run, "
    f"{len(test_result.failures)} failures, "
    f"{len(test_result.errors)} errors"
)
# The verdict comes from the result object's own success check.
print("BENCHMARK PASSED" if test_result.wasSuccessful() else "BENCHMARK FAILED")
print("=" * 60)