## Import libs

In [1]:
# Standard imports used by the notebook
import os
import json
import requests
import time
from pathlib import Path


## Load data

In [2]:
# Path to the JSONL file shipped with the repo (resolved relative to the current working directory)
DATA_PATH = Path("combined_benchmarks_648.jsonl")
if not DATA_PATH.exists():
    raise FileNotFoundError(f"Data file not found at {DATA_PATH}. Run from repository root or adjust the path.")

# One parsed JSON value per non-blank line, in file order. Later cells index into this list.
data = []
# Pin the encoding: JSON text is UTF-8, but open() otherwise falls back to the platform
# default (a legacy code page on Windows), which can garble or reject non-ASCII content.
with DATA_PATH.open("r", encoding="utf-8") as fh:
    for raw_line in fh:
        stripped = raw_line.strip()
        if not stripped:
            continue  # tolerate blank lines between records
        data.append(json.loads(stripped))

print(f"Loaded {len(data)} records from {DATA_PATH}")

Loaded 648 records from combined_benchmarks_648.jsonl


## Set env variables

In [3]:
# These should be set in your environment (or a local .env file) or you can replace the placeholders here
import shutil
import subprocess

from dotenv import load_dotenv


def get_access_token():
    """Fetch an OAuth access token by running `gcloud auth print-access-token`.

    Used only as a fallback when the ACCESS_TOKEN environment variable is unset or empty.

    Returns:
        The text printed by gcloud on stdout, with surrounding whitespace stripped.

    Raises:
        RuntimeError: if the gcloud CLI is not on PATH, the command fails or times out,
            or it prints an empty token.
    """
    # shutil.which honours PATHEXT, so this also resolves gcloud.cmd on Windows.
    gcloud = shutil.which("gcloud")
    if gcloud is None:
        raise RuntimeError(
            "ACCESS_TOKEN is not set and the gcloud CLI was not found on PATH. "
            "Set ACCESS_TOKEN in the environment/.env, or install gcloud and authenticate."
        )
    try:
        completed = subprocess.run(
            [gcloud, "auth", "print-access-token"],
            capture_output=True,
            text=True,
            check=True,
            timeout=60,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as exc:
        raise RuntimeError(f"`gcloud auth print-access-token` failed: {exc}") from exc
    fetched_token = completed.stdout.strip()
    if not fetched_token:
        raise RuntimeError("`gcloud auth print-access-token` returned an empty token.")
    return fetched_token


# Load variables from a .env file (if one is found) into os.environ before reading them.
load_dotenv()
ENDPOINT_URL = os.environ.get("ENDPOINT_URL") or "ENDPOINT_URL_GOES_HERE"
PROJECT_ID = os.environ.get("PROJECT_ID") or "PROJECT_ID_GOES_HERE"
LOCATION = os.environ.get("LOCATION") or "LOCATION_GOES_HERE"
ENDPOINT_ID = os.environ.get("ENDPOINT_ID") or "ENDPOINT_ID_GOES_HERE"
# MODEL_ID is required for the chat-style payload (provide as env var or replace with a model id)
MODEL_ID = os.environ.get("MODEL_ID") or "MODEL_ID_GOES_HERE"
# Prefer an explicitly provided token; only shell out to gcloud when none is configured.
ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN") or get_access_token()

# Quick sanity check - replace placeholders with real values before running requests
print("ENDPOINT_URL:", ENDPOINT_URL)
print("PROJECT_ID:", PROJECT_ID)
print("LOCATION:", LOCATION)
print("ENDPOINT_ID:", ENDPOINT_ID)
print("MODEL_ID:", MODEL_ID)
# Only a short prefix of the token is echoed so the full secret never lands in saved output.
print("ACCESS_TOKEN:", ACCESS_TOKEN[:10] + "..." if ACCESS_TOKEN else "None")

ENDPOINT_URL: mg-endpoint-ff24e4f2-c7c8-4cd3-9ee9-01ba5c69cc51.us-central1-656295791473.prediction.vertexai.goog
PROJECT_ID: kiln-pilot-476814
LOCATION: us-central1
ENDPOINT_ID: mg-endpoint-ff24e4f2-c7c8-4cd3-9ee9-01ba5c69cc51
MODEL_ID: mg-custom-1763221065
ACCESS_TOKEN: ya29.a0ATi...


## Call the endpoint with a single row from the JSONL file

In [5]:
# Smoke test: send one record to the Vertex AI endpoint using the same chat-completions
# payload shape as the curl example. Change `idx` to try a different record.
idx = 50
record = data[idx]

# Sampling temperature comes from the top level of the record; 0 is used when the key is absent.
temperature = record.get("temperature", 0)

# The whole conversation (system + user messages) is forwarded unchanged.
messages_list = record.get("body", {}).get("messages", [])
if not messages_list:
    raise ValueError(f"No 'messages' list found for record {idx}")

# Keyword order fixes the key order of the pretty-printed echo below.
payload = dict(model=MODEL_ID, temperature=temperature, messages=messages_list)

# Echo the request followed by a 30-dash separator, one item per line.
print("Calling endpoint with payload:", json.dumps(payload, indent=2), "-" * 30, sep="\n")

# Bearer-token auth headers and the chat-completions route of the endpoint.
token = ACCESS_TOKEN
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
url = f"https://{ENDPOINT_URL}/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}/chat/completions"

resp = requests.post(url, json=payload, headers=headers, timeout=60)
print("HTTP status:", resp.status_code)

# Show the body as indented JSON when it parses; otherwise fall back to the raw text.
try:
    print(json.dumps(resp.json(), indent=2))
except Exception:
    print(resp.text)

Calling endpoint with payload:
{
  "model": "mg-custom-1763221065",
  "temperature": 0,
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful math assistant. Your primary goal is to accurately solve mathematical problems and provide clear, step-by-step explanations for your reasoning.\n\nWhen responding to a math query:\n1.  *Analyze:* Carefully read the problem to understand exactly what is being asked.\n2.  *Show Your Work:* Break down the solution into logical, easy-to-follow steps.\n3.  *Use Formatting:* Utilize LaTeX for all complex mathematical equations, formulas, and variables to ensure clarity and correct rendering (e.g., $$E=mc^2$$).\n4.  *Final Answer:* Clearly state the final answer at the end of your explanation."
    },
    {
      "role": "user",
      "content": "Add 350 and 450."
    }
  ]
}
------------------------------
HTTP status: 200
{
  "id": "chatcmpl-5ade642d-f4e2-4243-9013-19d35a5afe78",
  "object": "chat.completion",
  "created": 1

## Call the endpoint for all rows in a loop, saving responses with checkpointing

In [5]:
import time
import json
from pathlib import Path
import requests

# <-- No tqdm, No ipywidgets -->

# Iterate through all records, call the endpoint, and append one JSON line per record to
# OUT_PATH. The same file doubles as the checkpoint: a re-run resumes after the highest
# index already recorded in it.


def _last_checkpointed_index(checkpoint_path):
    """Return the highest integer "index" recorded in the checkpoint file, or -1 if none.

    Blank lines, lines that are not valid JSON (e.g. a truncated final line after an
    interrupted run) and JSON values without an integer "index" are ignored.
    """
    highest = -1
    with checkpoint_path.open("r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip corrupt/partial lines instead of aborting the resume
            if isinstance(obj, dict) and isinstance(obj.get("index"), int):
                highest = max(highest, obj["index"])
    return highest


def _last_user_prompt(messages):
    """Return the content of the last message whose role is "user", or None if there is none."""
    for message in reversed(messages):
        if message.get("role") == "user":
            return message.get("content")
    return None


OUT_PATH = Path(f"Milestone-5/math-agent/{ENDPOINT_URL}_predictions.jsonl")
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# -----------------------------
# CHECKPOINTING LOGIC
# -----------------------------
start_index = 0
if OUT_PATH.exists():
    print(f"Checkpoint file found at: {OUT_PATH}")
    last_index = _last_checkpointed_index(OUT_PATH)
    start_index = last_index + 1
    print(f"Resuming from index {start_index} (last finished index = {last_index})")
else:
    print("No checkpoint found, starting from index 0.")

# -----------------------------

# Wall-clock budget for this cell; the loop stops before starting a record once it is exceeded.
MAX_RUNTIME_HOURS = 4.5
max_runtime_seconds = MAX_RUNTIME_HOURS * 60 * 60
start_time = time.time()

print(f"Starting processing. Will run for a maximum of {MAX_RUNTIME_HOURS} hours.")
print(f"Saving outputs to: {OUT_PATH}")

total_records = len(data)

# URL and headers do not change between records, so build them once.
token = ACCESS_TOKEN
url = f"https://{ENDPOINT_URL}/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}/chat/completions"
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

# Index of the last record written during THIS run (None until something is written).
last_processed = None

# "a" mode because we are resuming
with OUT_PATH.open("a", encoding="utf-8") as outf:
    for i in range(start_index, total_records):

        if time.time() - start_time > max_runtime_seconds:
            print(f"\nTime limit ({MAX_RUNTIME_HOURS} hours) reached. Stopping loop at index {i}.")
            break

        # Indices 0..i-1 have been handled at this point, so the count is exactly i.
        if i and i % 25 == 0:
            print(f"Processed {i} / {total_records} records...")

        record = data[i]
        messages_list = record.get("body", {}).get("messages", [])
        temperature = record.get("temperature", 0)

        # The last user message is stored next to the response for easier inspection.
        user_prompt = _last_user_prompt(messages_list)
        if not user_prompt:
            print(f"Skipping index {i}: No user message found.")
            continue

        payload = {
            "model": MODEL_ID,
            "temperature": temperature,
            "messages": messages_list,
        }

        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=60)
        except Exception as e:
            # Best effort: record the failure and move on to the next record.
            out = {"index": i, "error": str(e), "prompt": user_prompt}
            print(f"Index {i} failed: {e}")
        else:
            if resp.status_code == 401:
                # The token was rejected, so every following request would fail the same way.
                # Stop WITHOUT writing a line so a re-run resumes from this index.
                print(f"Index {i}: HTTP 401 (access token rejected, possibly expired). Stopping; refresh ACCESS_TOKEN and re-run to resume from this index.")
                break

            try:
                body = resp.json()
            except ValueError:
                # Non-JSON body (requests' JSON decode errors are ValueError subclasses).
                body = resp.text

            out = {
                "index": i,
                "status": resp.status_code,
                "prompt": user_prompt,
                "response": body,
            }

            if resp.ok:
                print(f"Index {i} processed successfully.")
            else:
                print(f"Index {i} returned HTTP {resp.status_code}.")

        outf.write(json.dumps(out) + "\n")
        # Flush every record: this file is the checkpoint, so buffered lines would be lost
        # (and re-requested) if the kernel dies.
        outf.flush()
        last_processed = i

        time.sleep(0.5)

if last_processed is None:
    print("Finished. No new records were processed.")
else:
    print(f"Finished. Last processed index: {last_processed}")
print("Responses saved to:", OUT_PATH)


Checkpoint file found at: Milestone-5\math-agent\mg-endpoint-ff24e4f2-c7c8-4cd3-9ee9-01ba5c69cc51.us-central1-656295791473.prediction.vertexai.goog_predictions.jsonl
Resuming from index 17 (last finished index = 16)
Starting processing. Will run for a maximum of 4.5 hours.
Saving outputs to: Milestone-5\math-agent\mg-endpoint-ff24e4f2-c7c8-4cd3-9ee9-01ba5c69cc51.us-central1-656295791473.prediction.vertexai.goog_predictions.jsonl
Index 17 processed successfully.
Index 18 processed successfully.
Index 19 processed successfully.
Index 20 processed successfully.
Index 21 processed successfully.
Index 22 processed successfully.
Index 23 processed successfully.
Processed 25 / 648 records...
Index 24 processed successfully.
Index 25 processed successfully.
Index 26 processed successfully.
Index 27 processed successfully.
Index 28 processed successfully.
Index 29 processed successfully.
Index 30 processed successfully.
Index 31 processed successfully.
Index 32 processed successfully.
Index 33 