In [1]:
import os
import re
import requests

# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is not set).
HF_TOKEN = os.environ.get("HF_TOKEN")  # set env var HF_TOKEN=...
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
# NOTE(review): the run recorded at the end of this file failed with
# "410 Client Error: Gone" for this URL, which suggests the endpoint has been
# retired -- confirm the currently supported endpoint before relying on it.
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

# Only attach an Authorization header when a token is actually available;
# otherwise the f-string would produce the literal header "Bearer None".
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

def build_prompt(problem: str, options: str) -> str:
    """Return the instruction prompt for one multiple-choice problem.

    The prompt consists of four sections separated by blank lines: a task
    description, the problem text, the answer options, and an instruction to
    reply with only the option letter.
    """
    sections = (
        "You are solving a multiple-choice math word problem.",
        f"Problem:\n{problem}",
        f"Options:\n{options}",
        "Return ONLY the letter of the correct option (A, B, C, D, or E).",
    )
    return "\n\n".join(sections)

# Patterns are tried in order, from the most to the least specific signal.
_CHOICE_PATTERNS = (
    # Explicit cue such as "Answer: B", "answer is (c)", "ANSWER - D".
    re.compile(r"\bANSWER\b(?:\s+IS)?\s*[:\-]?\s*\(?([A-E])\b", re.IGNORECASE),
    # A standalone capital letter, so the lowercase article "a" is not
    # mistaken for option A when a capital option letter is also present.
    re.compile(r"\b([A-E])\b"),
    # Last resort: any standalone letter a-e regardless of case, e.g. "(c)".
    re.compile(r"\b([A-E])\b", re.IGNORECASE),
)


def extract_choice(text: str) -> str:
    """Extract the chosen option letter from a model response.

    Accepts outputs like "A", "Answer: B", "(c)", etc.

    Args:
        text: Raw text generated by the model.

    Returns:
        One of "A".."E" in upper case. Falls back to "E" when no standalone
        option letter can be found in ``text``.
    """
    cleaned = text.strip()
    for pattern in _CHOICE_PATTERNS:
        match = pattern.search(cleaned)
        if match:
            return match.group(1).upper()
    # No recognizable option letter: keep the historical default.
    return "E"

def predict_answer(problem: str, options: str) -> dict:
    """Query the model for one problem and return its parsed answer.

    Builds the prompt, POSTs it to ``API_URL`` with ``HEADERS`` and a
    60-second timeout, and extracts an option letter from the generated text.

    Args:
        problem: The word problem text.
        options: The answer options as a single string.

    Returns:
        A dict with keys ``"choice"`` (the letter from ``extract_choice``)
        and ``"raw_generation"`` (the text the letter was extracted from).

    Raises:
        requests.HTTPError: If the response has an error status code
            (raised by ``raise_for_status``).
    """
    generation_params = {
        "max_new_tokens": 5,
        "temperature": 0.0,
        "return_full_text": False,
    }
    request_body = {
        "inputs": build_prompt(problem, options),
        "parameters": generation_params,
    }

    response = requests.post(
        API_URL, headers=HEADERS, json=request_body, timeout=60
    )
    response.raise_for_status()
    decoded = response.json()

    # Expected shape is a non-empty list whose first item carries
    # "generated_text"; anything else is stringified as-is.
    has_expected_shape = (
        isinstance(decoded, list) and decoded and "generated_text" in decoded[0]
    )
    raw_text = decoded[0]["generated_text"] if has_expected_shape else str(decoded)

    return {"choice": extract_choice(raw_text), "raw_generation": raw_text}

if __name__ == "__main__":
    # Demo run: send one sample problem to the model and print the result dict.
    problem = "Sophia finished 2/3 of a book. She finished 90 more pages than she has yet to read. How long is the book?"
    options = "A) 229, B) 270, C) 877, D) 266, E) 281"
    result = predict_answer(problem=problem, options=options)
    print(result)


HTTPError: 410 Client Error: Gone for url: https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct