# In [1]:
#from huggingface_hub import InferenceClient
from dotenv import load_dotenv
import os, json
from groq import Groq

# Load variables from a local .env file (python-dotenv) into the process
# environment first, so GROQ_API_KEY is available when the client is built.
load_dotenv()

# Shared Groq API client used for every request below.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)


# Moderation prompt sent verbatim as the single user message.
# NOTE: this is a plain string (not an f-string and never passed through
# str.format), so the JSON example uses single braces; doubled "{{ }}" would
# reach the model literally and make the "STRICT JSON" example invalid.
# The place description and review text at the bottom are the sample input.
prompt = """Task: Classify a location review for relevance and policy violations.

Definitions (pick exactly one violation):
- advertisement: contains promotional links/codes, phone numbers, coupons, “call now”, or unrelated marketing.
- irrelevant: off-topic; not about this place or its services; about another product/person/event; misplaced review (clearly for a different business type); filler/emoji-only.
- rant_no_visit: reviewer clearly states they did not visit (e.g., “never been”, “haven’t been”, “heard it’s…”).
- ok: none of the above AND the review is about this place.

Intent & fallback principle:
If none of the violation categories apply and the review is about the place, classify as "ok".
Default to "ok" when in doubt, because the goal is to:
- increase user trust in location-based reviews (better decisions),
- ensure fair representation for businesses,
- and enhance platform credibility via consistent moderation.

Output STRICT JSON only (double-quoted keys/strings; booleans lowercase):
{
  "relevant": true|false,
  "violation": "advertisement"|"irrelevant"|"rant_no_visit"|"ok",
  "classification": "advertisement"|"irrelevant"|"rant_no_visit"|"ok",
  "confidence": 0.0-1.0,
  "reasoning": "<short phrase>",
  "indicators": ["token1","token2"]
}

Place:  Pizza restaurant, Delivery Restaurant, Takeout Restaurant, Pizza delivery, Pizza Takeout
Review: "I have not been there but i heard that the dough was limp. Pizza wasn't cut. The chicken bites were burnt and nasty. Sauce was not good."
"""

# Send the classification prompt as a single user message.
resp = client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=[{"role": "user", "content": prompt}],
    temperature=0,                     # deterministic
    response_format={"type": "json_object"},  # ask Groq to return valid JSON
    max_tokens=200,
)

text = resp.choices[0].message.content
# response_format should give strict JSON, but the reply can still be missing
# (None) or cut short by max_tokens. Surface the raw reply and finish_reason
# instead of an opaque decoder error.
try:
    data = json.loads(text)
except (TypeError, json.JSONDecodeError) as err:
    raise ValueError(
        "Model reply is not valid JSON "
        f"(finish_reason={resp.choices[0].finish_reason!r}): {text!r}"
    ) from err

# Keys requested by the prompt: relevant, violation, classification,
# confidence, reasoning, indicators.
print(data)

# Output:
# {'relevant': True, 'violation': 'rant_no_visit', 'classification': 'rant_no_visit', 'confidence': 0.8, 'reasoning': 'reviewer states they did not visit', 'indicators': ["haven't been", 'never been']}
