In [None]:
# importing necessary libraries
import os
import json
import requests
from dotenv import load_dotenv
from openai import OpenAI
from chats import chats
import time

In [None]:
# Read variables from the .env file (overriding any already in the environment)
# and fetch the Groq key; only its first four characters are ever printed.
load_dotenv(override=True)
groq_api_key = os.getenv('GROQ_API_KEY')

if not groq_api_key:
    print("Groq API Key not set")
else:
    print(f"Groq API Key exists and begins {groq_api_key[:4]}")

Groq API Key exists and begins gsk_


In [4]:
# Connect to Groq through the OpenAI client library

# Groq exposes OpenAI-compatible endpoints, so the OpenAI python client
# can talk to it once base_url points at Groq.

groq_url = "https://api.groq.com/openai/v1"

# Fail fast when the key is missing: with api_key=None the OpenAI client
# falls back to the OPENAI_API_KEY environment variable, which would send
# an unrelated secret to the Groq endpoint.
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY is not set; add it to your .env file before creating the client")

groq = OpenAI(api_key=groq_api_key, base_url=groq_url)

# Model Run Function

In [None]:
def run_model(prompt_template, model_name, chat_inputs=None, client=None, temperature=0):
    """
    Generate model responses for a set of chats.

    Each chat text is inserted into the given prompt template and sent,
    as a single user message, to the specified model. The replies are
    collected in a dictionary keyed by chat ID.

    Parameters
    ----------
    prompt_template : str
        Template string with a `{chat_text}` placeholder. Literal braces
        in the template must be doubled (`{{` / `}}`) because the prompt
        is built with `str.format`.
    model_name : str
        Name of the model to query.
    chat_inputs : dict, optional
        Mapping of chat IDs to chat text. Defaults to the global `chats`
        dictionary.
    client : optional
        Object exposing `chat.completions.create(...)`. Defaults to the
        global `groq` client.
    temperature : float, optional
        Sampling temperature forwarded to the API. Defaults to 0.

    Returns
    -------
    dict
        Mapping of chat IDs to the model's response text (the `content`
        of the first returned choice).
    """
    # Resolve the defaults at call time so the globals are only needed
    # when the caller does not supply its own data / client.
    if chat_inputs is None:
        chat_inputs = chats
    if client is None:
        client = groq

    outputs = {}

    for chat_id, chat_text in chat_inputs.items():
        prompt = prompt_template.format(chat_text=chat_text)

        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=temperature
        )

        outputs[chat_id] = response.choices[0].message.content

    return outputs

# Prompt Strategy 1 - Naive Baseline

Purpose: establish a weak baseline  
Expected: errors, hallucinations

Likely failures:
- Infers missing data
- Wrong date conversions
- Extra keys

We use this baseline to understand why prompting matters.

In [5]:
# Prompt template 1 (naive baseline): asks for JSON output without naming
# the entities or giving a schema. `{chat_text}` is filled in with
# str.format() when the template is passed to run_model.
PROMPT_1 = """
Extract the required entities from the following chat conversation and return them as JSON.

Chat:
{chat_text}

Output the JSON only.
"""

In [None]:
# Send every chat through PROMPT_1 using the llama-3.3-70b-versatile model
llama_prompt1_output = run_model(PROMPT_1, "llama-3.3-70b-versatile")

In [None]:
# Save the raw model responses for llama / PROMPT_1 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/llama_prompt1_raw.json", "w") as raw_file:
    json.dump(llama_prompt1_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_1 using the openai/gpt-oss-120b model
gpt_prompt1_output = run_model(PROMPT_1, "openai/gpt-oss-120b")

In [None]:
# Save the raw model responses for gpt-oss / PROMPT_1 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/gpt_prompt1_raw.json", "w") as raw_file:
    json.dump(gpt_prompt1_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_1 using the qwen/qwen3-32b model
qwen_prompt1_output = run_model(PROMPT_1, "qwen/qwen3-32b")

In [None]:
# Save the raw model responses for qwen / PROMPT_1 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/qwen_prompt1_raw.json", "w") as raw_file:
    json.dump(qwen_prompt1_output, raw_file, indent=2)

# Prompt Strategy 2 - Schema Strict

Purpose: enforce discipline  
Expected: best overall F1

Strength:
- Lowest hallucination
- Clean JSON

Weakness:
- Might miss borderline mentions

In [20]:
# Prompt template 2 (schema-strict): explicit extraction rules plus a fixed
# ten-key JSON schema. Only `{chat_text}` is substituted by str.format();
# the doubled braces `{{` / `}}` are escapes that render as literal `{` / `}`.
PROMPT_2 = """
You are given a chat conversation between a visitor and a real estate agent.

Your task is to extract the following entities strictly according to the rules.

Rules:
- Capture an entity ONLY if it is explicitly mentioned
- Do NOT infer or guess missing information
- Do NOT infer names from email addresses
- If an entity is not mentioned, return null
- Return EXACTLY the JSON schema provided
- Do NOT add any extra keys
- Do NOT include explanations or reasoning

Chat:
{chat_text}

Output JSON schema:
{{
  "first_name": null,
  "last_name": null,
  "phone_number": null,
  "email": null,
  "budget": null,
  "current_location": null,
  "preferred_location": null,
  "profession": null,
  "date_of_visit": null,
  "buying_timeline_weeks": null
}}
"""


In [None]:
# Send every chat through PROMPT_2 using the llama-3.3-70b-versatile model
llama_prompt2_output = run_model(PROMPT_2, "llama-3.3-70b-versatile")

In [None]:
# Save the raw model responses for llama / PROMPT_2 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/llama_prompt2_raw.json", "w") as raw_file:
    json.dump(llama_prompt2_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_2 using the openai/gpt-oss-120b model
gpt_prompt2_output = run_model(PROMPT_2, "openai/gpt-oss-120b")

In [None]:
# Save the raw model responses for gpt-oss / PROMPT_2 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/gpt_prompt2_raw.json", "w") as raw_file:
    json.dump(gpt_prompt2_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_2 using the qwen/qwen3-32b model
qwen_prompt2_output = run_model(PROMPT_2, "qwen/qwen3-32b")

In [None]:
# Save the raw model responses for qwen / PROMPT_2 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/qwen_prompt2_raw.json", "w") as raw_file:
    json.dump(qwen_prompt2_output, raw_file, indent=2)

# Prompt Strategy 3 — Hidden Chain-of-Thought

Purpose: improve reasoning without leaking explanations  
Expected: higher accuracy on dates & timelines

Strength:
- Better date normalization
- Better timeline conversion

Risk:
- Some models still leak text

In [29]:
# Prompt template 3 (hidden chain-of-thought): asks the model to reason step
# by step but to emit only the final JSON. Only `{chat_text}` is substituted
# by str.format(); the doubled braces `{{` / `}}` render as literal `{` / `}`.
PROMPT_3 = """
You are given a chat conversation between a visitor and a real estate agent.

Think step by step to identify ONLY explicitly mentioned entities and normalize them according to the rules.
Do not include your reasoning in the final output.

Rules:
- Do not infer missing information
- Return null if an entity is not explicitly stated
- Return exactly the JSON schema provided

Chat:
{chat_text}

Final Output (JSON only):
{{
  "first_name": null,
  "last_name": null,
  "phone_number": null,
  "email": null,
  "budget": null,
  "current_location": null,
  "preferred_location": null,
  "profession": null,
  "date_of_visit": null,
  "buying_timeline_weeks": null
}}
"""

In [None]:
# Send every chat through PROMPT_3 using the llama-3.3-70b-versatile model
llama_prompt3_output = run_model(PROMPT_3, "llama-3.3-70b-versatile")

In [None]:
# Save the raw model responses for llama / PROMPT_3 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/llama_prompt3_raw.json", "w") as raw_file:
    json.dump(llama_prompt3_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_3 using the openai/gpt-oss-120b model
gpt_prompt3_output = run_model(PROMPT_3, "openai/gpt-oss-120b")

In [None]:
# Save the raw model responses for gpt-oss / PROMPT_3 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/gpt_prompt3_raw.json", "w") as raw_file:
    json.dump(gpt_prompt3_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_3 using the qwen/qwen3-32b model
qwen_prompt3_output = run_model(PROMPT_3, "qwen/qwen3-32b")

In [None]:
# Save the raw model responses for qwen / PROMPT_3 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/qwen_prompt3_raw.json", "w") as raw_file:
    json.dump(qwen_prompt3_output, raw_file, indent=2)

# Prompt Strategy 4 — Negative Instruction Prompt

Purpose: reduce hallucinations  
Expected: higher precision, lower recall

Strength:
- Excellent at suppressing hallucinations

Weakness:
- Misses borderline but valid mentions

In [36]:
# Prompt template 4 (negative instructions): a list of "Do NOT ..." constraints
# followed by the fixed JSON schema. Only `{chat_text}` is substituted by
# str.format(); the doubled braces `{{` / `}}` render as literal `{` / `}`.
PROMPT_4 = """
Extract entities from the chat below.

Important constraints:
- Do NOT infer missing information
- Do NOT guess professions
- Do NOT extract names from email addresses
- Do NOT fabricate phone numbers or dates
- If an entity is not explicitly mentioned, return null

Chat:
{chat_text}

Return ONLY the JSON in the following format:
{{
  "first_name": null,
  "last_name": null,
  "phone_number": null,
  "email": null,
  "budget": null,
  "current_location": null,
  "preferred_location": null,
  "profession": null,
  "date_of_visit": null,
  "buying_timeline_weeks": null
}}
"""

In [None]:
# Send every chat through PROMPT_4 using the llama-3.3-70b-versatile model
llama_prompt4_output = run_model(PROMPT_4, "llama-3.3-70b-versatile")

In [None]:
# Save the raw model responses for llama / PROMPT_4 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/llama_prompt4_raw.json", "w") as raw_file:
    json.dump(llama_prompt4_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_4 using the openai/gpt-oss-120b model
gpt_prompt4_output = run_model(PROMPT_4, "openai/gpt-oss-120b")

In [None]:
# Save the raw model responses for gpt-oss / PROMPT_4 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/gpt_prompt4_raw.json", "w") as raw_file:
    json.dump(gpt_prompt4_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_4 using the qwen/qwen3-32b model
qwen_prompt4_output = run_model(PROMPT_4, "qwen/qwen3-32b")

In [None]:
# Save the raw model responses for qwen / PROMPT_4 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/qwen_prompt4_raw.json", "w") as raw_file:
    json.dump(qwen_prompt4_output, raw_file, indent=2)

# Prompt Strategy 5 — One-Shot Example

Purpose: teach behavior by example  
Expected: better consistency across chats

Strength:
- Improves structure for weaker models

Weakness:
- Sometimes overfits to the example

In [43]:
# Prompt template 5 (one-shot): shows one worked chat -> JSON example before
# the real chat. Only `{chat_text}` is substituted by str.format(); the
# doubled braces `{{` / `}}` around the example render as literal `{` / `}`.
PROMPT_5 = """
Example:

Chat:
Visitor: Hi, my name is Ramesh Kumar. I want to buy a flat in Andheri.
Agent: May I know your budget?
Visitor: Around 1.2 crore. I stay in Borivali.

Output:
{{
  "first_name": "Ramesh",
  "last_name": "Kumar",
  "phone_number": null,
  "email": null,
  "budget": 12000000,
  "current_location": "Borivali",
  "preferred_location": "Andheri",
  "profession": null,
  "date_of_visit": null,
  "buying_timeline_weeks": null
}}

Now extract entities from the following chat.

Chat:
{chat_text}

Return JSON only using the same schema.
"""

In [None]:
# Send every chat through PROMPT_5 using the llama-3.3-70b-versatile model
llama_prompt5_output = run_model(PROMPT_5, "llama-3.3-70b-versatile")

In [None]:
# Save the raw model responses for llama / PROMPT_5 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/llama_prompt5_raw.json", "w") as raw_file:
    json.dump(llama_prompt5_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_5 using the openai/gpt-oss-120b model
gpt_prompt5_output = run_model(PROMPT_5, "openai/gpt-oss-120b")

In [None]:
# Save the raw model responses for gpt-oss / PROMPT_5 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/gpt_prompt5_raw.json", "w") as raw_file:
    json.dump(gpt_prompt5_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_5 using the qwen/qwen3-32b model
qwen_prompt5_output = run_model(PROMPT_5, "qwen/qwen3-32b")

In [None]:

# Save the raw model responses for qwen / PROMPT_5 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/qwen_prompt5_raw.json", "w") as raw_file:
    json.dump(qwen_prompt5_output, raw_file, indent=2)

# Prompt Strategy 6 — Entity-by-Entity Extraction

Purpose: reduce cross-entity confusion  
Expected: better precision on names & contacts

Strength:
- Helps smaller models

Weakness:
- Slightly verbose

In [7]:
# Prompt template 6 (entity-by-entity): enumerates the ten entities, with
# format hints for budget, profession and visit date, then gives the JSON
# schema. Only `{chat_text}` is substituted by str.format(); the doubled
# braces `{{` / `}}` render as literal `{` / `}`.
PROMPT_6 = """
Extract the following entities from the chat:

1. First Name
2. Last Name
3. Phone Number
4. Email
5. Budget (integer INR)
6. Current Location
7. Preferred Location
8. Profession (service, business, retired)
9. Date of Visit (YYYY-MM-DD)
10. Buying Timeline in Weeks

Rules:
- Extract ONLY if explicitly mentioned
- Do NOT infer missing values
- Return null for missing entities

Chat:
{chat_text}

Return the result strictly as JSON using this schema:
{{
  "first_name": null,
  "last_name": null,
  "phone_number": null,
  "email": null,
  "budget": null,
  "current_location": null,
  "preferred_location": null,
  "profession": null,
  "date_of_visit": null,
  "buying_timeline_weeks": null
}}
"""

In [None]:
# Send every chat through PROMPT_6 using the llama-3.3-70b-versatile model
llama_prompt6_output = run_model(PROMPT_6, "llama-3.3-70b-versatile")

In [None]:
# Save the raw model responses for llama / PROMPT_6 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/llama_prompt6_raw.json", "w") as raw_file:
    json.dump(llama_prompt6_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_6 using the openai/gpt-oss-120b model
gpt_prompt6_output = run_model(PROMPT_6, "openai/gpt-oss-120b")

In [None]:
# Save the raw model responses for gpt-oss / PROMPT_6 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/gpt_prompt6_raw.json", "w") as raw_file:
    json.dump(gpt_prompt6_output, raw_file, indent=2)

In [None]:
# Send every chat through PROMPT_6 using the qwen/qwen3-32b model
qwen_prompt6_output = run_model(PROMPT_6, "qwen/qwen3-32b")

In [None]:
# Save the raw model responses for qwen / PROMPT_6 as JSON
os.makedirs("outputs", exist_ok=True)

with open("outputs/qwen_prompt6_raw.json", "w") as raw_file:
    json.dump(qwen_prompt6_output, raw_file, indent=2)

# Modifying model call function to capture latency and token usage
## Add timing + usage capture

In [None]:
import time

def run_model_with_metrics(prompt_template, model_name, chat_inputs=None, client=None):
    """
    Run a model on chat inputs and collect performance metrics.

    Each chat text is formatted into the given prompt template and sent,
    as a single user message at temperature 0, to the specified model.
    The function records the model's responses along with per-request
    latency and token usage statistics.

    Parameters
    ----------
    prompt_template : str
        Template string with a `{chat_text}` placeholder. Literal braces
        in the template must be doubled (`{{` / `}}`) because the prompt
        is built with `str.format`.
    model_name : str
        Name of the model to query.
    chat_inputs : dict, optional
        Mapping of chat IDs to chat text. Defaults to the global `chats`
        dictionary.
    client : optional
        Object exposing `chat.completions.create(...)`. Defaults to the
        global `groq` client.

    Returns
    -------
    tuple
        (outputs, metrics) where:
        - outputs (dict): Mapping of chat IDs to model response text.
        - metrics (dict): Contains:
            * avg_latency_sec (float): Average response time in seconds
              (0.0 when there were no chats to run).
            * latencies (list of float): Per-chat response times.
            * token_usages (list of dict): Token usage details for each
              response that exposed them; responses without usage data
              are skipped, so this list can be shorter than `latencies`.
    """
    # Resolve the defaults at call time so the globals are only needed
    # when the caller does not supply its own data / client.
    if chat_inputs is None:
        chat_inputs = chats
    if client is None:
        client = groq

    outputs = {}
    latencies = []
    token_usages = []

    for chat_id, chat_text in chat_inputs.items():
        prompt = prompt_template.format(chat_text=chat_text)

        # perf_counter() is a monotonic, high-resolution clock, so the
        # measured interval cannot be skewed by system clock adjustments.
        start_time = time.perf_counter()

        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )

        latencies.append(time.perf_counter() - start_time)

        # Output
        outputs[chat_id] = response.choices[0].message.content

        # Token usage (if exposed)
        usage = getattr(response, "usage", None)
        if usage is not None:
            token_usages.append({
                "prompt_tokens": getattr(usage, "prompt_tokens", None),
                "completion_tokens": getattr(usage, "completion_tokens", None),
                "total_tokens": getattr(usage, "total_tokens", None),
            })

    # Guard the average so an empty chat set does not raise ZeroDivisionError.
    avg_latency = sum(latencies) / len(latencies) if latencies else 0.0

    metrics = {
        "avg_latency_sec": avg_latency,
        "latencies": latencies,
        "token_usages": token_usages
    }

    return outputs, metrics


## Running modified function call on Top 3 configurations

## GPT-OSS model call with prompt_6

In [None]:
# Run PROMPT_6 on openai/gpt-oss-120b, collecting latency and token usage
outputs, metrics = run_model_with_metrics(
    prompt_template=PROMPT_6,
    model_name="openai/gpt-oss-120b"
)

# Make sure the target directory exists so this cell also works when the
# earlier raw-output cells were not run.
os.makedirs("outputs", exist_ok=True)

# Store the metrics returned for this run
with open("outputs/gpt_prompt6_metrics.json", "w") as f:
    json.dump(metrics, f, indent=2)

Average latency of GPT-OSS is 3.01 seconds.  
Prompt tokens are in the range of 400-600.  
Total token count falls in the range 730-1140.

## Llama model call with prompt_6

In [None]:
# Run PROMPT_6 on llama-3.3-70b-versatile, collecting latency and token usage
outputs, metrics = run_model_with_metrics(
    prompt_template=PROMPT_6,
    model_name="llama-3.3-70b-versatile"
)

# Make sure the target directory exists so this cell also works when the
# earlier raw-output cells were not run.
os.makedirs("outputs", exist_ok=True)

# Store the metrics returned for this run
with open("outputs/llama_prompt6_metrics.json", "w") as f:
    json.dump(metrics, f, indent=2)

Average latency of Llama is 1.8 seconds.  
Prompt tokens are in the range of 400-600.  
Total token count falls in the range 475-720.

## Qwen model call with prompt_6

In [None]:
# Run PROMPT_6 on qwen/qwen3-32b, collecting latency and token usage
outputs, metrics = run_model_with_metrics(
    prompt_template=PROMPT_6,
    model_name="qwen/qwen3-32b"
)

# Make sure the target directory exists so this cell also works when the
# earlier raw-output cells were not run.
os.makedirs("outputs", exist_ok=True)

# Store the metrics returned for this run
with open("outputs/qwen_prompt6_metrics.json", "w") as f:
    json.dump(metrics, f, indent=2)

Average latency of Qwen is 5.5 seconds.  
Prompt tokens are in the range of 400-600.  
Total token count falls in the range 700-1600.