<a href="https://colab.research.google.com/github/IS2022U/Conversation-Management-Groq/blob/main/yardstick.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Installing the dependencies
!pip install --quiet openai jsonschema

from getpass import getpass
import os
from openai import OpenAI
import json
import time
from typing import List, Dict



In [2]:
# Setting API key and base.
#This uses getpass so the key is not printed.

API_KEY = getpass("Enter your Groq/OpenAI-compatible API key : ")
API_BASE = getpass("Enter the API base (press Enter to use https://api.groq.com/openai/v1): ")
if API_BASE.strip() == "":
    API_BASE = "https://api.groq.com/openai/v1"

# Save to environment variables
os.environ["OPENAI_API_KEY"] = API_KEY
os.environ["OPENAI_API_BASE"] = API_BASE

# Creating the OpenAI client
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=os.environ["OPENAI_API_BASE"])

MODEL = "gpt-4o"

print("Client created. Model set to:", MODEL)


Enter your Groq/OpenAI-compatible API key : ··········
Enter the API base (press Enter to use https://api.groq.com/openai/v1): ··········
Client created. Model set to: gpt-4o


In [3]:
#generating the compatible model
models = client.models.list()
for m in models.data:
    print(m.id)


meta-llama/llama-guard-4-12b
deepseek-r1-distill-llama-70b
llama-3.3-70b-versatile
meta-llama/llama-4-maverick-17b-128e-instruct
meta-llama/llama-prompt-guard-2-86m
qwen/qwen3-32b
groq/compound
whisper-large-v3-turbo
whisper-large-v3
moonshotai/kimi-k2-instruct-0905
openai/gpt-oss-20b
meta-llama/llama-4-scout-17b-16e-instruct
meta-llama/llama-prompt-guard-2-22m
playai-tts-arabic
gemma2-9b-it
openai/gpt-oss-120b
playai-tts
moonshotai/kimi-k2-instruct
llama-3.1-8b-instant
allam-2-7b
groq/compound-mini


Task 1: Conversation Management with Summarization

In [4]:
class ConversationManager:
    def __init__(self, model: str = "llama-3.3-70b-versatile", openai_client=None, k_summarize: int = 3):
        """
        model: model name to use for summarization
        openai_client: an instance of OpenAI (required)
        k_summarize: perform summarization after every k runs
        """
        if openai_client is None:
            raise ValueError("openai_client must be provided (pass the OpenAI client instance)")
        self.model = model
        self.openai = openai_client
        self.history: list[dict] = []
        self.run_count = 0
        self.k_summarize = k_summarize

    # basic history operations
    def add_message(self, role: str, content: str):
        assert role in ("user", "assistant", "system")
        self.history.append({"role": role, "content": content})

    def get_history(self) -> list[dict]:
        return self.history.copy()

    # truncation helpers
    def truncate_by_turns(self, n: int):
        if n <= 0:
            return []
        return self.history[-n:]

    def truncate_by_chars(self, max_chars: int):
        if max_chars <= 0:
            return []
        kept = []
        total = 0
        for m in reversed(self.history):
            if total + len(m['content']) > max_chars:
                break
            kept.append(m)
            total += len(m['content'])
        return list(reversed(kept))

    def truncate_by_words(self, max_words: int):
        if max_words <= 0:
            return []
        kept = []
        total = 0
        for m in reversed(self.history):
            wc = len(m['content'].split())
            if total + wc > max_words:
                break
            kept.append(m)
            total += wc
        return list(reversed(kept))

    # summarization
    def _build_summarization_prompt(self, messages: list[dict]) -> str:
        conversation_text = ""
        for m in messages:
            conversation_text += f"{m['role'].upper()}: {m['content']}\n"
        prompt = (
            "You are a concise summarizer. Summarize the conversation into:\n"
            "- A short 2-4 line summary\n"
            "- Bullet points with key facts / user requests\n"
            "- Any actions or pending items (if present)\n\n"
            "Conversation:\n"
            f"{conversation_text}\n"
            "Return only the summary (no extra commentary)."
        )
        return prompt

    def summarize_messages(self, messages: list[dict], max_tokens: int = 256) -> str:
        prompt = self._build_summarization_prompt(messages)
        try:
            resp = self.openai.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=0.2,
            )
            # Access the message correctly
            summary = resp.choices[0].message.content.strip()
            return summary
        except Exception as e:
            print("Summarization failed:", e)
            return "[SUMMARY FAILED]"

    def maybe_summarize(self):
        self.run_count += 1
        if self.k_summarize <= 0 or (self.run_count % self.k_summarize != 0):
            return False

        n_keep_recent = 2
        if len(self.history) <= n_keep_recent + 1:
            return False

        messages_to_summarize = self.history[:-n_keep_recent]
        summary_text = self.summarize_messages(messages_to_summarize)
        summary_message = {
            "role": "system",
            "content": f"[SUMMARY OF EARLIER CONVERSATION]\n{summary_text}",
        }
        self.history = [summary_message] + self.history[-n_keep_recent:]
        return True

    # Convenience: get truncated view
    def get_truncated_view(self, limit_turns=None, limit_chars=None, limit_words=None):
        view = self.history
        if limit_turns is not None:
            view = view[-limit_turns:]
        if limit_chars is not None:
            view = self.truncate_by_chars(limit_chars)
        if limit_words is not None:
            view = self.truncate_by_words(limit_words)
        return view

    # pretty-print
    def pretty_print(self, msgs=None):
        if msgs is None:
            msgs = self.history
        for i, m in enumerate(msgs, start=1):
            print(f"[{i}] {m['role'].upper()}: {m['content']}\n")


Demonstration with sample conversation:

In [5]:
cm = ConversationManager(model="llama-3.3-70b-versatile", openai_client=client, k_summarize=3)

cm.add_message("user", "Hi, I'm looking to book a meeting next week.")
cm.add_message("assistant", "Sure — what days/times work for you?")
cm.maybe_summarize()

cm.add_message("user", "Any chance for Tuesday or Wednesday morning?")
cm.add_message("assistant", "I have openings on Tuesday 10am and Wednesday 9am.")
cm.maybe_summarize()

cm.add_message("user", "Tuesday 10am works. Also, remind me to prepare the report.")
cm.add_message("assistant", "Noted — meeting set for Tue 10am. Will remind you to prepare the report.")
summed = cm.maybe_summarize()
print("summarized?", summed)
cm.pretty_print()



summarized? True
[1] SYSTEM: [SUMMARY OF EARLIER CONVERSATION]
User is booking a meeting for next week, with preferred days being Tuesday or Wednesday morning. 
Available slots are Tuesday 10am and Wednesday 9am.
* User wants to book a meeting next week
* Preferred days: Tuesday or Wednesday morning
* Available meeting times: Tuesday 10am, Wednesday 9am
* Action: User to confirm meeting time

[2] USER: Tuesday 10am works. Also, remind me to prepare the report.

[3] ASSISTANT: Noted — meeting set for Tue 10am. Will remind you to prepare the report.



In [6]:
print("Last 3 turns:")
cm.pretty_print(cm.get_truncated_view(limit_turns=3))

print("\nTruncated to 150 chars:")
cm.pretty_print(cm.get_truncated_view(limit_chars=150))

print("\nTruncated to 30 words:")
cm.pretty_print(cm.get_truncated_view(limit_words=30))


Last 3 turns:
[1] SYSTEM: [SUMMARY OF EARLIER CONVERSATION]
User is booking a meeting for next week, with preferred days being Tuesday or Wednesday morning. 
Available slots are Tuesday 10am and Wednesday 9am.
* User wants to book a meeting next week
* Preferred days: Tuesday or Wednesday morning
* Available meeting times: Tuesday 10am, Wednesday 9am
* Action: User to confirm meeting time

[2] USER: Tuesday 10am works. Also, remind me to prepare the report.

[3] ASSISTANT: Noted — meeting set for Tue 10am. Will remind you to prepare the report.


Truncated to 150 chars:
[1] USER: Tuesday 10am works. Also, remind me to prepare the report.

[2] ASSISTANT: Noted — meeting set for Tue 10am. Will remind you to prepare the report.


Truncated to 30 words:
[1] USER: Tuesday 10am works. Also, remind me to prepare the report.

[2] ASSISTANT: Noted — meeting set for Tue 10am. Will remind you to prepare the report.



In [7]:
# Create ConversationManager instance with model llama to analyze the dsifferences
cm = ConversationManager(model="llama-3.3-70b-versatile", openai_client=client, k_summarize=3)

# Run 1
cm.add_message("user", "Hello! I need help planning my weekend trip.")
cm.add_message("assistant", "Sure! Where are you planning to go, and how many days do you have?")
cm.maybe_summarize()  # run_count = 1 -> no summarization

# Run 2
cm.add_message("user", "I'm thinking of going to Pokhara for 2 days.")
cm.add_message("assistant", "Great! Do you prefer adventure activities, sightseeing, or relaxation?")
cm.maybe_summarize()  # run_count = 2 -> no summarization

# Run 3
cm.add_message("user", "I want a mix of sightseeing and some light trekking.")
cm.add_message("assistant", "Perfect! I suggest visiting Phewa Lake, World Peace Pagoda, and doing a short trek to Sarangkot.")
summed = cm.maybe_summarize()  # run_count = 3 -> summarization occurs
print("summarized?", summed)
cm.pretty_print()

# Demonstrate truncation options
print("Last 3 turns:")
cm.pretty_print(cm.get_truncated_view(limit_turns=3))

print("\nTruncated to 150 chars:")
cm.pretty_print(cm.get_truncated_view(limit_chars=150))

print("\nTruncated to 30 words:")
cm.pretty_print(cm.get_truncated_view(limit_words=30))


summarized? True
[1] SYSTEM: [SUMMARY OF EARLIER CONVERSATION]
User is planning a 2-day weekend trip to Pokhara. They are considering options for activities. 
* Destination: Pokhara
* Duration: 2 days
No actions or pending items.

[2] USER: I want a mix of sightseeing and some light trekking.

[3] ASSISTANT: Perfect! I suggest visiting Phewa Lake, World Peace Pagoda, and doing a short trek to Sarangkot.

Last 3 turns:
[1] SYSTEM: [SUMMARY OF EARLIER CONVERSATION]
User is planning a 2-day weekend trip to Pokhara. They are considering options for activities. 
* Destination: Pokhara
* Duration: 2 days
No actions or pending items.

[2] USER: I want a mix of sightseeing and some light trekking.

[3] ASSISTANT: Perfect! I suggest visiting Phewa Lake, World Peace Pagoda, and doing a short trek to Sarangkot.


Truncated to 150 chars:
[1] USER: I want a mix of sightseeing and some light trekking.

[2] ASSISTANT: Perfect! I suggest visiting Phewa Lake, World Peace Pagoda, and doing a short trek 

Task 2: JSON Schema Classification & Information Extraction

In [8]:

import os
import time
import json
import re
import random
from getpass import getpass
from typing import Optional, Callable, Any, Dict

# jsonschema for validation
from jsonschema import validate, ValidationError

from openai import OpenAI

# Get API key without echoing
API_KEY = getpass("Enter your Groq/OpenAI-compatible API key: ")
os.environ["OPENAI_API_KEY"] = API_KEY

# Set Groq API base (use the endpoint you confirmed works).
# Common values: https://api.groq.com/openai/v1  or https://api.groq.com/v1
os.environ["OPENAI_API_BASE"] = "https://api.groq.com/openai/v1"


client = OpenAI()

MODEL = "gpt-4o-mini"

try:
    models = client.models.list()
    print("Authenticated — top models (first 8):", [m.id for m in models.data[:8]])
except Exception as e:
    print("Authentication check FAILED:", str(e))
    # If this fails, stop here and verify API_BASE and key.


Enter your Groq/OpenAI-compatible API key: ··········
Authenticated — top models (first 8): ['gpt-3.5-turbo', 'gpt-audio', 'gpt-5-mini', 'gpt-5-nano-2025-08-07', 'gpt-5-nano', 'gpt-audio-2025-08-28', 'davinci-002', 'babbage-002']


In [9]:
#  Schema + function description

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "description": "Full name of the person"},
        "email": {"type": "string", "format": "email", "description": "Email address"},
        "phone": {"type": "string", "description": "Phone number in any reasonable format"},
        "location": {"type": "string", "description": "City/country or full address"},
        "age": {"type": "integer", "minimum": 0, "maximum": 120, "description": "Person's age in years"}
    },
    "required": ["name"],
    "additionalProperties": False
}

functions = [
    {
        "name": "extract_user_info",
        "description": "Extract name, email, phone, location and age from the user's message in JSON format.",
        "parameters": schema
    }
]



In [10]:
# Retry decorator and helper to handle error due to rate limit in api

def is_rate_limit_error(exc: Exception) -> bool:
    """Return True if exception likely indicates a rate limit (429) or retryable throttling."""

    msg = str(exc).lower()
    if "rate limit" in msg or "too many requests" in msg or "429" in msg:
        return True

    sc = getattr(exc, "status_code", None) or getattr(exc, "http_status", None)
    if sc == 429:
        return True
    return False

def retry_on_rate_limit(max_attempts: int = 6, backoff_base: float = 1.0, max_backoff: float = 30.0):
    """Decorator to retry a function on rate-limit errors with exponential backoff + jitter."""
    def decorator(fn: Callable):
        def wrapper(*args, **kwargs):
            attempt = 0
            while True:
                try:
                    return fn(*args, **kwargs)
                except Exception as e:
                    attempt += 1
                    if attempt >= max_attempts or not is_rate_limit_error(e):

                        raise

                    backoff = min(max_backoff, backoff_base * (2 ** (attempt - 1)))
                    jitter = random.uniform(0, backoff * 0.1)
                    sleep_for = backoff + jitter
                    print(f"[retry_on_rate_limit] Rate limit detected. Attempt {attempt}/{max_attempts}. Sleeping {sleep_for:.1f}s and retrying...")
                    time.sleep(sleep_for)
        return wrapper
    return decorator

# small helper to normalize phone numbers (very simple; adjust for your needs)
def normalize_phone(phone_str: str) -> Optional[str]:
    if not phone_str:
        return None
    s = re.sub(r"[^\d\+]", "", phone_str)  # in order to keep digits and plus

    if len(re.sub(r"\D", "", s)) >= 7:
        return s
    return None



In [11]:
# Main extraction function
@retry_on_rate_limit(max_attempts=6, backoff_base=1.0, max_backoff=20.0)
def extract_info_from_text(text: str, model: Optional[str] = None) -> Dict[str, Any]:
    """
    Sends `text` to the model using function-calling and validates result against the JSON schema.
    Returns a dict with raw response, parsed dict, validation flags and normalized fields.
    """
    if model is None:
        model = MODEL

    messages = [{"role": "user", "content": text}]

    # Call chat completion / function-calling
    resp = client.chat.completions.create(
        model=model,
        messages=messages,
        functions=functions,
        function_call="auto",
        temperature=0.0,
        max_tokens=512
    )


    choice = resp.choices[0]
    message = getattr(choice, "message", None) or choice  # some clients different

    parsed = {}
    raw_args = None


    func_call = getattr(message, "function_call", None)
    if func_call and getattr(func_call, "arguments", None):
        raw_args = func_call.arguments
    else:
        raw_args = getattr(message, "content", "")

    # Parse JSON
    if raw_args:
        try:
            parsed = json.loads(raw_args)
        except Exception:
            # fallback: try to extract JSON substring
            txt = raw_args
            json_match = re.search(r"(\{[\s\S]*\})", txt)
            if json_match:
                try:
                    parsed = json.loads(json_match.group(1))
                except Exception:
                    parsed = {}
            else:
                parsed = {}

    # Normalize / coerce fields
    # Age might be string like "twenty" or "22 years" etc. Try extract digits first.
    age = parsed.get("age")
    if isinstance(age, str):
        # extract first integer found
        m = re.search(r"(\d{1,3})", age)
        if m:
            parsed["age"] = int(m.group(1))
        else:
            # attempt textual number -> not implemented; set None so validation fails later
            parsed["age"] = None

    # phone normalization
    if "phone" in parsed:
        parsed["phone"] = normalize_phone(parsed["phone"])

    # Ensure keys exist (explicitly set None if missing)
    for k in ["name", "email", "phone", "location", "age"]:
        parsed.setdefault(k, None)

    # Validate against schema using jsonschema
    try:
        validate(instance=parsed, schema=schema)
        valid = True
        validation_error = None
    except ValidationError as ve:
        valid = False
        validation_error = str(ve)

    return {
        "raw_model_output": resp,
        "parsed": parsed,
        "valid": valid,
        "validation_error": validation_error
    }




In [12]:
# local regex-based fallback extractor ===

# If phonenumbers is not installed, the code below will still work using simpler regex normalization.

number_word_map = {
    "zero":0,"one":1,"two":2,"three":3,"four":4,"five":5,"six":6,"seven":7,"eight":8,"nine":9,
    "ten":10,"eleven":11,"twelve":12,"thirteen":13,"fourteen":14,"fifteen":15,"sixteen":16,
    "seventeen":17,"eighteen":18,"nineteen":19,"twenty":20,"twenty one":21,"twenty-two":22,
    "twenty three":23,"twenty four":24,"twenty five":25
}

def try_parse_age(s):
    if not s: return None
    # extract digits
    m = re.search(r"\b(\d{1,3})\b", s)
    if m:
        val = int(m.group(1))
        if 0 <= val <= 120:
            return val
    # try to map words (simple)
    s2 = s.lower().strip()
    return number_word_map.get(s2)

def normalize_phone_simple(s):
    if not s: return None
    digits = re.sub(r"[^\d\+]", "", s)
    if len(re.sub(r"\D","", digits)) >= 7:
        return digits
    return None

def fallback_extract(text):
    out = {"name": None, "email": None, "phone": None, "location": None, "age": None}
    # name heuristics(look for "I'm X", "I am X", "this is X", "name: X")
    m = re.search(r"(?:i'm|i am|this is|my name is)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)", text, re.I)
    if m:
        out["name"] = m.group(1).strip()
    else:
        m2 = re.search(r"name[:\-]\s*([A-Z][\w\s\.]+)", text)
        if m2:
            out["name"] = m2.group(1).strip()
    # email
    m = re.search(r"([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})", text)
    if m:
        out["email"] = m.group(1).strip()
    # phone
    m = re.search(r"(\+?\d{2,4}[\-\s]?\d{6,12}|\b\d{7,12}\b)", text)
    if m:
        out["phone"] = normalize_phone_simple(m.group(1))
    # location
    m = re.search(r"(?:in|at|near|location[:\-])\s+([A-Z][\w\s]+)", text)
    if m:
        out["location"] = m.group(1).strip().rstrip(".")
    # age
    m = re.search(r"\b(age|I'm|I am|i'm)\s*[:\-]?\s*(\d{1,3})\b", text, re.I)
    if m:
        out["age"] = try_parse_age(m.group(2))
    else:
        #  for phrases like "I'm 22 years old" or "Age: 28"
        m2 = re.search(r"\b(\d{1,3})\s*(?:years?|yrs?)\b", text, re.I)
        if m2:
            out["age"] = try_parse_age(m2.group(1))
        else:
            # try textual
            for word_form in number_word_map.keys():
                if word_form in text.lower():
                    out["age"] = number_word_map[word_form]
                    break
    # ensure at least name exists otherwise None
    return out


In [19]:
# Updated wrapper: try API, fall back to fallback_extract
MODEL = "gpt-4o-mini"

def is_insufficient_quota(exc: Exception) -> bool:
    """
    Heuristic to detect quota / billing errors from exception text.
    Adjust the keywords to fit the exact messages you see from the Groq/OpenAI client.
    """
    msg = str(exc).lower()
    keywords = ["insufficient_quota", "quota", "billing", "insufficient credits", "payment required", "limit exceeded"]
    return any(k in msg for k in keywords)

@retry_on_rate_limit(max_attempts=4)
def call_api_extract(text):
    messages = [{"role": "user", "content": text}]
    resp = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        functions=functions,
        function_call="auto",
        temperature=0.0,
        max_tokens=400
    )

    # try to parse function_call or content
    choice = resp.choices[0]
    # some SDKs put the message under choice.message, others under the choice object directly
    message = getattr(choice, "message", None) or choice
    parsed = {}
    raw_args = None

    # function_call.arguments can be a JSON string or already-parsed dict in some SDKs
    fc = getattr(message, "function_call", None)
    if fc and getattr(fc, "arguments", None):
        raw_args = fc.arguments
    else:
        # fallback to content (string)
        raw_args = getattr(message, "content", "") or ""

    # If raw_args is already a dict, use it directly; otherwise try to load JSON
    if isinstance(raw_args, dict):
        parsed = raw_args
    else:
        # Expecting a JSON string; try direct load, else extract JSON substring
        try:
            parsed = json.loads(raw_args) if raw_args else {}
        except Exception:
            m = re.search(r"(\{[\s\S]*\})", str(raw_args))
            if m:
                try:
                    parsed = json.loads(m.group(1))
                except Exception:
                    parsed = {}
            else:
                parsed = {}

    # coerce age if string
    if "age" in parsed and isinstance(parsed["age"], str):
        a = re.search(r"(\d{1,3})", parsed["age"])
        if a:
            parsed["age"] = int(a.group(1))
        else:
            parsed["age"] = None

    # normalize phone minimal (assumes normalize_phone_simple exists)
    if "phone" in parsed and parsed["phone"] is not None:
        parsed["phone"] = normalize_phone_simple(parsed["phone"])

    # ensure keys exist
    for k in ["name", "email", "phone", "location", "age"]:
        parsed.setdefault(k, None)

    # validate against schema
    try:
        validate(instance=parsed, schema=schema)
        valid = True
    except ValidationError:
        valid = False

    return {"from": "api", "parsed": parsed, "valid": valid, "raw": resp}


def extract_info_with_fallback(text):
    """
    Try API extraction first. On exceptions (connectivity, quota), fall back to local extractor.
    Returns a dict with consistent keys:
      - from: "api" or "fallback"
      - parsed: dict
      - valid: bool (schema validation result)
      - raw: API response object (if from api) else None
      - error: error string if fallback was used
    """
    try:
        out = call_api_extract(text)
        return out
    except Exception as e:
        # Decide if this is a quota/billing error (so we shouldn't retry further)
        if is_insufficient_quota(e):
            print("[extract_info_with_fallback] Insufficient quota detected — using local fallback extractor.")
        else:
            print(f"[extract_info_with_fallback] API call failed ({e}) — using local fallback extractor.")

        # Local fallback extractor should return a dict-like parsed result
        parsed = fallback_extract(text)

        # Ensure keys exist in fallback output too
        for k in ["name", "email", "phone", "location", "age"]:
            parsed.setdefault(k, None)

        # Apply same normalization used for API path
        if isinstance(parsed.get("age"), str):
            a = re.search(r"(\d{1,3})", parsed["age"])
            parsed["age"] = int(a.group(1)) if a else None

        if parsed.get("phone") is not None:
            parsed["phone"] = normalize_phone_simple(parsed["phone"])

        # Validate fallback parsed result
        try:
            validate(instance=parsed, schema=schema)
            valid = True
        except ValidationError:
            valid = False

        return {"from": "fallback", "parsed": parsed, "valid": valid, "raw": None, "error": str(e)}



Demonstrate with 3 sample chats

In [20]:
#  demo run
sample_chats = [
    "Hi, I'm Isu Sharma. My email is isu.sharma@example.com and phone is +977-9812345678. I live in Kathmandu. I'm 22 years old.",
    "Hello — name: Sita Sharma; contact: sita_sharma@gmail.com;phone:+977-9845626345 location: Dang. Age: 28.",
    "Hey there, this is Hari. You can reach me at 9841234567. I'm 19 and staying near Lalitpur.",

]

for i, chat in enumerate(sample_chats, 1):
    print(f"\n--- Sample chat #{i} ---")
    print("Input:", chat)   # 👈 added line to show input
    out = extract_info_with_fallback(chat)
    print("Source:", out.get("from"))
    print("Parsed:", out["parsed"])
    print("ValidAgainstSchema:", out["valid"])




--- Sample chat #1 ---
Input: Hi, I'm Isu Sharma. My email is isu.sharma@example.com and phone is +977-9812345678. I live in Kathmandu. I'm 22 years old.
[retry_on_rate_limit] Rate limit detected. Attempt 1/4. Sleeping 1.0s and retrying...
[retry_on_rate_limit] Rate limit detected. Attempt 2/4. Sleeping 2.0s and retrying...
[retry_on_rate_limit] Rate limit detected. Attempt 3/4. Sleeping 4.0s and retrying...
[extract_info_with_fallback] Insufficient quota detected — using local fallback extractor.
Source: fallback
Parsed: {'name': 'Isu Sharma', 'email': 'isu.sharma@example.com', 'phone': '+9779812345678', 'location': 'Kathmandu', 'age': 22}
ValidAgainstSchema: True

--- Sample chat #2 ---
Input: Hello — name: Sita Sharma; contact: sita_sharma@gmail.com;phone:+977-9845626345 location: Dang. Age: 28.
[retry_on_rate_limit] Rate limit detected. Attempt 1/4. Sleeping 1.0s and retrying...
[retry_on_rate_limit] Rate limit detected. Attempt 2/4. Sleeping 2.1s and retrying...
[retry_on_rate_li