In [14]:
!pip install --quiet openai jsonschema


In [15]:
import json
import os
import time
from getpass import getpass
from typing import Any, Dict, List, Optional

from jsonschema import FormatChecker, ValidationError, validate
from openai import OpenAI

# Read key from environment; a blank / whitespace-only value counts as "not set"
GROQ_API_KEY = (os.getenv("GROQ_API_KEY") or "").strip() or None

# Prompt the user for a key
if not GROQ_API_KEY:
    print("No GROQ_API_KEY found in environment.")
    print("You can paste it now (it will not be echoed). If you don't want to, leave blank to run offline demo.")
    try:
        GROQ_API_KEY = getpass("Enter GROQ API Key (or press Enter to run offline): ").strip() or None
    except (EOFError, NotImplementedError):
        # No interactive stdin (e.g. a headless "Run All"): getpass cannot prompt.
        # Fall back to the offline demo instead of aborting the cell.
        GROQ_API_KEY = None

# Groq base URL (OpenAI-compatible endpoint); overridable through the environment
GROQ_API_BASE = os.getenv("GROQ_API_BASE", "https://api.groq.com/openai/v1")

# `client` stays None in offline mode; downstream code checks it before any API call.
client: Optional[OpenAI] = None
if GROQ_API_KEY:
    client = OpenAI(api_key=GROQ_API_KEY, base_url=GROQ_API_BASE)
    print("✅ Groq client configured (will call live API).")
else:
    print("⚠️ Running in OFFLINE fallback mode (no API calls).")



No GROQ_API_KEY found in environment.
You can paste it now (it will not be echoed). If you don't want to, leave blank to run offline demo.
Enter GROQ API Key (or press Enter to run offline): ··········
✅ Groq client configured (will call live API).


In [16]:
class ConversationManager:
    """In-memory chat history with truncation helpers and periodic summarization.

    Messages are kept in ``self.history`` as ``{'role': ..., 'content': ...}``
    dicts, oldest first. When a client is supplied, summaries are requested via
    ``client.chat.completions.create``; without a client, or when that call
    fails or yields no text, a crude local summarizer is used instead.
    """

    # The annotation is a forward-reference string so the class can be defined
    # (and used in offline mode) even when the openai package is not importable.
    def __init__(self, client: Optional["OpenAI"], model: str = "llama-3.3-70b-versatile", summarize_every_k: int = 3):
        """
        client: OpenAI-compatible client (Groq). If None -> offline summarizer used.
        model: model name to call for summarization when client is available.
        summarize_every_k: perform summarization after every k runs (k>0). If 0 -> disabled.
        """
        self.history: List[Dict[str, str]] = []  # list of {'role': role, 'content': text}
        self.runs = 0  # number of maybe_periodic_summarize() calls so far
        self.client = client
        self.model = model
        self.summarize_every_k = summarize_every_k

    @staticmethod
    def _field(obj: Any, name: str) -> Any:
        """Read `name` from a response part that is either a dict or an attribute-style object.

        Returns None when the field is absent (or when `obj` itself is None).
        """
        if isinstance(obj, dict):
            return obj.get(name)
        return getattr(obj, name, None)

    def _recent_within_budget(self, budget: int, cost) -> List[Dict[str, str]]:
        """Return the longest run of most-recent messages whose summed cost fits `budget`.

        `cost` maps a message's content string to an int. Walking stops at the
        first message that would overflow the budget, so the result is always a
        contiguous tail of the history, returned in chronological order.
        """
        kept: List[Dict[str, str]] = []
        used = 0
        for msg in reversed(self.history):
            price = cost(msg["content"])
            if used + price > budget:
                break
            kept.append(msg)
            used += price
        # Collected newest-first; flip once instead of inserting at index 0 each time.
        kept.reverse()
        return kept

    def add_message(self, role: str, content: str):
        """Append one message to the end of the history."""
        self.history.append({"role": role, "content": content})

    def get_last_n_turns(self, n: int) -> List[Dict[str, str]]:
        """Return the last `n` history entries (individual messages); empty list when n <= 0."""
        return self.history[-n:] if n > 0 else []

    def get_truncated_by_chars(self, max_chars: int) -> List[Dict[str, str]]:
        """Return the most recent messages whose combined content length is <= max_chars."""
        return self._recent_within_budget(max_chars, len)

    def get_truncated_by_words(self, max_words: int) -> List[Dict[str, str]]:
        """Return the most recent messages whose combined whitespace-split word count is <= max_words."""
        return self._recent_within_budget(max_words, lambda text: len(text.split()))

    def _local_summarize(self, prompt_extra: str = "") -> str:
        """Crude local summarizer (safe offline fallback).

        Joins all message contents, keeps the first 800 characters, splits them
        on ". " and returns up to six "- " bullets (each capped at 200
        characters). `prompt_extra` is accepted for signature parity with
        summarize_history() but is not used.
        """
        joined = " ".join([m["content"] for m in self.history])
        preview = joined[:800]
        # Attempt sentence-like split
        sentences = preview.replace("\n", " ").split(". ")
        bullets = []
        for s in sentences[:6]:
            s = s.strip()
            if s:
                bullets.append("- " + (s[:200] + ("..." if len(s) > 200 else "")))
        return "\n".join(bullets) if bullets else preview

    def summarize_history(self, prompt_extra: str = "Summarize the conversation concisely in 4 bullet points.") -> str:
        """Summarize the current history and return the summary text.

        prompt_extra: instruction placed in front of the conversation transcript.

        Returns "" for an empty history (no API call is made). Falls back to the
        local summarizer when there is no client, when the API call raises, or
        when the response carries no text.
        """
        if not self.history:
            return ""
        if not self.client:
            # offline fallback
            return self._local_summarize(prompt_extra)

        joined = "\n".join([f"{m['role']}: {m['content']}" for m in self.history])
        messages = [
            {"role": "system", "content": "You are a concise summarizer. Produce a short summary or 3-6 bullets."},
            {"role": "user", "content": prompt_extra + "\n\nConversation:\n" + joined}
        ]
        try:
            resp = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=300,
                temperature=0.2,
            )
            # The message may be an attribute-style object or a plain dict.
            content = self._field(self._field(resp.choices[0], "message"), "content")
        except Exception as e:
            print("API summarization failed:", e)
            return self._local_summarize(prompt_extra)

        if isinstance(content, str) and content.strip():
            return content.strip()
        # A reply without text is not an error, but its repr is not a usable summary either.
        print("API summarization returned no text; using local summary.")
        return self._local_summarize(prompt_extra)

    def maybe_periodic_summarize(self, prompt_extra: str = "") -> Optional[str]:
        """Count one run and, on every k-th run, replace the history with a summary.

        prompt_extra: summarization instruction; when empty, summarize_history()'s
            own default instruction is used.

        Returns the summary when the history was compacted, otherwise None. The
        history is left untouched when the summary comes back empty, so content
        is never traded for an empty "SUMMARY: " entry.
        """
        self.runs += 1
        if self.summarize_every_k <= 0 or self.runs % self.summarize_every_k != 0:
            return None

        summary = self.summarize_history(prompt_extra) if prompt_extra else self.summarize_history()
        if not summary:
            return None
        # replace history with single system summary entry
        self.history = [{"role": "system", "content": "SUMMARY: " + summary}]
        return summary

    def print_history(self):
        """Print every message with its index; contents longer than 300 characters are cut with '...'."""
        print("=== Conversation history ===")
        for i, m in enumerate(self.history):
            content_preview = m["content"] if len(m["content"]) <= 300 else m["content"][:300] + "..."
            print(f"[{i}] {m['role']}: {content_preview}")
        print("============================")

    def get_messages_for_api(self, keep_last_n: Optional[int] = None) -> List[Dict[str, str]]:
        """Return fresh {'role', 'content'} dicts for the whole history, or only the last `keep_last_n` messages."""
        msgs = self.history if keep_last_n is None else self.get_last_n_turns(keep_last_n)
        return [{"role": m["role"], "content": m["content"]} for m in msgs]


In [21]:
# Manager under test: compacts its history into a summary on every third run.
conv = ConversationManager(client=client, model="llama-3.3-70b-versatile", summarize_every_k=3)

# Seed conversation as (role, text) pairs, oldest first.
samples = [
    ("user", "Hi, I'm planning a trip to Goa next month. Any suggestions?"),
    ("assistant", "Sure! What dates and what kind of activities do you like?"),
    ("user", "I'm going from 10th to 15th Oct, I like beaches and local food."),
    ("assistant", "Great — book an early morning boat ride, visit Anjuna market and try seafood at Fisherman's Wharf."),
    ("user", "Thanks. Also, recommend budget hotels near Calangute."),
    ("assistant", "Check Hotel XYZ and ABC guesthouse. I can fetch exact prices if you want."),
    ("user", "Also what about local transport?"),
    ("assistant", "Scooter rental is common; many places provide daily rentals under ₹600."),
]

for role, text in samples:
    conv.add_message(role, text)

print("\n-- Full History --")
conv.print_history()

# Each truncation view: header line, selector method, and the limit passed to it.
for header, selector, limit in (
    ("\n-- Last 3 messages (turns):", conv.get_last_n_turns, 3),
    ("\n-- Truncated by chars (max 150 chars):", conv.get_truncated_by_chars, 150),
    ("\n-- Truncated by words (max 40 words):", conv.get_truncated_by_words, 40),
):
    print(header)
    for m in selector(limit):
        print(m)

print("\n-- Demonstrate periodic summarization (k=3) --")
summary_prompt = "Create a short summary (3 bullets) of the conversation so far."
for i in range(1, 5):
    conv.add_message("user", f"Follow-up message {i}: I have another question.")
    summary = conv.maybe_periodic_summarize(prompt_extra=summary_prompt)
    if not summary:
        # Not a summarization run (or nothing was produced): move on to the next follow-up.
        continue
    print(f"\n*** Summary created after run {conv.runs}: ***\n{summary}\n")
    print("-- History after summarization --")
    conv.print_history()
    # The assistant acknowledgement is only added right after a compaction.
    conv.add_message("assistant", "Acknowledged. Let's proceed.")



-- Full History --
=== Conversation history ===
[0] user: Hi, I'm planning a trip to Goa next month. Any suggestions?
[1] assistant: Sure! What dates and what kind of activities do you like?
[2] user: I'm going from 10th to 15th Oct, I like beaches and local food.
[3] assistant: Great — book an early morning boat ride, visit Anjuna market and try seafood at Fisherman's Wharf.
[4] user: Thanks. Also, recommend budget hotels near Calangute.
[5] assistant: Check Hotel XYZ and ABC guesthouse. I can fetch exact prices if you want.
[6] user: Also what about local transport?
[7] assistant: Scooter rental is common; many places provide daily rentals under ₹600.

-- Last 3 messages (turns):
{'role': 'assistant', 'content': 'Check Hotel XYZ and ABC guesthouse. I can fetch exact prices if you want.'}
{'role': 'user', 'content': 'Also what about local transport?'}
{'role': 'assistant', 'content': 'Scooter rental is common; many places provide daily rentals under ₹600.'}

-- Truncated by chars (ma

In [20]:
# JSON schema for extraction of 5 fields.
# - name and email are mandatory strings.
# - phone, location and age are optional and may be null when the chat does
#   not mention them (previously only age accepted null, so a result that
#   reported a missing phone/location as null failed validation).
# - "format": "email" is an annotation unless the validator is given a
#   format checker; jsonschema.validate() ignores it by default.
EXTRACTION_SCHEMA = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "email": {"type": "string", "format": "email"},
        "phone": {"type": ["string", "null"]},
        "location": {"type": ["string", "null"]},
        "age": {"type": ["integer", "null"]}
    },
    "required": ["name", "email"]
}

# Function-calling definition handed to the chat API; its parameters are the
# same schema object that results are validated against.
functions = [
    {
        "name": "extract_user_info",
        "description": "Extract name, email, phone, location, age from the chat text. Return valid JSON according to the provided schema.",
        "parameters": EXTRACTION_SCHEMA
    }
]

def offline_extract(chat_text: str) -> Dict[str, Any]:
    """Best-effort offline parser using regex for demo when no API key.

    chat_text: free-form chat message to scan.

    Returns a dict with the keys name, email, phone, location and age. A field
    that cannot be found is None; empty or None input yields all-None fields.
    These are heuristics only: they expect English phrasing and capitalized
    proper nouns.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    out = {"name": None, "email": None, "phone": None, "location": None, "age": None}
    if not chat_text:
        return out

    # email: every domain label must be followed by more label characters, so a
    # sentence-ending period ("...@example.com.") is not captured.
    m = re.search(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+", chat_text)
    if m:
        out["email"] = m.group(0)

    # phone: optional '+', then at least 8 digits with dashes/spaces allowed inside
    m = re.search(r"\+?\d[\d\-\s]{6,}\d", chat_text)
    if m:
        out["phone"] = m.group(0).strip()

    # age: prefer a number with explicit context ("29 years old", "age 41");
    # otherwise fall back to the first standalone number in a plausible range.
    m = re.search(
        r"\b(\d{1,3})[\s-]*(?:years?[\s-]*old|yrs?\b|y/?o\b)"
        r"|\bage[d]?\s*(?:is\s*|[:=]\s*)?(\d{1,3})\b",
        chat_text,
        re.IGNORECASE,
    )
    candidates = [m.group(1) or m.group(2)] if m else re.findall(r"\b\d{1,3}\b", chat_text)
    for token in candidates:
        val = int(token)
        if 10 <= val <= 120:
            out["age"] = val
            break

    # name heuristics: intro phrase (straight or curly apostrophe) followed by
    # one or more capitalized words
    m = re.search(
        r"\b(?:[Mm]y name is|I['’]m|I am|[Nn]ame['’]s)\s+([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)",
        chat_text,
    )
    if m:
        out["name"] = m.group(1)

    # location heuristics: the keyword must be a whole word (the "in" inside
    # "Martin" does not count) and the capture stops at the first
    # non-capitalized word.
    m = re.search(
        r"\b(?:[Bb]ased in|from|in)\s+([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)",
        chat_text,
    )
    if m:
        out["location"] = m.group(1)
    return out

def call_extraction_api(chat_text: str, model: str = "llama-3.3-70b-versatile") -> Dict[str, Any]:
    """Call Groq/OpenAI function-calling. If client is None, use offline extraction.

    chat_text: the chat message to extract fields from.
    model: model name sent to the chat completions endpoint.

    Always returns a dict, one of:
      - the parsed function-call arguments (or parsed JSON message content),
      - {"raw": ...} when function-call arguments are present but are not a JSON object,
      - {"raw_text": ...} when only non-JSON message content came back,
      - {"error": "..."} when the API call or response handling raised.

    NOTE(review): `functions` / `function_call` are the legacy OpenAI
    function-calling parameters; consider moving to `tools` / `tool_choice`
    once confirmed against the provider's documentation.
    """
    import ast  # local import: `ast` is not among the notebook's top-level imports

    if not client:
        return offline_extract(chat_text)

    def _field(obj, name):
        # Response parts may be attribute-style SDK objects or plain dicts.
        if isinstance(obj, dict):
            return obj.get(name)
        return getattr(obj, name, None)

    def _as_object(raw):
        # Coerce `raw` into a dict: accept a dict as-is, otherwise parse a JSON
        # string (or, failing that, a Python-literal string). None if not a dict.
        if isinstance(raw, dict):
            return raw
        if not isinstance(raw, str) or not raw.strip():
            return None
        try:
            parsed = json.loads(raw)
        except Exception:
            try:
                # Some models emit Python-style literals (single quotes, None).
                parsed = ast.literal_eval(raw)
            except Exception:
                return None
        return parsed if isinstance(parsed, dict) else None

    try:
        resp = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an extractor. Return only valid JSON matching the schema via function call."},
                {"role": "user", "content": f"Extract fields from this chat: {chat_text}"}
            ],
            functions=functions,
            function_call={"name": "extract_user_info"},
            temperature=0
        )
        msg = _field(resp.choices[0], "message")

        # Preferred path: arguments of the forced function call.
        args_raw = _field(_field(msg, "function_call"), "arguments")
        if args_raw:
            parsed = _as_object(args_raw)
            return parsed if parsed is not None else {"raw": args_raw}

        # Fallback: the model answered with plain message content.
        content = _field(msg, "content") or ""
        parsed = _as_object(content)
        return parsed if parsed is not None else {"raw_text": content}

    except Exception as e:
        return {"error": str(e)}


In [19]:
# Three sample chats, each mentioning all five fields in a different phrasing.
sample_chats = [
    "Hi, my name is John Doe. I’m 29 years old. You can reach me at john@example.com or call 9876543210. I live in Bangalore.",
    "Hello, I’m Alice, 35 years old from New York. Email: alice@mail.com, Phone: 1234567890.",
    "Name’s Rahul Sharma, age 41. Based in Mumbai. Email rahul.sharma@mail.in, phone 9988776655."
]

results = []
for chat in sample_chats:
    print("\n--- Chat ---")
    print(chat)
    extracted = call_extraction_api(chat)
    print("\n-- Extracted --")
    print(json.dumps(extracted, indent=2))

    # Validate against schema. An {"error": ...} result is never validated and
    # therefore stays invalid.
    valid = False
    try:
        if isinstance(extracted, dict) and "error" not in extracted:
            # Without a format checker jsonschema treats "format": "email" as a
            # pure annotation; passing one makes the declared format count.
            validate(instance=extracted, schema=EXTRACTION_SCHEMA, format_checker=FormatChecker())
            valid = True
    except ValidationError as ve:
        print("ValidationError:", ve.message)
        valid = False
    except Exception as e:
        print("Validation check error:", e)
        valid = False

    print("-- Valid?:", valid)
    results.append({"chat": chat, "extracted": extracted, "valid": valid})

# Explicit UTF-8 and ensure_ascii=False keep non-ASCII chat text (e.g. ’)
# readable in the saved file, independent of the platform's default encoding.
with open("groq_conversation_assignment_results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)
print("\n✅ Saved results to groq_conversation_assignment_results.json")



--- Chat ---
Hi, my name is John Doe. I’m 29 years old. You can reach me at john@example.com or call 9876543210. I live in Bangalore.

-- Extracted --
{
  "age": 29,
  "email": "john@example.com",
  "location": "Bangalore",
  "name": "John Doe",
  "phone": "9876543210"
}
-- Valid?: True

--- Chat ---
Hello, I’m Alice, 35 years old from New York. Email: alice@mail.com, Phone: 1234567890.

-- Extracted --
{
  "age": 35,
  "email": "alice@mail.com",
  "location": "New York",
  "name": "Alice",
  "phone": "1234567890"
}
-- Valid?: True

--- Chat ---
Name’s Rahul Sharma, age 41. Based in Mumbai. Email rahul.sharma@mail.in, phone 9988776655.

-- Extracted --
{
  "age": 41,
  "email": "rahul.sharma@mail.in",
  "location": "Mumbai",
  "name": "Rahul Sharma",
  "phone": "9988776655"
}
-- Valid?: True

✅ Saved results to groq_conversation_assignment_results.json
