In [1]:
# Cell 1 - Setup
# Only standard Python + requests + openai
!pip install --quiet openai requests

import os
import json
import time
from getpass import getpass
from typing import List, Dict, Any, Tuple
import requests


In [2]:
# Cell 2 - Get API key safely at runtime (do NOT hardcode)
# Option A: export GROQ_API_KEY in the environment before starting the notebook.
# Option B: paste it at the interactive prompt below (preferred for interactive testing).
#
# SECURITY NOTE(review): an earlier revision used a literal Groq key as the
# environment-variable *name* here. That key has been exposed in the notebook
# source and should be revoked; only the variable name belongs in code.
GROQ_API_KEY_ENV = "GROQ_API_KEY"

key = os.environ.get(GROQ_API_KEY_ENV)
if not key:
    print("Please paste your Groq API key (it will not be shown):")
    # strip() guards against a trailing newline/space picked up when pasting
    key = getpass("GROQ API KEY: ").strip()
    if not key:
        # Fail here with a clear message instead of a 401 from a later cell.
        raise ValueError("No Groq API key provided.")
    # Cached in this process's environment only; nothing is written to disk.
    os.environ[GROQ_API_KEY_ENV] = key

API_KEY = key
BASE_URL = "https://api.groq.com/openai/v1"  # Groq's OpenAI-compatible base URL
print("API key loaded into environment (not shown). Base URL:", BASE_URL)


Please paste your Groq API key (it will not be shown):
GROQ API KEY: ··········
API key loaded into environment (not shown). Base URL: https://api.groq.com/openai/v1


In [3]:
# Cell 3 - helper to call Groq OpenAI-compatible Chat Completions
# Headers sent with every request: bearer-token auth built from API_KEY,
# plus a JSON content type.
HEADERS = {"Authorization": f"Bearer {API_KEY}"}
HEADERS["Content-Type"] = "application/json"

def groq_chat_completion(messages: List[Dict[str,str]],
                         model: str = "openai/gpt-oss-20b",
                         temperature: float = 0.2,
                         functions: List[Dict] = None,
                         function_call: Any = None,
                         max_tokens: int = 512) -> Dict:
    """
    POST a chat-completions request to ``{BASE_URL}/chat/completions``.

    Args:
        messages: Chat messages, each a dict with "role" and "content".
        model: Model identifier placed in the request body.
        temperature: Sampling temperature placed in the request body.
        functions: Optional function definitions; sent only when not None.
        function_call: Optional function-call directive (e.g. "auto" or a
            specific function name); sent only when not None.
        max_tokens: Completion token cap placed in the request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` on an error status.
    """
    # Optional fields are appended after the mandatory ones, and only when set.
    optional_fields = {"functions": functions, "function_call": function_call}
    body = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        **{name: value for name, value in optional_fields.items() if value is not None},
    }

    response = requests.post(
        f"{BASE_URL}/chat/completions",
        headers=HEADERS,
        json=body,
        timeout=60,
    )
    response.raise_for_status()
    return response.json()


In [4]:
# Cell 4 - Conversation store and truncation/summarization hooks

class ConversationStore:
    """In-memory chat transcript with truncation and summarization helpers."""

    def __init__(self):
        # Chronological list of {"role": ..., "content": ...} message dicts.
        self.history: List[Dict[str,str]] = []
        # How many times maybe_periodic_summarize() has been invoked.
        self.run_count = 0

    def add_message(self, role: str, content: str):
        """Append a single message to the end of the history."""
        self.history.append({"role": role, "content": content})

    def get_last_n_turns(self, n: int) -> List[Dict[str,str]]:
        """Return the last ``2*n`` messages (roughly n user/assistant pairs).

        Returns an empty list when ``n <= 0``.
        """
        return [] if n <= 0 else self.history[-2*n:]

    def _newest_within_budget(self, budget, cost):
        """Collect the newest messages whose summed ``cost`` fits ``budget``.

        Walks the history newest-first and stops at the first message that
        would push the running total over ``budget``. The newest message is
        always included, even if it alone exceeds the budget. The result is
        returned in chronological order.
        """
        selected = []
        used = 0
        for message in self.history[::-1]:
            size = cost(message)
            if used + size > budget and selected:
                break
            selected.append(message)
            used += size
        selected.reverse()
        return selected

    def get_by_char_limit(self, char_limit: int) -> List[Dict[str,str]]:
        """Return the most recent messages whose combined content length
        (in characters) stays within ``char_limit``."""
        return self._newest_within_budget(char_limit, lambda m: len(m["content"]))

    def get_by_word_limit(self, word_limit: int) -> List[Dict[str,str]]:
        """Return the most recent messages whose combined whitespace-separated
        word count stays within ``word_limit``."""
        return self._newest_within_budget(word_limit, lambda m: len(m["content"].split()))

    def replace_with_summary(self, summary_text: str):
        """Collapse the history into one system summary message.

        The most recent message (whatever its role) is kept after the summary;
        an empty history yields just the summary message.
        """
        summary_message = {
            "role":"system",
            "content": f"Summary of previous conversation: {summary_text}",
        }
        self.history = [summary_message] + self.history[-1:]

    def maybe_periodic_summarize(self, k:int, summarizer_fn):
        """
        Count one run and summarize on every k-th run.

        When ``k > 0`` and the run counter is a multiple of ``k``, calls
        ``summarizer_fn(history)``, replaces the history with the resulting
        summary, and returns the summary text. Otherwise returns None.
        """
        self.run_count += 1
        due = k > 0 and (self.run_count % k == 0)
        if not due:
            return None
        print(f"Periodic summarization triggered at run {self.run_count}.")
        summary = summarizer_fn(self.history)
        self.replace_with_summary(summary)
        return summary


In [5]:
# Cell 5 - Summarizer (uses groq_chat_completion)
def _message_text(msg: Any) -> str:
    """Pull the text out of one response message, tolerating several shapes.

    Handles: a non-dict value (stringified), a dict whose "content" is a
    string, a dict whose "content" is a dict with "text", a dict whose
    "content" is a non-empty list (first element stringified), and a dict
    with a top-level "text" key. Any other "content" value yields "".
    """
    if not isinstance(msg, dict):
        return str(msg)
    if "content" not in msg:
        return msg["text"] if "text" in msg else str(msg)
    body = msg["content"]
    if isinstance(body, str):
        return body
    if isinstance(body, dict) and "text" in body:
        return body["text"]
    if isinstance(body, list) and len(body) > 0:
        return str(body[0])
    return ""


def summarize_history_with_groq(history: List[Dict[str,str]], model="openai/gpt-oss-20b") -> str:
    """Ask the model for a 2-4 sentence summary of ``history``.

    The history is rendered as "ROLE: content" paragraphs, wrapped in a
    summarization instruction, and sent as a single user message through
    ``groq_chat_completion`` (temperature 0.0, max_tokens 200).

    Returns the stripped summary text, or "(no summary produced)" when the
    response has no choices or the extracted text is empty.
    """
    transcript = "\n\n".join(f"{m['role'].upper()}: {m['content']}" for m in history)
    instructions = (
        "You are a conversation summarizer. Produce a concise summary (2-4 sentences) "
        "that captures the important intents, facts, and any action items mentioned in the conversation. "
        "If personal details like name/email/phone were mentioned, list them briefly. "
        "Keep it short and neutral.\n\nConversation:\n"
    )
    resp = groq_chat_completion(
        messages=[{"role":"user","content":instructions + transcript}],
        model=model,
        temperature=0.0,
        max_tokens=200,
    )

    # Accept either an OpenAI-style "choices" list or an "outputs" list.
    candidates = resp.get("choices") or resp.get("outputs") or []
    if not candidates:
        return "(no summary produced)"

    top = candidates[0]
    text = _message_text(top.get("message") or top)
    return text.strip() or "(no summary produced)"

# Quick smoke test. NOTE: uncommenting the line below performs a real Groq API call.
# print(summarize_history_with_groq([{"role":"user","content":"Hello"}, {"role":"assistant","content":"Hi"}]))


In [6]:
# Cell 6 - Demo of Task 1
store = ConversationStore()

# Scripted flight-booking dialogue used as input for the truncation and
# summarization demos below.
samples = [
    ("user","Hi, I'm Paavan. I need help booking flights to Bangalore."),
    ("assistant","Sure — when do you plan to travel and from which city?"),
    ("user","From Mumbai to Bangalore on Oct 15, returning Oct 20."),
    ("assistant","How many passengers? Any airline preferences?"),
    ("user","Just me. Prefer morning flights and low-cost carriers."),
    ("assistant","Got it — searching..."),
    ("user","Also please note my email is paavan@example.com and phone +91-98XXXXXX"),
    ("assistant","Thanks, I'll use that contact info for ticketing."),
    ("user","Also remind me to pick up an adapter at the airport."),
    ("assistant","Noted.")
]

# Load the whole scripted dialogue into the store.
print("Feeding messages into conversation store...")
for speaker, utterance in samples:
    store.add_message(speaker, utterance)

# One banner per truncation strategy; the selector is evaluated lazily so the
# banner is printed before the store is queried.
truncation_demos = (
    ("\n--- Last 3 turns (approx) ---", lambda: store.get_last_n_turns(3)),
    ("\n--- By char limit 150 ---", lambda: store.get_by_char_limit(150)),
    ("\n--- By word limit 40 ---", lambda: store.get_by_word_limit(40)),
)
for banner, select in truncation_demos:
    print(banner)
    for message in select():
        print(message)

# Periodic summarization: with k=3 only the third call should summarize.
print("\n--- Periodic summarization demo (k=3) ---")
def summarizer_fn(history):
    """Summarize via the Groq-backed summarizer (swap in a mock to save quota)."""
    return summarize_history_with_groq(history)

for run_number in (1, 2, 3):
    print(f"\nRun {run_number}:")
    summary = store.maybe_periodic_summarize(k=3, summarizer_fn=summarizer_fn)
    if not summary:
        print("No summarization this run.")
        continue
    print("Summary produced and stored in conversation history:")
    print(summary)

print("\nConversation history after periodic summarization:")
for message in store.history:
    print(message)


Feeding messages into conversation store...

--- Last 3 turns (approx) ---
{'role': 'user', 'content': 'Just me. Prefer morning flights and low-cost carriers.'}
{'role': 'assistant', 'content': 'Got it — searching...'}
{'role': 'user', 'content': 'Also please note my email is paavan@example.com and phone +91-98XXXXXX'}
{'role': 'assistant', 'content': "Thanks, I'll use that contact info for ticketing."}
{'role': 'user', 'content': 'Also remind me to pick up an adapter at the airport.'}
{'role': 'assistant', 'content': 'Noted.'}

--- By char limit 150 ---
{'role': 'assistant', 'content': "Thanks, I'll use that contact info for ticketing."}
{'role': 'user', 'content': 'Also remind me to pick up an adapter at the airport.'}
{'role': 'assistant', 'content': 'Noted.'}

--- By word limit 40 ---
{'role': 'assistant', 'content': 'Got it — searching...'}
{'role': 'user', 'content': 'Also please note my email is paavan@example.com and phone +91-98XXXXXX'}
{'role': 'assistant', 'content': "Thanks

In [7]:
# Cell 7 - JSON Schema for extraction (5 fields): name, email, phone, location, age
# We'll expose it as an OpenAI-style "function" with JSON schema in `functions`.

# Per-field JSON Schema for the five contact attributes the model may extract.
_CONTACT_PROPERTIES = {
    "name": {"type":"string", "description":"Full name of the person if present"},
    "email": {"type":"string", "format":"email", "description":"Email address if present"},
    "phone": {"type":"string", "description":"Phone number if present"},
    "location": {"type":"string", "description":"Location / city / country if present"},
    "age": {"type":"integer", "minimum":0, "description":"Age if explicitly stated"},
}

# OpenAI-style function definition passed in the `functions` list of a request.
extraction_function = {
    "name": "extract_contact_info",
    "description": "Extract name, email, phone, location, age from the user chat for information collection.",
    "parameters": {
        "type": "object",
        "properties": _CONTACT_PROPERTIES,
        # Nothing is mandatory: partial extractions are accepted.
        "required": [],
    },
}


In [8]:
# Cell 8 - wrapper to call chat with function-calling and parse the returned function call
def call_extraction_function(chat_messages: List[Dict[str,str]],
                             model: str = "openai/gpt-oss-20b"):
    """Run the contact-info extraction function call over a chat.

    Sends ``chat_messages`` through ``groq_chat_completion`` with
    ``extraction_function`` exposed and ``function_call="auto"``, then locates
    the function call in the first choice.

    Args:
        chat_messages: Chat messages, each a dict with "role" and "content".
        model: Model identifier forwarded to ``groq_chat_completion``.

    Returns:
        Dict with keys:
          "raw_response": the full decoded response,
          "message": the first choice's message (or the choice itself),
          "function_call": a ``{"name", "arguments"}``-style dict, or None
              when the model did not call a function.

    Raises:
        RuntimeError: if the response contains no choices.
    """
    resp = groq_chat_completion(messages=chat_messages,
                               model=model,
                               temperature=0.0,
                               functions=[extraction_function],
                               function_call="auto",
                               max_tokens=400)
    choices = resp.get("choices", [])
    if not choices:
        raise RuntimeError("No choices returned.")
    message = choices[0].get("message") or choices[0]

    # Legacy function-calling responses carry the call directly on the message.
    func_call = message.get("function_call")
    if not func_call:
        # Tool-style responses wrap it as tool_calls[i]["function"]; unwrap the
        # first one so callers always see the same {"name", "arguments"} shape.
        tool_calls = message.get("tool_calls") or []
        if isinstance(tool_calls, list) and tool_calls and isinstance(tool_calls[0], dict):
            func_call = tool_calls[0].get("function")
    if not func_call:
        # Retained from the previous implementation for backward compatibility.
        func_call = message.get("tools") or None

    return {"raw_response": resp, "message": message, "function_call": func_call}

# Demo sample chats for Task 2
sample_chats = [
    [{"role":"user", "content":"Hello, I'm Aarti Sharma. My email is aarti.sharma@example.com and phone is +91-9876543210. I'm located in Pune and I'm 28 years old."}],
    [{"role":"user", "content":"Hey, this is Rohit. You can reach me at rohit123@domain.net. I live in Delhi."}],
    [{"role":"user", "content":"Name: Sneha. Phone: 077-555-0123. Age: 35. Email: sneha_35@mail.com. Lives in Kochi."}]
]

# Run the extractor on each chat, echo the raw function call, and keep the
# full result so the validation cell can parse it.
results = []
for conversation in sample_chats:
    extraction = call_extraction_function(conversation)
    print("\n=== Raw function_call ===")
    print(json.dumps(extraction["function_call"], indent=2))
    results.append(extraction)



=== Raw function_call ===
{
  "name": "extract_contact_info",
  "arguments": "{\"age\":28,\"email\":\"aarti.sharma@example.com\",\"location\":\"Pune\",\"name\":\"Aarti Sharma\",\"phone\":\"+91-9876543210\"}"
}

=== Raw function_call ===
{
  "name": "extract_contact_info",
  "arguments": "{\"email\":\"rohit123@domain.net\",\"location\":\"Delhi\",\"name\":\"Rohit\"}"
}

=== Raw function_call ===
{
  "name": "extract_contact_info",
  "arguments": "{\"age\":35,\"email\":\"sneha_35@mail.com\",\"location\":\"Kochi\",\"name\":\"Sneha\",\"phone\":\"077-555-0123\"}"
}


In [9]:
# Cell 9 - Basic manual validator that enforces types/constraints in the extraction_function
def validate_extracted(obj: Dict[str,Any], schema: Dict[str,Any]) -> Tuple[bool, List[str]]:
    """Check extracted arguments against a function schema's property rules.

    Enforced rules, per field in ``obj``:
      * the field must be declared in ``schema["parameters"]["properties"]``;
      * "integer" fields must be ints (bools are rejected); numeric strings
        and integral floats are coerced in place, anything else is an error;
      * "integer" fields honour the schema's ``minimum`` when present;
      * "string" fields must be ``str``;
      * fields with ``format == "email"`` must contain "@" when non-empty.

    Args:
        obj: Extracted arguments. Coercible integer values are replaced in
            place with their ``int`` form.
        schema: Function definition holding ``parameters.properties``.

    Returns:
        ``(is_valid, errors)`` where ``errors`` is a list of human-readable
        messages and ``is_valid`` is True exactly when that list is empty.
    """
    errors: List[str] = []
    props = schema["parameters"]["properties"]
    # Iterate over a snapshot of the keys because values may be rewritten below.
    for k in list(obj):
        if k not in props:
            errors.append(f"Unknown field: {k}")
            continue
        spec = props[k]
        value = obj[k]
        expected_type = spec.get("type")
        type_error = f"Field {k} expected {expected_type} but got {type(value).__name__}"

        if expected_type == "integer":
            # bool is a subclass of int, but True/False is never a valid integer field.
            if isinstance(value, bool):
                errors.append(type_error)
                continue
            if not isinstance(value, int):
                # Refuse to silently truncate 28.7 -> 28 (also rejects nan/inf).
                if isinstance(value, float) and not value.is_integer():
                    errors.append(type_error)
                    continue
                try:
                    value = int(value)  # numeric string or integral float
                except (TypeError, ValueError, OverflowError):
                    errors.append(type_error)
                    continue
                obj[k] = value
            minimum = spec.get("minimum")
            if minimum is not None and value < minimum:
                errors.append(f"Field {k} must be >= {minimum} but got {value}")
        elif expected_type == "string":
            if not isinstance(value, str):
                errors.append(type_error)
                # Skip format checks: they assume a string value.
                continue

        # basic format checks (email); only meaningful for non-empty strings
        if spec.get("format") == "email" and isinstance(value, str) and value:
            if "@" not in value:
                errors.append(f"Field {k} does not look like an email: {value}")
    return (len(errors)==0, errors)

# Decode each stored function call's arguments and validate them against the schema.
for sample_no, result in enumerate(results, start=1):
    fc = result["function_call"]
    print(f"\n--- Sample {sample_no} ---")
    if not fc:
        print("No function call returned; falling back to text parse.")
        continue

    # Arguments may arrive as a JSON string or an already-decoded dict, under
    # either "arguments" or "body"; any other shape leaves args empty.
    args = {}
    if isinstance(fc, dict):
        args_raw = fc.get("arguments") or fc.get("body") or ""
        if isinstance(args_raw, dict):
            args = args_raw
        elif isinstance(args_raw, str):
            try:
                args = json.loads(args_raw)
            except Exception as e:
                print("Failed to load arguments JSON:", e)
                args = {}

    # Printed before validation, which may coerce values in place.
    print("Parsed arguments:", args)
    ok, errs = validate_extracted(args, extraction_function)
    print("Valid:", ok)
    if not errs:
        print("Validated object:", args)
    else:
        print("Errors:", errs)



--- Sample 1 ---
Parsed arguments: {'age': 28, 'email': 'aarti.sharma@example.com', 'location': 'Pune', 'name': 'Aarti Sharma', 'phone': '+91-9876543210'}
Valid: True
Validated object: {'age': 28, 'email': 'aarti.sharma@example.com', 'location': 'Pune', 'name': 'Aarti Sharma', 'phone': '+91-9876543210'}

--- Sample 2 ---
Parsed arguments: {'email': 'rohit123@domain.net', 'location': 'Delhi', 'name': 'Rohit'}
Valid: True
Validated object: {'email': 'rohit123@domain.net', 'location': 'Delhi', 'name': 'Rohit'}

--- Sample 3 ---
Parsed arguments: {'age': 35, 'email': 'sneha_35@mail.com', 'location': 'Kochi', 'name': 'Sneha', 'phone': '077-555-0123'}
Valid: True
Validated object: {'age': 35, 'email': 'sneha_35@mail.com', 'location': 'Kochi', 'name': 'Sneha', 'phone': '077-555-0123'}
