In [None]:
!pip install --quiet jsonschema
!pip install openai==0.28

import os
import json
import time
from typing import List, Dict, Any, Optional

import openai
from jsonschema import validate, ValidationError

from google.colab import userdata
def _read_secret(name: str) -> Optional[str]:
    """Look up ``name`` in Colab secrets, then in the process environment.

    ``userdata.get`` raises instead of returning ``None`` when a secret is
    missing or the notebook has not been granted access to it. Swallowing
    those two errors here is what lets the "key not set" warning below (and
    the offline fallbacks that test ``openai.api_key``) actually be reached.

    Returns:
        The secret value, or ``None`` when neither source provides one.
    """
    try:
        value = userdata.get(name)
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        value = None
    return value or os.environ.get(name)


# Default endpoint used when no OPEN_API_BASE secret/env var is configured.
DEFAULT_API_BASE = 'https://api.groq.com/openai/v1'

GROQ_API_KEY = _read_secret('GROQ_API_KEY')
OPEN_API_BASE = _read_secret('OPEN_API_BASE') or DEFAULT_API_BASE


if not GROQ_API_KEY:
    print("Warning: GROQ_API_KEY not found in Colab secrets or environment variables. Set it before calling the API.")

# The legacy (0.28) openai SDK is configured through module-level attributes.
openai.api_key = GROQ_API_KEY
openai.api_base = OPEN_API_BASE


MODEL = 'openai/gpt-oss-20b'

print('Setup complete. CLIENT configured to', openai.api_base)

Setup complete. CLIENT configured to https://api.groq.com/openai/v1


In [None]:
class ConversationManager:
    """In-memory chat history with truncation and summarization helpers.

    Attributes are plain Python state:
      * ``history``   - ordered list of ``{'role': ..., 'content': ...}`` dicts.
      * ``run_count`` - number of ``maybe_periodic_summarize`` calls so far.
    """

    def __init__(self):
        self.history: List[Dict[str, str]] = []
        self.run_count = 0

    def append(self, role: str, content: str):
        """Add one message to the end of the history."""
        self.history.append({'role': role, 'content': content})

    def last_n_turns(self, n: int) -> List[Dict[str, str]]:
        """Return the most recent ``n`` messages (fewer if the history is shorter).

        A non-positive ``n`` yields an empty list. Without this guard
        ``history[-0:]`` would be the *entire* history and a negative ``n``
        would silently slice from the front.
        """
        if n <= 0:
            return []
        return self.history[-n:]

    def _truncate_by(self, limit: int, measure) -> List[Dict[str, str]]:
        """Keep the newest messages whose summed ``measure(content)`` stays <= ``limit``.

        Walks the history from newest to oldest and stops at the first message
        that would exceed the budget, so the result is always a contiguous
        suffix of the history, returned in chronological order.
        """
        kept = []
        total = 0
        for msg in reversed(self.history):
            size = measure(msg['content'])
            if total + size > limit:
                break
            kept.append(msg)
            total += size
        kept.reverse()
        return kept

    def truncate_by_chars(self, max_chars: int) -> List[Dict[str, str]]:
        """Return the newest messages that fit within ``max_chars`` characters of content."""
        return self._truncate_by(max_chars, len)

    def truncate_by_words(self, max_words: int) -> List[Dict[str, str]]:
        """Return the newest messages that fit within ``max_words`` whitespace-separated words."""
        return self._truncate_by(max_words, lambda text: len(text.split()))

    def summarize_history(self, summarizer_fn, replace=True):
        """
        summarizer_fn(history: List[Dict]) -> str
        If replace=True: replace entire history with single assistant summary message.
        Returns summary string.
        """
        summary = summarizer_fn(self.history)
        if replace:
            self.history = [{'role': 'assistant', 'content': f"[SUMMARY]\n{summary}"}]
        return summary

    def maybe_periodic_summarize(self, k: int, summarizer_fn, replace=True):
        """Call this after each run; if run_count % k == 0, summarize and replace history.

        Always increments ``run_count``. Returns the summary string on the runs
        where summarization happens and ``None`` otherwise (including k <= 0).
        """
        self.run_count += 1
        if k > 0 and self.run_count % k == 0:
            return self.summarize_history(summarizer_fn, replace=replace)
        return None

    def __repr__(self):
        """Render the history as indented JSON."""
        return json.dumps(self.history, indent=2)

In [None]:
def groq_summarize(history: List[Dict[str, str]]) -> str:
    """
    Call the Groq OpenAI-compatible API to summarize conversation history.

    Args:
        history: messages as ``{'role': ..., 'content': ...}`` dicts.

    Returns:
        A short summary string. When no API key is configured, a local
        fallback is returned instead: the "role: content" transcript, cut to
        its first and last 200 characters if it is longer than 400. If the
        model replies without any text content, an empty string is returned.
    """
    if not openai.api_key:
        # Offline fallback: no network call, just a clipped transcript.
        combined = "\n".join(f"{m['role']}: {m['content']}" for m in history)
        return (combined[:200] + '...\n' + combined[-200:]) if len(combined) > 400 else combined

    system_prompt = (
        "You are a concise summarizer. Summarize the conversation history into 3-6 bullet points, "
        "and include action items if present. Keep it short and factual."
    )

    history_text = "\n\n".join(f"{m['role'].upper()}: {m['content']}" for m in history)
    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': f"Here is the conversation history:\n\n{history_text}\n\nPlease summarize:"},
    ]

    resp = openai.ChatCompletion.create(
        model=MODEL,
        messages=messages,
        max_tokens=300,
        temperature=0.2,
    )
    # `content` is nullable in the chat-completions response; guard before
    # calling .strip() so a content-less reply does not raise AttributeError.
    content = resp['choices'][0]['message'].get('content')
    return (content or '').strip()

In [None]:

# --- Demo 1: truncation strategies on a fixed, scripted conversation ---
cm = ConversationManager()

# Eight alternating user/assistant messages used by both demos in this cell.
samples = [
    ("user", "Hi, I'm Alice. I'm looking for help planning my trip to Japan."),
    ("assistant", "Great — when are you planning to travel and what cities interest you?"),
    ("user", "I want to go in April and visit Tokyo, Kyoto, and Osaka."),
    ("assistant", "Do you have a budget and preference for hotels vs hostels?"),
    ("user", "Budget around $2000, prefer mid-range hotels."),
    ("assistant", "Noted. Any dietary restrictions or mobility concerns?"),
    ("user", "None. Also I have some loyalty points with Airline X."),
    ("assistant", "I can check flights that work with Airline X. Any seat preferences?"),
]

for role, text in samples:
    cm.append(role, text)

print('Full history ({} messages):'.format(len(cm.history)))
# ConversationManager.__repr__ renders the history as indented JSON.
print(cm)

# Each strategy below returns a list of messages; none of them modifies cm.history.
print('\n--- Truncate by last 4 messages ---')
print(json.dumps(cm.last_n_turns(4), indent=2))

print('\n--- Truncate by chars (max 150 chars) ---')
print(json.dumps(cm.truncate_by_chars(150), indent=2))

print('\n--- Truncate by words (max 40 words) ---')
print(json.dumps(cm.truncate_by_words(40), indent=2))

# --- Demo 2: periodic summarization ---
# A fresh manager is fed one message at a time. With k=3 and replace=True,
# every third call summarizes and collapses the history to a single
# "[SUMMARY]" message. groq_summarize calls the remote API when an API key is
# configured, so this loop can make network requests.
cm2 = ConversationManager()
outputs = []
for i, (role, text) in enumerate(samples):
    cm2.append(role, text)
    summary = cm2.maybe_periodic_summarize(k=3, summarizer_fn=groq_summarize, replace=True)
    # Record (1-based run number, history length after this run, summary or None).
    outputs.append((i+1, len(cm2.history), summary))

print('\n--- Periodic summarization after every 3rd run ---')
for run_no, hist_len, summary in outputs:
    print(f'Run {run_no}: history length = {hist_len}')
    if summary:
        print('  *** Summarized:')
        # Indent continuation lines so multi-line summaries stay aligned.
        print('  ', summary.replace('\n', '\n  '))

Full history (8 messages):
[
  {
    "role": "user",
    "content": "Hi, I'm Alice. I'm looking for help planning my trip to Japan."
  },
  {
    "role": "assistant",
    "content": "Great \u2014 when are you planning to travel and what cities interest you?"
  },
  {
    "role": "user",
    "content": "I want to go in April and visit Tokyo, Kyoto, and Osaka."
  },
  {
    "role": "assistant",
    "content": "Do you have a budget and preference for hotels vs hostels?"
  },
  {
    "role": "user",
    "content": "Budget around $2000, prefer mid-range hotels."
  },
  {
    "role": "assistant",
    "content": "Noted. Any dietary restrictions or mobility concerns?"
  },
  {
    "role": "user",
    "content": "None. Also I have some loyalty points with Airline X."
  },
  {
    "role": "assistant",
    "content": "I can check flights that work with Airline X. Any seat preferences?"
  }
]

--- Truncate by last 4 messages ---
[
  {
    "role": "user",
    "content": "Budget around $2000, prefer

In [None]:

# JSON Schema for the extracted contact record: an object that must carry
# "name" and "email", may carry "phone", "location" and "age", and rejects
# any other key.
# NOTE: jsonschema only enforces the "format" keyword when a format checker is
# supplied to the validator; otherwise it is treated as an annotation.
schema = {
    "type": "object",
    "properties": {
        "name": {
            "type": "string",
        },
        "email": {
            "type": "string",
            "format": "email",
        },
        "phone": {
            "type": "string",
        },
        "location": {
            "type": "string",
        },
        # Age is accepted either as a number or as free text.
        "age": {
            "type": [
                "integer",
                "string",
            ],
        },
    },
    "required": [
        "name",
        "email",
    ],
    "additionalProperties": False,
}

# Function/tool description whose "parameters" entry is the very same schema
# object defined above (shared reference, not a copy).
function_def = dict(
    name="extract_contact_info",
    description="Extract contact fields from a user-assistant chat.",
    parameters=schema,
)

In [None]:
def groq_extract_structured(chat_text: str) -> Optional[dict]:
    """
    Sends a function-calling (tool-calling) request to the Groq model;
    expects the model to return JSON matching the schema.

    Args:
        chat_text: raw chat transcript to extract contact fields from.

    Returns:
        A dict of extracted fields, or ``None`` when the model did not call
        the tool or its arguments were not a parseable JSON object. When no
        API key is configured, a local "Key: value" line parser is used
        instead and its (possibly empty) dict is returned.
    """

    if not openai.api_key:
        # Offline fallback: pick up "<Field>: <value>" lines, first occurrence wins.
        out = {}
        for line in chat_text.splitlines():
            if ':' not in line:
                continue
            k, v = [x.strip() for x in line.split(':', 1)]
            lk = k.lower()
            if lk in out:
                continue
            if lk in ('name', 'email', 'phone', 'location'):
                out[lk] = v
            elif lk == 'age':
                # Keep the raw text when the age is not a plain integer.
                try:
                    out['age'] = int(v)
                except ValueError:
                    out['age'] = v
        return out

    messages = [
        {"role": "system", "content": "You are a strict JSON extractor. Always return valid JSON matching the schema."},
        {"role": "user", "content": "Extract contact info from this chat:\n\n" + chat_text}
    ]

    tools = [{
        "type": "function",
        "function": {
            "name": "extract_contact_info",
            "description": "Extract user contact info from chat",
            "parameters": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "email": {"type": "string"},
                    "phone": {"type": "string"},
                    "location": {"type": "string"},
                    "age": {"type": ["integer", "string"]}
                },
                "required": ["name", "email"]
            }
        }
    }]

    resp = openai.ChatCompletion.create(
        model=MODEL,
        messages=messages,
        tools=tools,
        tool_choice={"type": "function", "function": {"name": "extract_contact_info"}},  # force tool call
        temperature=0,
        max_tokens=300
    )

    choice = resp["choices"][0]["message"]

    # Use .get() + truthiness: the key may be present but null or empty, which
    # would make an unconditional [0] lookup raise.
    tool_calls = choice.get("tool_calls")
    if not tool_calls:
        print("⚠️ Model did not call tool. Raw output:", choice.get("content"))
        return None

    args_str = tool_calls[0]["function"]["arguments"]
    try:
        parsed = json.loads(args_str)
    except (TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError; TypeError covers a null payload.
        print("Failed to parse JSON:", e, "\nRaw:", args_str)
        return None

    if not isinstance(parsed, dict):
        # Valid JSON but not an object (e.g. a list) breaks the Optional[dict] contract.
        print("Tool arguments are not a JSON object. Raw:", args_str)
        return None
    return parsed

In [None]:
def validate_against_schema(data: dict, schema: dict) -> bool:
    """Check ``data`` against a JSON Schema, including ``format`` keywords.

    Args:
        data: the instance to validate.
        schema: the JSON Schema to validate against.

    Returns:
        True when ``data`` conforms; False (after printing the validation
        error) when it does not.
    """
    # Local import keeps this cell self-contained; jsonschema is already a
    # dependency of the notebook.
    from jsonschema import FormatChecker

    try:
        # jsonschema ignores "format" (e.g. "email") unless a format checker
        # is passed explicitly, so supply one to make that constraint effective.
        validate(instance=data, schema=schema, format_checker=FormatChecker())
        return True
    except ValidationError as ve:
        print('Validation error:', ve)
        return False

# Three sample transcripts exercising different input shapes: free-form prose
# with every field, labelled "Key: value" fields, and prose with only some
# fields. These are regular (non-raw) strings, so the "\n" sequences in the
# second sample are real newline characters at runtime.
sample_chats = [
    """
    User: Hi, I'm John Doe. My email is john.doe@example.com and my phone is +1-555-1234. I'm 34 years old and live in San Francisco.
    Assistant: Thanks John! We'll contact you at john.doe@example.com.
    """,
    """
    User: Hello. Name: Priya Kumar\nEmail: priya.kumar@mail.com\nLocation: Bangalore, India\nAge: 28\nPhone: 9876543210
    """,
    """
    User: Hey, I'm Martin. You can reach me at martin123@mail.co.uk. I live in London.
    """
]

# Extract from each chat, validate the result, and keep (chat, parsed) pairs.
parsed_results = []
for chat in sample_chats:
    parsed = groq_extract_structured(chat)
    print('\nRaw parsed:', parsed)
    # groq_extract_structured returns None on failure, so only validate dicts.
    if isinstance(parsed, dict):
        ok = validate_against_schema(parsed, schema)
        print('Valid against schema?', ok)
    parsed_results.append((chat, parsed))

print('\n--- Summary of parsed results ---')
# Samples are numbered from 1; a failed extraction (None) is printed as "null".
for i, (chat, parsed) in enumerate(parsed_results, 1):
    print(f'Sample {i}:', json.dumps(parsed, indent=2))


Raw parsed: {'age': 34, 'email': 'john.doe@example.com', 'location': 'San Francisco', 'name': 'John Doe', 'phone': '+1-555-1234'}
Valid against schema? True

Raw parsed: {'age': 28, 'email': 'priya.kumar@mail.com', 'location': 'Bangalore, India', 'name': 'Priya Kumar', 'phone': '9876543210'}
Valid against schema? True

Raw parsed: {'email': 'martin123@mail.co.uk', 'location': 'London', 'name': 'Martin'}
Valid against schema? True

--- Summary of parsed results ---
Sample 1: {
  "age": 34,
  "email": "john.doe@example.com",
  "location": "San Francisco",
  "name": "John Doe",
  "phone": "+1-555-1234"
}
Sample 2: {
  "age": 28,
  "email": "priya.kumar@mail.com",
  "location": "Bangalore, India",
  "name": "Priya Kumar",
  "phone": "9876543210"
}
Sample 3: {
  "email": "martin123@mail.co.uk",
  "location": "London",
  "name": "Martin"
}
