In [None]:
import os
import json
import re
from getpass import getpass
from typing import List, Dict, Any, Optional
import time
import textwrap

import openai

**Configure Groq**

In [None]:
# Reuse a key that is already exported in the environment so that
# "Restart Kernel -> Run All" does not block on an interactive prompt;
# only ask when no key is available. Surrounding whitespace from a paste is dropped.
GROQ_API_KEY = (os.environ.get('GROQ_API_KEY') or getpass("my api key")).strip()
os.environ['GROQ_API_KEY'] = GROQ_API_KEY

# Create the client configured to talk to Groq's OpenAI-compatible endpoint
client = openai.OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
print("Client configured. (Remember: do not commit your key to GitHub.)")

**Small helper for calling chat completions (wraps responses)**

In [None]:
# Helper wrapper to call the chat completions endpoint
def call_chat_model(
    messages: List[Dict[str, str]],
    model: str = "openai/gpt-oss-20b",   # Groq-compatible model name
    temperature: float = 0.2,
    max_tokens: int = 512,
    tools: Optional[List[Dict[str, Any]]] = None,
    tool_choice: Optional[str] = None
) -> Dict[str, Any]:
    """
    Call the chat completions endpoint on the module-level ``client`` and return the response as a plain dict.

    Args:
        messages: Chat messages, each a dict with "role" and "content".
        model: Model name forwarded to the endpoint.
        temperature: Sampling temperature forwarded to the endpoint.
        max_tokens: Completion token limit forwarded to the endpoint.
        tools: Optional tool/function declarations. Omitted from the request when None or empty.
        tool_choice: One of "none", "auto", "required". Any other value is ignored, and it is
            only forwarded when ``tools`` is supplied (a tool choice without tools is meaningless).

    Returns:
        The response converted to nested dicts/lists.

    Raises:
        Exception: whatever the client raises; the error is printed and re-raised unchanged.
    """
    try:
        kwargs: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if tools:
            kwargs["tools"] = tools
            if tool_choice in ("none", "auto", "required"):
                kwargs["tool_choice"] = tool_choice

        resp = client.chat.completions.create(**kwargs)

        # Prefer the response model's own serializer when it has one.
        if hasattr(resp, "model_dump"):
            return resp.model_dump()

        # Fallback for objects without model_dump: walk attributes through a JSON round-trip.
        return json.loads(json.dumps(resp, default=lambda o: o.__dict__ if hasattr(o, "__dict__") else str(o)))
    except Exception as e:
        print("API call error:", e)
        raise


-------------------------------
**TASK 1: Conversation History Manager + Summarization**
-------------------------------

**ConversationManager class**

In [None]:
# Conversation manager - stores history, truncates, and summarizes using the model.

class ConversationManager:
    """
    Keeps a chat history as a list of {"role", "content"} dicts and bounds its size.

    Three independent mechanisms are applied from ``add_message``:
      * periodic LLM summarization every ``summarization_k`` user messages,
      * truncation to the last ``max_turns`` messages,
      * truncation to a ``max_chars`` character budget.
    Whenever the truncation steps drop messages, a single "system" message
    describing what was dropped is placed at the front of the history.
    """

    def __init__(
        self,
        model: str = "openai/gpt-oss-20b",
        max_turns: Optional[int] = None,    # if set, truncate to last N turns
        max_chars: Optional[int] = None,    # if set, truncate by total characters
        summarization_k: Optional[int] = None,  # run summarization after every k user messages
        summary_threshold_turns: int = 20,  # stored only; not read by any method in this class
        keep_recent: int = 6,               # messages left verbatim by periodic_summarize
    ):
        self.model = model
        self.history: List[Dict[str, str]] = []
        self.run_counter = 0  # number of user messages seen so far
        self.max_turns = max_turns
        self.max_chars = max_chars
        self.summarization_k = summarization_k
        self.summary_threshold_turns = summary_threshold_turns
        self.keep_recent = max(0, keep_recent)

    def add_message(self, role: str, content: str):
        """Append a message, run periodic summarization if due, then apply truncation."""
        assert role in ("user", "assistant", "system"), "role must be one of 'user'|'assistant'|'system'"
        self.history.append({"role": role, "content": content})
        if role == "user":
            self.run_counter += 1
            if self.summarization_k and (self.run_counter % self.summarization_k == 0):
                self.periodic_summarize()

        # After every message insertion, apply truncation strategies if set
        self.apply_truncation()

    def apply_truncation(self):
        """
        Apply truncation by turns and/or characters if configured.

        Turn limit: keeps the last ``max_turns`` messages and prepends one summary
        message, so the history holds ``max_turns + 1`` entries right after a cut.

        Character limit: drops the oldest messages until the remaining ones fit in
        ``max_chars``. The most recent message is never dropped, even if it alone
        exceeds the budget. The prepended summary is added on top of the budget.
        """
        if self.max_turns and len(self.history) > self.max_turns:
            removed = self.history[:-self.max_turns]
            self.history = self.history[-self.max_turns:]
            summary_text = self.quick_local_summary(removed)
            self.history.insert(0, {"role": "system", "content": f"Summary: {summary_text}"})

        if self.max_chars:
            # Track a running total instead of re-summing the whole history per eviction.
            total_len = sum(len(m["content"]) for m in self.history)
            removed = []
            # `len(self.history) > 1` protects the newest message from being evicted.
            while len(self.history) > 1 and total_len > self.max_chars:
                dropped = self.history.pop(0)
                total_len -= len(dropped["content"])
                removed.append(dropped)
            if removed:
                summary_text = self.quick_local_summary(removed)
                self.history.insert(0, {"role": "system", "content": f"Summary: {summary_text}"})

    def quick_local_summary(self, messages: List[Dict[str, str]]) -> str:
        """
        Build a short heuristic description of ``messages`` without calling the model.

        Returns "" for an empty list; otherwise a string with the number of messages,
        a per-role count, and the first 120 characters of the first and last message.
        """
        if not messages:
            return ""
        first = messages[0]["content"]
        last = messages[-1]["content"]
        counts: Dict[str, int] = {}
        for m in messages:
            counts[m["role"]] = counts.get(m["role"], 0) + 1
        return f"Removed {len(messages)} messages (by roles {counts}). First: {first[:120]} ... Last: {last[:120]}"

    def periodic_summarize(self, summarization_prompt_prefix: Optional[str] = None):
        """
        Replace everything except the ``keep_recent`` newest messages with one LLM summary.

        Does nothing when the history has no more than ``keep_recent`` messages.
        Errors raised by ``call_chat_model`` propagate to the caller. If the reply has
        no usable text (missing, None or empty content), the first 200 characters of
        the summarized transcript are used instead, so the older history is never
        replaced by a blank summary.

        Args:
            summarization_prompt_prefix: Optional instruction text placed before the
                transcript; a built-in instruction is used when omitted.
        """
        keep_recent = self.keep_recent
        if len(self.history) <= keep_recent:
            return  # nothing to summarize
        # Split by index so keep_recent == 0 works (a `[-0:]` slice would keep everything).
        split_at = len(self.history) - keep_recent
        to_summarize = self.history[:split_at]
        recent = self.history[split_at:]

        # Create a text blob to feed to the model
        blob = "\n".join([f"{m['role'].upper()}: {m['content']}" for m in to_summarize])
        prompt_prefix = summarization_prompt_prefix or (
            "You are a concise summarizer. Summarize the following conversation history into short bullet points "
            "that capture key user requests, decisions, and facts. Keep it short (<= 150 words)."
        )
        messages = [
            {"role": "system", "content": "You summarize conversation history into concise bullet points."},
            {"role": "user",   "content": prompt_prefix + "\n\n" + blob}
        ]

        resp = call_chat_model(messages=messages, model=self.model, temperature=0.0, max_tokens=250)
        try:
            summary_text = (resp['choices'][0]['message']['content'] or "").strip()
        except Exception:
            summary_text = ""
        if not summary_text:
            # An empty reply would otherwise wipe the older history with a blank summary.
            summary_text = "Summary generated (fallback): " + (blob[:200] + "...")

        # Replace the older portion with a single system summary message, keep the recent
        self.history = [{"role": "system", "content": f"AutoSummary: {summary_text}"}] + recent

    def get_history(self) -> List[Dict[str, str]]:
        """Return a shallow copy of the history list (the message dicts themselves are shared)."""
        return list(self.history)


**Task 1 demonstration: feed conversations and show outputs**

In [None]:
#Demonstration for Task 1

# Setup manager with specific truncation & periodic summarization settings
# Manager under test: keep at most 12 turns, no character budget,
# and ask the model for a summary on every 3rd user message.
mgr = ConversationManager(
    model="openai/gpt-oss-20b",
    max_turns=12,
    max_chars=None,
    summarization_k=3,
)

# Scripted conversation as (role, content) pairs, fed to the manager in order.
samples = [
    ("user", "Hi, I'm working on a Groq assignment and need help building Chat summarization."),
    ("assistant", "Sure — what do you want to begin with?"),
    ("user", "Explain how to maintain conversation history and summarize periodically."),
    ("assistant", "You can store messages in a list and call a summarization LLM periodically."),
    ("user", "Also show truncation by turns and character limits."),
    ("assistant", "Truncation by number of turns is straightforward: keep last N messages."),
    ("user", "Please summarize my earlier messages now."),  # 4th user message
    ("assistant", "Okay, I'll summarize when needed."),
    ("user", "Now add more messages to trigger the periodic summarization feature."),
    ("assistant", "I'll show how summarization happens after every 3rd user message."),
    ("user", "Message A"),
    ("user", "Message B"),
    ("user", "Message C"),
    ("user", "Message D"),
    ("assistant", "Now we've added several messages."),
]

for turn in samples:
    mgr.add_message(*turn)

# Dump what is left in the manager, one numbered line per message (content capped at 300 chars).
print("\n===== Conversation history (after automatic truncation/summarization if triggered) =====")
for position, message in enumerate(mgr.get_history(), start=1):
    preview = message['content'][:300]
    print(f"{position:02d} {message['role']}: {preview}")



===== Conversation history (after automatic truncation/summarization if triggered) =====
01 system: AutoSummary: 
02 user: Now add more messages to trigger the periodic summarization feature.
03 assistant: I'll show how summarization happens after every 3rd user message.
04 user: Message A
05 user: Message B
06 user: Message C
07 user: Message D
08 assistant: Now we've added several messages.


-------------------------------
**TASK 2: JSON Schema Classification & Extraction via Function Calling**
-------------------------------

**Define the JSON schema as an OpenAI Function declaration**

In [None]:
#Function schema for extraction (OpenAI function-calling format)

# Per-field JSON-schema fragments for the contact record the model is asked to fill in.
_contact_field_schemas = {
    "name": {
        "type": "string",
        "description": "Full name of the person (if present).",
    },
    "email": {
        "type": "string",
        "description": "Email address. Return empty string if not present.",
    },
    "phone": {
        "type": "string",
        "description": "Phone number digits only or formatted. Return empty string if not present.",
    },
    "location": {
        "type": "string",
        "description": "City, state or country the user mentioned (if any).",
    },
    # Age is the only field that may be null rather than a string.
    "age": {
        "anyOf": [{"type": "integer"}, {"type": "null"}],
        "description": "Age as integer if mentioned; null if not mentioned.",
    },
}

# Tool declaration passed via `tools=[extract_function]`.
# No field is required, so a partial extraction is still a valid call.
extract_function = {
    "type": "function",
    "function": {
        "name": "extract_contact_info",
        "description": "Extract contact and demographic fields (name, email, phone, location, age) from user text.",
        "parameters": {
            "type": "object",
            "properties": _contact_field_schemas,
            "required": [],
        },
    },
}


**Helper to call model with function calling and parse results**

In [None]:
# Call the model with function calling and parse the returned arguments

def extract_with_function_call(user_text: str, model: str = "openai/gpt-oss-20b"):
    """
    Ask the model to extract contact info from ``user_text`` via the ``extract_function`` tool.

    The request is sent with ``tools=[extract_function]``, so the arguments are looked up
    in this order:
      1. ``message["tool_calls"][0]["function"]["arguments"]`` (tools-style reply),
      2. ``message["function_call"]["arguments"]`` (legacy function-call reply),
      3. ``message["content"]`` parsed as JSON (model answered in plain text).

    NOTE: a later cell in this notebook defines a function with the same name; on a
    top-to-bottom run that later definition replaces this one.

    Args:
        user_text: Free-form text to extract from.
        model: Model name forwarded to ``call_chat_model``.

    Returns:
        ``(extracted, resp)``: ``extracted`` is a dict (empty when nothing could be
        parsed) and ``resp`` is the dict returned by ``call_chat_model``.
    """
    messages = [
        {"role": "system", "content": "You are a JSON extractor. Extract contact info into the requested schema."},
        {"role": "user", "content": user_text}
    ]
    resp = call_chat_model(messages=messages, model=model, temperature=0.0, max_tokens=512,
                           tools=[extract_function], tool_choice="auto")

    extracted = {}
    try:
        choice = resp['choices'][0]['message']
        tool_calls = choice.get('tool_calls') or []
        legacy_call = choice.get('function_call')
        if tool_calls:
            parsed = json.loads(tool_calls[0]['function'].get('arguments') or '{}')
        elif legacy_call:
            parsed = json.loads(legacy_call.get('arguments') or '{}')
        else:
            # Plain-text answer: best effort, silently give up if it is not JSON.
            try:
                parsed = json.loads(choice.get('content') or '')
            except ValueError:
                parsed = {}
        # Only a JSON object is a usable extraction; lists/scalars are discarded.
        if isinstance(parsed, dict):
            extracted = parsed
    except Exception as e:
        print("Error parsing model response:", e)
        extracted = {}

    return extracted, resp


**Extraction and validation helpers for the extracted JSON**

In [None]:
#Extraction + Validation helpers

# Loose email shape check: one or more characters that are not "@" or whitespace,
# then "@", then more such characters, a literal ".", and more such characters.
# The pattern carries no anchors of its own, so whether trailing text is tolerated
# depends on how the caller applies it (match vs. fullmatch).
EMAIL_RE = re.compile(r"[^@ \t\r\n]+@[^@ \t\r\n]+\.[^@ \t\r\n]+")

def extract_with_function_call(user_text: str, model: str = "gemma2-9b-it"):
    """
    Send text to model with tool schema and parse the structured JSON output.

    The arguments of the first tool call are used when the model made one. Because the
    request uses ``tool_choice="auto"`` the model may instead answer in plain text; in
    that case the message content is parsed as JSON on a best-effort basis.

    Args:
        user_text: Free-form text to extract contact details from.
        model: Model name forwarded to ``call_chat_model``.

    Returns:
        ``(extracted, resp)``: ``extracted`` is always a dict (empty when nothing usable
        was returned) and ``resp`` is the dict returned by ``call_chat_model``.
    """
    messages = [
        {"role": "system", "content": "Extract the contact details into structured JSON."},
        {"role": "user", "content": user_text}
    ]

    resp = call_chat_model(
        messages=messages,
        model=model,
        temperature=0.0,
        max_tokens=512,
        tools=[extract_function],
        tool_choice="auto"
    )

    extracted = {}
    try:
        message = resp["choices"][0]["message"]
        # The key may be present with a None value, so normalise to a list.
        tool_calls = message.get("tool_calls") or []
        if tool_calls:
            parsed = json.loads(tool_calls[0]["function"]["arguments"])
        else:
            # No tool call: accept a JSON object written directly into the reply text.
            try:
                parsed = json.loads(message.get("content") or "")
            except ValueError:
                parsed = {}
        # Downstream validation expects a dict; discard lists/scalars.
        if isinstance(parsed, dict):
            extracted = parsed
    except Exception as e:
        print("Parsing error:", e)

    return extracted, resp


def validate_extracted(data: dict) -> dict:
    """
    Validate and normalise the fields of an extraction result.

    Checks performed:
      * name: must be a non-empty string, else "name_missing".
      * email: optional; when present the whole value must match ``EMAIL_RE``,
        else "email_invalid".
      * phone: optional; reduced to its digits. A non-empty value with no digits
        gives "phone_invalid". Non-string values (e.g. a number) are converted to text first.
      * location: kept when it is a string, otherwise "".
      * age: optional; must be an integer (or integer-like string/float) with
        0 < age < 120, else "age_not_integer" / "age_out_of_range".

    Args:
        data: Extracted fields. Anything that is not a dict is treated as empty.

    Returns:
        ``{"validated": {...}, "issues": [...], "is_valid": bool}`` where ``is_valid``
        is True exactly when ``issues`` is empty.
    """
    if not isinstance(data, dict):
        data = {}

    issues = []
    validated = {}

    # Name: accept if non-empty string
    name = data.get('name') or ""
    validated['name'] = name.strip() if isinstance(name, str) else ""
    if not validated['name']:
        issues.append("name_missing")

    # Email: the entire value has to look like an address; a prefix match would
    # let values such as "a@b.com and more" through.
    email_raw = data.get('email') or ""
    email = (email_raw if isinstance(email_raw, str) else str(email_raw)).strip()
    validated['email'] = email
    if email and not EMAIL_RE.fullmatch(email):
        issues.append("email_invalid")

    # Phone: strip non-digit chars; ensure something remains.
    # Models may return the number as an int, so convert to text before the regex.
    phone_raw = data.get('phone') or ""
    phone_text = phone_raw if isinstance(phone_raw, str) else str(phone_raw)
    phone_digits = re.sub(r"\D", "", phone_text)
    validated['phone'] = phone_digits
    if phone_text and not phone_digits:
        issues.append("phone_invalid")

    # Location: accept string
    location = data.get('location') or ""
    validated['location'] = location.strip() if isinstance(location, str) else ""

    # Age: ensure integer and sensible range (0 < age < 120)
    age_raw = data.get('age', None)
    age_val = None
    if age_raw is not None and age_raw != "":
        # int() would quietly accept True/False and truncate 22.7 to 22; reject both.
        is_bool = isinstance(age_raw, bool)
        is_fractional = isinstance(age_raw, float) and not age_raw.is_integer()
        if is_bool or is_fractional:
            issues.append("age_not_integer")
        else:
            try:
                age_val = int(age_raw)
                if not (0 < age_val < 120):
                    issues.append("age_out_of_range")
            except Exception:
                issues.append("age_not_integer")
                age_val = None
    validated['age'] = age_val

    return {"validated": validated, "issues": issues, "is_valid": len(issues) == 0}


**Demonstration: parse 3 sample chats**

In [None]:
#Three sample chats demonstrating extraction

# Three free-form chats that exercise different extraction situations.
samples = [
    # 1) Every field stated in a single message
    "Hello, I'm Rahul Sharma from Pune. I'm 22 years old. My email is rahul.sharma@example.com and my phone is +91-98765-43210.",
    # 2) Fields spread over several sentences, age omitted
    "Hi, can you save my contact? Name: Anjali Verma. Email: anjali_verma123@gmail.com. I'm currently based in Mumbai. Contact number 9876543210.",
    # 3) Partial data in unusual formats, email omitted
    "Hey, this is Sunil. Phone (US): (415) 555-0123. I live in San Francisco. Age twenty nine.",
]

# For each chat: show the text, the raw model extraction, then the validation report.
for sample_no, chat_text in enumerate(samples, start=1):
    print("\n--- Sample", sample_no, "---")
    print("User text:", chat_text)
    extracted, raw_resp = extract_with_function_call(chat_text)
    pretty_json = json.dumps(extracted, indent=2)
    print("Raw extracted JSON (model):", pretty_json)
    validation = validate_extracted(extracted)
    print("Validation result:", validation)



--- Sample 1 ---
User text: Hello, I'm Rahul Sharma from Pune. I'm 22 years old. My email is rahul.sharma@example.com and my phone is +91-98765-43210.
Raw extracted JSON (model): {
  "age": 22,
  "email": "rahul.sharma@example.com",
  "location": "Pune",
  "name": "Rahul Sharma",
  "phone": "+91-98765-43210"
}
Validation result: {'validated': {'name': 'Rahul Sharma', 'email': 'rahul.sharma@example.com', 'phone': '919876543210', 'location': 'Pune', 'age': 22}, 'issues': [], 'is_valid': True}

--- Sample 2 ---
User text: Hi, can you save my contact? Name: Anjali Verma. Email: anjali_verma123@gmail.com. I'm currently based in Mumbai. Contact number 9876543210.
Raw extracted JSON (model): {
  "age": null,
  "email": "anjali_verma123@gmail.com",
  "location": "Mumbai",
  "name": "Anjali Verma",
  "phone": "9876543210"
}
Validation result: {'validated': {'name': 'Anjali Verma', 'email': 'anjali_verma123@gmail.com', 'phone': '9876543210', 'location': 'Mumbai', 'age': None}, 'issues': [], 'is