In [2]:
!pip install --quiet openai

import os
import json
import time
from typing import List, Dict, Any, Optional
import uuid
from dataclasses import dataclass, field
MODE = "mock"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
if MODE == "live" and not OPENAI_API_KEY:
    raise RuntimeError("Set OPENAI_API_KEY in env/Colab before switching to live mode")

import openai
if OPENAI_API_KEY:
    openai.api_key = OPENAI_API_KEY

In [3]:
@dataclass
class Message:
    role: str
    content: str
    id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])

class ConversationManager:
    def __init__(self):
        self.history: List[Message] = []
        self.run_counter = 0

    def add_message(self, role: str, content: str):
        self.history.append(Message(role=role, content=content))

    def get_last_n_turns(self, n: int) -> List[Message]:
        return self.history[-n*2:] if n > 0 else []

    def truncate_by_chars(self, max_chars: int) -> List[Message]:
        kept = []
        chars = 0
        for msg in reversed(self.history):
            if chars + len(msg.content) > max_chars:
                break
            kept.append(msg)
            chars += len(msg.content)
        return list(reversed(kept))

    def summarize_history_with_llm(self, prompt_extra: str = "") -> str:
        combined = "\n\n".join([f"{m.role}: {m.content}" for m in self.history])
        prompt = (
            "Summarize the following user-assistant conversation in 2-4 concise sentences, "
            "preserving key facts. " + prompt_extra + "\n\nConversation:\n" + combined
        )
        if MODE == "mock":
            snippet = " | ".join([m.content[:60].replace("\n", " ") for m in self.history[-6:]])
            summary = f"(MOCK SUMMARY) Recent points: {snippet}"
            return summary
        else:
            resp = openai.ChatCompletion.create(
                model="gpt-4o-mini",
                messages=[{"role":"user","content":prompt}],
                max_tokens=200,
                temperature=0.2
            )
            text = resp.choices[0].message.content.strip()
            return text

    def periodic_summarize(self, k: int):
        self.run_counter += 1
        if self.run_counter % k == 0:
            summary = self.summarize_history_with_llm(f"Perform periodic summary at run {self.run_counter}.")
            self.history = [Message(role="assistant", content=f"[SUMMARY after run {self.run_counter}]\n{summary}")]
            return summary
        return None

    def get_history_text(self):
        return "\n".join([f"{m.role}: {m.content}" for m in self.history])


In [4]:
cm = ConversationManager()

sample_pairs = [
    ("user","Hi, I'm looking for help with my ML model evaluation."),
    ("assistant","Sure — what's the model and data like?"),
    ("user","It's a classification model trained on images; performance drops on new lighting."),
    ("assistant","Consider data augmentation, normalization; also check distribution shift."),
    ("user","How to implement augmentation in PyTorch?"),
    ("assistant","You can use torchvision transforms; here's a sample snippet..."),
]

for role, text in sample_pairs:
    cm.add_message(role, text)

print("=== Full history ===")
print(cm.get_history_text())

last_2_turns = cm.get_last_n_turns(2)
print("\n=== Last 2 turns (approx) ===")
for m in last_2_turns:
    print(f"{m.role}: {m.content}")

trunc = cm.truncate_by_chars(200)
print("\n=== Truncated by 200 chars ===")
for m in trunc:
    print(f"{m.role}: {m.content}")

print("\n=== Periodic summarization demo ===")
for i in range(1,6):
    cm.add_message("user", f"New question #{i}: example followup info.")
    cm.add_message("assistant", f"Assistant reply #{i}.")
    summary = cm.periodic_summarize(k=3)
    if summary:
        print(f"\n--- Summarized at run {cm.run_counter} ---")
        print(summary)
        print("\nCurrent history after summarization:")
        print(cm.get_history_text())


=== Full history ===
user: Hi, I'm looking for help with my ML model evaluation.
assistant: Sure — what's the model and data like?
user: It's a classification model trained on images; performance drops on new lighting.
assistant: Consider data augmentation, normalization; also check distribution shift.
user: How to implement augmentation in PyTorch?
assistant: You can use torchvision transforms; here's a sample snippet...

=== Last 2 turns (approx) ===
user: It's a classification model trained on images; performance drops on new lighting.
assistant: Consider data augmentation, normalization; also check distribution shift.
user: How to implement augmentation in PyTorch?
assistant: You can use torchvision transforms; here's a sample snippet...

=== Truncated by 200 chars ===
assistant: Consider data augmentation, normalization; also check distribution shift.
user: How to implement augmentation in PyTorch?
assistant: You can use torchvision transforms; here's a sample snippet...

=== Peri

**TASK 2**

In [6]:
SCHEMA = {
    "name": {"type": "string", "required": True},
    "email": {"type": "string", "required": True},
    "phone": {"type": "string", "required": False},
    "location": {"type": "string", "required": False},
    "age": {"type": "integer", "required": False}
}

def simple_validate_schema(result: dict, schema: dict) -> (bool, list):
    errors = []
    for key, meta in schema.items():
        if meta.get("required") and key not in result:
            errors.append(f"Missing required field: {key}")
        if key in result:
            t = type(result[key])
            expected = meta["type"]
            if expected == "integer" and not isinstance(result[key], int):
                try:
                    result[key] = int(result[key])
                except Exception:
                    errors.append(f"Field {key} expected integer but got {t}")
            if expected == "string" and not isinstance(result[key], str):
                errors.append(f"Field {key} expected string but got {t}")
    return (len(errors) == 0, errors)

sample_chats = [
    "Hi, I'm Alice Johnson. My email is alice.johnson@mail.com and I live in Wonderland. I'm 25 years old. Contact: 12345-irrelevant",
    "Hello, name: Bob Smith; email:bob.smith@gmail.com. Phone: xyz-0000. Based in Metropolis.",
    "User: Charlie Brown. Email: charlie.brown@example.com. No phone. Age 28."
]

def extract_with_llm(chat_text: str) -> dict:
    if MODE == "mock":
        out = {"name": None, "email": None, "phone": None, "location": None, "age": None}
        import re
        m = re.search(r'[\w\.-]+@[\w\.-]+', chat_text)
        if m: out["email"] = m.group(0)
        p = re.search(r'\+?\d[\d\-\s]{7,}\d', chat_text)
        if p: out["phone"] = p.group(0).replace(" ", "")
        age = re.search(r'(\bage\b[: ]*|I\'m |I am )(\d{1,2})', chat_text, flags=re.I)
        if age:
            out["age"] = int(age.group(2))
        name = re.search(r"(I'm|I am|name[: ]|User:)\s*([A-Z][\w\s\.']{2,40})", chat_text)
        if name:
            out["name"] = name.group(2).strip(" .;")
        loc = re.search(r'in ([A-Za-z ]+)\.?', chat_text)
        if loc:
            out["location"] = loc.group(1).strip()
        if not out["name"]:
            first_tokens = chat_text.split()[:3]
            out["name"] = " ".join(first_tokens)
        return out
    else:
        function_def = {
            "name": "extract_info",
            "description": "Extract contact details into JSON per provided schema",
            "parameters": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "email": {"type": "string"},
                    "phone": {"type": "string"},
                    "location": {"type": "string"},
                    "age": {"type": "integer"},
                },
                "required": ["name", "email"]
            }
        }
        messages = [
            {"role":"system","content":"You are a helpful extractor. Return only JSON via function calling."},
            {"role":"user","content":f"Extract fields from this chat: ```{chat_text}```"}
        ]
        resp = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=messages,
            functions=[function_def],
            function_call={"name":"extract_info"},
            temperature=0
        )
        func_call = resp.choices[0].message.get("function_call", {})
        args = func_call.get("arguments", "{}")
        try:
            parsed = json.loads(args)
        except:
            parsed = {}
        return parsed

for i, chat in enumerate(sample_chats, 1):
    extracted = extract_with_llm(chat)
    valid, errors = simple_validate_schema(extracted, SCHEMA)
    print(f"\nSample chat #{i}:")
    print(chat)
    print("Extracted JSON:", json.dumps(extracted, indent=2))
    print("Validation:", "OK" if valid else "FAIL", errors)



Sample chat #1:
Hi, I'm Alice Johnson. My email is alice.johnson@mail.com and I live in Wonderland. I'm 25 years old. Contact: 12345-irrelevant
Extracted JSON: {
  "name": "Alice Johnson. My email is alice.johnson",
  "email": "alice.johnson@mail.com",
  "phone": null,
  "location": "Wonderland",
  "age": 25
}
Validation: FAIL ["Field phone expected string but got <class 'NoneType'>"]

Sample chat #2:
Hello, name: Bob Smith; email:bob.smith@gmail.com. Phone: xyz-0000. Based in Metropolis.
Extracted JSON: {
  "name": "Bob Smith",
  "email": "bob.smith@gmail.com.",
  "phone": null,
  "location": "Metropolis",
  "age": null
}
Validation: FAIL ["Field phone expected string but got <class 'NoneType'>", "Field age expected integer but got <class 'NoneType'>"]

Sample chat #3:
User: Charlie Brown. Email: charlie.brown@example.com. No phone. Age 28.
Extracted JSON: {
  "name": "Charlie Brown. Email",
  "email": "charlie.brown@example.com.",
  "phone": null,
  "location": null,
  "age": 28
}
V