In [15]:
!pip install -q openai
!pip install -q jsonschema

In [2]:
#@markdown ### Imports & API-key loader
import openai
import os
from datetime import datetime

# Read the secret that Colab stores for us
from google.colab import userdata
openai.api_key = userdata.get("GROQ_API_KEY")

# Point the client to Groq's endpoint (v1/chat/completions)
client = openai.OpenAI(
    api_key=openai.api_key,
    base_url="https://api.groq.com/openai/v1"
)

print("✅ Groq client ready – base URL set to", client.base_url)

✅ Groq client ready – base URL set to https://api.groq.com/openai/v1/


In [6]:
#@markdown ### ConversationManager (turn limit, char limit, k-run summarisation)
class ConversationManager:
    def __init__(self,
                 turn_limit=None,   # keep last N messages (None = unlimited)
                 char_limit=None,   # keep last X chars (None = unlimited)
                 k_summarise=3):    # summarise every k-th exchange
        self.turn_limit = turn_limit
        self.char_limit = char_limit
        self.k_summarise = k_summarise
        self.history = []          # list of dicts: {"role":"user"|"assistant", "content":"..."}
        self.run_counter = 0       # how many user/assistant pairs we have seen

    # --------------- public helpers ---------------
    def add_user(self, text):
        self.history.append({"role": "user", "content": text})

    def add_assistant(self, text):
        self.history.append({"role": "assistant", "content": text})
        self.run_counter += 1
        # periodic summarisation
        if self.run_counter % self.k_summarise == 0:
            self._summarise_internal()

    # --------------- truncation helpers ---------------
    def _apply_turn_limit(self):
        if self.turn_limit:
            # keep last N messages
            self.history = self.history[-self.turn_limit:]

    def _apply_char_limit(self):
        if self.char_limit:
            # pop from the top until under limit
            while self.history and len(str(self.history)) > self.char_limit:
                self.history.pop(0)

    # --------------- summarisation ---------------
    def _summarise_internal(self):
        """Ask Groq for a concise summary, replace history with [summary, last_message]."""
        # build a single string of the current history
        transcript = "\n".join(f"{m['role']}: {m['content']}" for m in self.history)
        prompt = (
            "Summarise the following conversation in 2-3 short sentences. "
            "Retain key facts, names, preferences.\n\n" + transcript
        )

        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",  # any Groq model
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=120
        )
        summary = response.choices[0].message.content.strip()

        # keep the summary + the very last user/assistant pair
        last_two = self.history[-2:] if len(self.history) >= 2 else self.history
        self.history = [{"role": "system", "content": f"Summary so far: {summary}"}] + last_two
        print(f"📝  Auto-summarised at run #{self.run_counter}")
        print("   Summary:", summary)

    # --------------- pretty print ---------------
    def show(self):
        self._apply_turn_limit()
        self._apply_char_limit()
        print("\n------ Current history ------")
        for m in self.history:
            print(f"{m['role'].upper()}: {m['content']}")
        print("------------------------------\n")

# quick sanity test
cm = ConversationManager()
print("✅ ConversationManager class defined & tiny test instance created.")

✅ ConversationManager class defined & tiny test instance created.


In [7]:
#@markdown ### Sample chat #1 (3 turns)
cm = ConversationManager(turn_limit=None, char_limit=None, k_summarise=3)

cm.add_user("Hi, my name is Ayaan and I love pizza.")
cm.add_assistant("Nice to meet you, Ayaan! What topping do you like most?")
cm.add_user("Pepperoni, definitely. I also enjoy hiking on weekends.")
cm.add_assistant("Great combo—pepperoni pizza after a hike! Where do you usually trek?")
cm.add_user("Mostly around Lake Tahoe. The views are amazing.")
cm.add_assistant("Tahoe is stunning. I hope you get good weather next time!")

cm.show()

📝  Auto-summarised at run #3
   Summary: Ayaan, who loves pizza, particularly pepperoni, enjoys hiking on weekends. He usually treks around Lake Tahoe, appreciating its stunning views.

------ Current history ------
SYSTEM: Summary so far: Ayaan, who loves pizza, particularly pepperoni, enjoys hiking on weekends. He usually treks around Lake Tahoe, appreciating its stunning views.
USER: Mostly around Lake Tahoe. The views are amazing.
ASSISTANT: Tahoe is stunning. I hope you get good weather next time!
------------------------------



In [8]:
#@markdown ### Turn-limit demo (keep last 3 messages)
cm = ConversationManager(turn_limit=3, k_summarise=10)  # no auto-summary yet

cm.add_user("Turn 1: I like red cars.")
cm.add_assistant("Red is a bold choice!")
cm.add_user("Turn 2: I also play guitar.")
cm.add_assistant("Nice! Acoustic or electric?")
cm.add_user("Turn 3: Electric. Mostly rock.")
cm.add_assistant("Rock on! Any favourite bands?")

print("After 5 messages, keeping only last 3:")
cm.show()

After 5 messages, keeping only last 3:

------ Current history ------
ASSISTANT: Nice! Acoustic or electric?
USER: Turn 3: Electric. Mostly rock.
ASSISTANT: Rock on! Any favourite bands?
------------------------------



In [9]:
#@markdown ### Character-limit demo (max ~200 chars)
cm = ConversationManager(char_limit=200, k_summarise=99)  # no auto-summary

for i in range(1, 6):
    cm.add_user(f"Short message number {i}")
    cm.add_assistant(f"Reply number {i}")

print("History trimmed to ~200 characters:")
cm.show()
print("Raw list length:", len(str(cm.history)))

History trimmed to ~200 characters:

------ Current history ------
ASSISTANT: Reply number 4
USER: Short message number 5
ASSISTANT: Reply number 5
------------------------------

Raw list length: 159


In [10]:
#@markdown ### Combined limits + k=2 summarisation
cm = ConversationManager(turn_limit=4, char_limit=300, k_summarise=2)

# Turn 1
cm.add_user("I want to learn Japanese.")
cm.add_assistant("Great goal! How much time can you study daily?")

# Turn 2  -> triggers summary (k=2)
cm.add_user("About 30 min.")
cm.add_assistant("Perfect—consistency beats cramming.")

# Turn 3
cm.add_user("Should I start with hiragana?")
cm.add_assistant("Absolutely, master hiragana first.")

cm.show()

📝  Auto-summarised at run #2
   Summary: A user expressed their desire to learn Japanese and was advised by an assistant to prioritize consistency over cramming, suggesting that studying 30 minutes daily is a good starting point.

------ Current history ------
USER: About 30 min.
ASSISTANT: Perfect—consistency beats cramming.
USER: Should I start with hiragana?
ASSISTANT: Absolutely, master hiragana first.
------------------------------



In [12]:
#task2: starts here
#@markdown ### JSON schema for structured extraction
user_schema = {
    "type": "object",
    "properties": {
        "name":    {"type": "string"},
        "email":   {"type": "string"},
        "phone":   {"type": "string"},
        "location":{"type": "string"},
        "age":     {"type": "integer"}
    },
    "required": ["name", "email", "phone", "location", "age"],
    "additionalProperties": False
}

# Wrap it inside the 'function' format OpenAI expects
functions = [
    {
        "name": "extract_user_details",
        "description": "Extract user-supplied personal details from chat.",
        "parameters": user_schema
    }
]

print("✅ Schema defined and wrapped for function-calling.")

✅ Schema defined and wrapped for function-calling.


In [13]:
#@markdown ### Extraction helper
import json

def extract_details(chat_text: str):
    """Return dict with keys: name, email, phone, location, age."""
    messages = [{"role": "user", "content": chat_text}]
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=messages,
        functions=functions,
        function_call={"name": "extract_user_details"}  # force the call
    )
    # the LLM reply contains the function call
    func_call = response.choices[0].message.function_call
    if not func_call or func_call.name != "extract_user_details":
        raise RuntimeError("Function call not returned")
    args = json.loads(func_call.arguments)
    return args

print("✅ extract_details() ready.")

✅ extract_details() ready.


In [14]:
#@markdown ### Sample chats for extraction
samples = [
    "Hey, I'm Alice Smith, 28 years old. You can reach me at alice@example.com or call 415-555-1234. I live in San Francisco.",
    "My name is Rohan Patel. I'm 35, based in Mumbai. Email: rohan.p@mail.io and phone +91 98765 43210.",
    "Hi there! Lucy Brown, 22, from London. lucy.brown@uk.net – mobile 07911 123456."
]

for idx, chat in enumerate(samples, 1):
    print(f"\n----- Sample {idx} -----")
    print("Input:", chat)
    try:
        data = extract_details(chat)
        print("Extracted JSON:")
        print(json.dumps(data, indent=2))
    except Exception as e:
        print("Extraction failed:", e)


----- Sample 1 -----
Input: Hey, I'm Alice Smith, 28 years old. You can reach me at alice@example.com or call 415-555-1234. I live in San Francisco.
Extracted JSON:
{
  "age": 28,
  "email": "alice@example.com",
  "location": "San Francisco",
  "name": "Alice Smith",
  "phone": "415-555-1234"
}

----- Sample 2 -----
Input: My name is Rohan Patel. I'm 35, based in Mumbai. Email: rohan.p@mail.io and phone +91 98765 43210.
Extracted JSON:
{
  "age": 35,
  "email": "rohan.p@mail.io",
  "location": "Mumbai",
  "name": "Rohan Patel",
  "phone": "+91 98765 43210"
}

----- Sample 3 -----
Input: Hi there! Lucy Brown, 22, from London. lucy.brown@uk.net – mobile 07911 123456.
Extracted JSON:
{
  "age": 22,
  "email": "lucy.brown@uk.net",
  "location": "London",
  "name": "Lucy Brown",
  "phone": "07911 123456"
}


In [16]:
#@markdown ### Validate extracted data against schema
from jsonschema import validate, ValidationError

def validate_data(data):
    try:
        validate(instance=data, schema=user_schema)
        return True, None
    except ValidationError as ve:
        return False, ve.message

# re-run extraction + validation for the same samples
for idx, chat in enumerate(samples, 1):
    data = extract_details(chat)
    ok, err = validate_data(data)
    print(f"Sample {idx} valid? {ok}")
    if not ok:
        print("  Error:", err)
    else:
        print("  ✅ Passed")

Sample 1 valid? True
  ✅ Passed
Sample 2 valid? True
  ✅ Passed
Sample 3 valid? True
  ✅ Passed
