In [1]:
!pip install openai  # Installs the library




# TASK 1: Conversation Summarization & Truncation

In [37]:
import os
import openai
import json

# Setup Groq API key
# Credentials must never be hardcoded in a notebook: read the key from the
# environment and fall back to an interactive (non-echoing) prompt.
# NOTE: any key that was previously committed in this cell should be treated
# as compromised and rotated.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass  # local import keeps this cell self-contained

    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

# Setup client (Groq exposes an OpenAI-compatible endpoint)
client = openai.OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
)



The `ChatHistory` class below provides:

- Chat history tracking with the OpenAI-compatible Groq API.
- Summarization after every k turns (k=3).
- Truncation by maximum number of turns.
- Truncation by maximum number of characters.


In [38]:

class ChatHistory:
    """Rolling chat transcript with periodic summarization and truncation.

    Messages are stored in ``self.history`` as ``{"role": ..., "content": ...}``
    dicts and sent to the module-level ``client`` on every turn. A "turn" is
    one user message together with the assistant reply that answers it.
    """

    def __init__(self, model="llama-3.1-8b-instant", k=3):
        """Create an empty history.

        Args:
            model: Model name passed to the chat-completions endpoint.
            k: Summarize after every ``k`` turns; ``0``/``None`` disables
                summarization.
        """
        self.history = []
        self.model = model
        self.k = k  # summarize every k user turns
        self.turn_count = 0

    def add_turn(self, user_msg):
        """Send ``user_msg`` to the model, record both sides, return the reply.

        If the request fails, the user message is removed again so the history
        never ends with an unanswered message, and the error is re-raised.
        After every ``k``-th turn the whole history is replaced by a single
        system message holding a model-written summary.
        """
        # Add user message
        self.history.append({"role": "user", "content": user_msg})

        # Get AI response
        try:
            response = client.chat.completions.create(model=self.model, messages=self.history)
            ai_msg = response.choices[0].message.content
        except Exception:
            # Roll back the message appended above; otherwise the next call
            # would send two consecutive user messages.
            self.history.pop()
            raise

        self.history.append({"role": "assistant", "content": ai_msg})
        self.turn_count += 1

        print(f"\nUser: {user_msg}\nAI: {ai_msg}\n---")

        # Summarize every k user turns (a falsy k would otherwise divide by zero)
        if self.k and self.turn_count % self.k == 0:
            summary = self._summarize()
            print(f"🔹 Summarized after {self.turn_count} turns: {summary}")
            self.history = [
                {"role": "system", "content": f"Conversation so far (summarized): {summary}"}
            ]

        return ai_msg

    def _summarize(self):
        """Ask the model for a 1-2 sentence summary of the current history."""
        resp = client.chat.completions.create(
            model=self.model,
            messages=self.history + [
                {"role": "user", "content": "Summarize our chat so far in 1-2 sentences."}
            ]
        )
        return resp.choices[0].message.content

    def _split_summary(self):
        """Return ``(pinned, rest)``: a leading system message, if any, and the remainder."""
        if self.history and self.history[0].get("role") == "system":
            return self.history[:1], self.history[1:]
        return [], list(self.history)

    def truncate(self, max_turns=None, max_chars=None):
        """Shrink the history, dropping the oldest conversation messages first.

        A leading system message (the running summary) is always kept, since
        discarding it would throw away all earlier context.

        Args:
            max_turns: Keep at most this many of the most recent turns
                (user/assistant pairs). ``None`` or ``0`` means no limit.
            max_chars: Drop the oldest messages until the total content length
                is at most this value; at least one message is always kept.
                ``None`` or ``0`` means no limit.

        Raises:
            ValueError: If ``max_turns`` is negative.
        """
        if max_turns is not None and max_turns < 0:
            raise ValueError("max_turns must be non-negative")

        if max_turns:
            pinned, rest = self._split_summary()
            # One turn is two messages: the user message and the assistant reply.
            self.history = pinned + rest[-2 * max_turns:]
            print(f"📌 Truncated to {max_turns} turns → {len(self.history)} messages")

        if max_chars:
            pinned, rest = self._split_summary()
            total_chars = sum(len(m["content"]) for m in pinned + rest)
            dropped = 0
            # Keep a running total instead of re-summing the history per drop.
            while total_chars > max_chars and len(pinned) + len(rest) - dropped > 1:
                total_chars -= len(rest[dropped]["content"])
                dropped += 1
            self.history = pinned + rest[dropped:]
            print(f"📌 Truncated to {max_chars} chars → {len(self.history)} messages")



Two demos are included:
1. Truncation by turns (`max_turns=4`).
2. Truncation by characters (`max_chars=3000`).

In [39]:

# === Demo ===
# User messages replayed, in order, by each ChatHistory demo loop in this notebook.
sample_msgs = [
    "Hello, who are you?",
    "Can you tell me a joke?",
    "What is AI?",
    "Summarize this in one line.",
    "Thanks!"
]


In [40]:

# Demo 1: start from a fresh history, send every sample message, and apply
# turn-based truncation after each reply. Each add_turn call hits the API.
print("\n=== Demo 1: Truncation by Turns ===")
chat = ChatHistory(k=3)
for msg in sample_msgs:
    chat.add_turn(msg)
    chat.truncate(max_turns=4)



=== Demo 1: Truncation by Turns ===

User: Hello, who are you?
AI: Hello. I am an artificial intelligence (AI) model designed to provide information and assist with tasks. I'm a large language model, which means I've been trained on a massive dataset of text from the internet to understand and respond to a wide range of questions and topics. I'm here to help and provide any assistance or information you might need. What would you like to talk about?
---
📌 Truncated to 4 turns → 2 messages

User: Can you tell me a joke?
AI: Why couldn't the bicycle stand up by itself? 

Because it was two-tired.
---
📌 Truncated to 4 turns → 4 messages

User: What is AI?
AI: AI, or Artificial Intelligence, refers to the development of computer systems that can perform tasks that typically require human intelligence, such as:

1. Learning: AI systems can learn from data and improve their performance over time.
2. Problem-solving: AI systems can solve complex problems and make decisions.
3. Reasoning: AI 

In [41]:

# Demo 2: same conversation on a fresh history, this time applying
# character-based truncation after each reply. Each add_turn call hits the API.
print("\n=== Demo 2: Truncation by Chars ===")
chat = ChatHistory(k=3)
for msg in sample_msgs:
    chat.add_turn(msg)
    chat.truncate(max_chars=3000)


=== Demo 2: Truncation by Chars ===

User: Hello, who are you?
AI: I'm an artificial intelligence model known as a large language model (LLM) or a conversational AI. I'm a computer program designed to simulate human-like conversations, answer questions, provide information, and generate text. I'm here to assist you with any topic or task you'd like to discuss or work on.

I don't have a personal identity or emotions like humans do, but I'm designed to be helpful and engaging. I can help with language-related tasks such as writing, grammar, and translation, and can provide information on a wide range of subjects, from science and history to entertainment and culture.

How can I assist you today?
---
📌 Truncated to 3000 chars → 2 messages

User: Can you tell me a joke?
AI: Here's a joke:

What do you call a fake noodle?

(wait for it...)

An impasta!

Hope that made you laugh or at least crack a smile! I have plenty more where that came from if you want to hear another one.
---
📌 Trunca

# Task 2

# Task 2: JSON Schema Classification & Information Extraction

This notebook demonstrates:
- Extracting structured details (name, email, phone, location, age) from free-text chats.
- Using Groq API (LLaMA-3.1-8b-instant) with JSON schema tools for structured output.
- Validating extracted data against the schema:
  - **Present keys** → fields correctly extracted
  - **Missing keys** → expected but not found
  - **Extra keys** → outside the defined schema
- Fallback regex extraction if API call fails.


In [42]:
import os
import openai
import json
import re

# Setup Groq API key
# Credentials must never be hardcoded in a notebook: read the key from the
# environment and fall back to an interactive (non-echoing) prompt.
# NOTE: any key that was previously committed in this cell should be treated
# as compromised and rotated.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass  # local import keeps this cell self-contained

    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

# Setup Groq client (OpenAI-compatible endpoint)
client = openai.OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
)

# Define JSON schema as a "tool"
# A single function-calling tool named "extract_info"; none of its five
# properties is required, so the model may omit anything not mentioned.
tools = [
    dict(
        type="function",
        function=dict(
            name="extract_info",
            description="Extract user details from chat text, only include fields explicitly mentioned with valid values.",
            parameters=dict(
                type="object",
                properties=dict(
                    name=dict(type="string", description="User's name, e.g., 'Alice'"),
                    email=dict(type="string", description="User's valid email, e.g., 'user@example.com'"),
                    phone=dict(type="string", description="User's phone number, e.g., '123-456-7890'"),
                    location=dict(type="string", description="User's location, e.g., 'NYC'"),
                    age=dict(type="integer", description="User's age as a number, e.g., 25"),
                ),
                required=[],
            ),
        ),
    )
]

# Fallback regex extraction
def text_based_extraction(chat_text):
    """Extract user details from free text with regular expressions.

    Only fields that are actually found are included in the result.

    Args:
        chat_text: The raw chat message.

    Returns:
        dict with any of the keys ``name``, ``email``, ``phone``, ``location``
        (strings) and ``age`` (int).
    """
    extracted = {}

    # Case-insensitivity is scoped to the introducer phrase only. Applying
    # re.IGNORECASE to the whole pattern would cancel the capital-letter
    # requirement and turn "I'm happy" into the name "happy".
    name_match = re.search(r"\b(?i:I['’]m|my name is)\s*([A-Z][a-z]+)", chat_text)
    if name_match:
        extracted["name"] = name_match.group(1)

    email_match = re.search(r"\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b", chat_text)
    if email_match:
        extracted["email"] = email_match.group(1)

    # Accepts both the short 123-456 and the full 123-456-7890 forms.
    phone_match = re.search(r"\b(\d{3}-\d{3}(?:-\d{4})?)\b", chat_text)
    if phone_match:
        extracted["phone"] = phone_match.group(1)

    # One or more capitalized words after "from" ("NYC", "New York"); stops at
    # the first lowercase word so trailing sentence text is not captured.
    location_match = re.search(
        r"\b(?i:from)\s+([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)", chat_text
    )
    if location_match:
        extracted["location"] = location_match.group(1).strip()

    age_match = re.search(r"\b(\d{1,3})\s*(?:years old|year old)\b", chat_text)
    if age_match:
        extracted["age"] = int(age_match.group(1))

    return extracted


# Function to extract details with Groq API
def _validate_extracted(extracted):
    """Return a copy of ``extracted`` without empty or malformed values.

    ``age`` must be an integer (numeric strings are converted, booleans are
    rejected); ``email`` must be a string shaped like an email address. Other
    keys are passed through unchanged so callers can still report extras.
    """
    valid = {}
    for key, value in extracted.items():
        if value is None:
            continue
        if isinstance(value, str) and not value.strip():
            continue
        if key == "age":
            if isinstance(value, str) and value.strip().isdecimal():
                value = int(value.strip())
            # bool is a subclass of int, so it has to be excluded explicitly.
            if isinstance(value, bool) or not isinstance(value, int):
                continue
        elif key == "email":
            if not isinstance(value, str) or not re.fullmatch(
                r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", value
            ):
                continue
        valid[key] = value
    return valid


def extract_from_chat(chat_text, max_retries=2):
    """Extract user details from ``chat_text`` via the ``extract_info`` tool.

    The request is attempted up to ``max_retries`` times. If every attempt
    raises, or the model replies without a tool call, the regex-based
    ``text_based_extraction`` is used instead.

    Args:
        chat_text: The raw chat message.
        max_retries: Maximum number of API attempts.

    Returns:
        dict of validated fields (possibly empty).
    """
    messages = [
        {"role": "system", "content": "You are a strict information extractor. Only return explicitly mentioned details in JSON."},
        {"role": "user", "content": chat_text}
    ]

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="llama-3.1-8b-instant",
                messages=messages,
                tools=tools,
                tool_choice={"type": "function", "function": {"name": "extract_info"}}
            )

            tool_calls = response.choices[0].message.tool_calls
            if not tool_calls:
                # The model ignored the forced tool; go straight to the regex
                # fallback rather than returning nothing.
                break

            extracted = json.loads(tool_calls[0].function.arguments)
            if not isinstance(extracted, dict):
                raise ValueError("tool arguments are not a JSON object")
        except Exception as e:
            print(f"Groq extraction failed (attempt {attempt+1}): {e}")
            continue

        # Validation runs outside the try block so that a malformed value can
        # never trigger another API round-trip.
        return _validate_extracted(extracted)

    # Fallback if Groq fails
    return text_based_extraction(chat_text)



### Demos
Five sample chats are tested:
1. Complete details (all fields present)  
2. Partial details (some fields missing)  
3. No details (all fields missing)  
4. Invalid details (skips invalid values)  
5. Some details (name + location only)  

The output shows both the **extracted JSON** and the **validation results**.


In [43]:

# === Demo with sample chats ===
# One chat per scenario: complete details, partial details, no details,
# invalid values, and name + location only.
samples = [
    "Hi, I'm Alice, alice@email.com, 25 years old, from NYC, phone 123-456.",
    "Hello, my name is Bob, I'm 30.",
    "Just chatting, no details.",
    "Hi, I'm Charlie, email: charlie@, age: thirty",
    "Hey, my name is Dave from LA, let's talk about AI!"
]



In [45]:

# Run the extractor on every sample chat and compare the returned keys with
# the five property names declared in the extract_info tool schema.
print("\n=== Task 2: JSON Schema Classification & Information Extraction ===\n")
for sample in samples:
    extracted = extract_from_chat(sample)
    print(f"Chat: {sample}\nExtracted: {json.dumps(extracted, indent=2)}\n")
    # Validation
    # present = keys returned, missing = schema keys not returned,
    # extra = returned keys that the schema does not define.
    # Sets are printed directly, so key order in the output is not stable.
    schema_keys = {"name", "email", "phone", "location", "age"}
    present = set(extracted.keys())
    missing = schema_keys - present
    extra = present - schema_keys
    print(f"Validation: Present keys {present}, Missing keys {missing}, Extra keys {extra}\n---")



=== Task 2: JSON Schema Classification & Information Extraction ===

Chat: Hi, I'm Alice, alice@email.com, 25 years old, from NYC, phone 123-456.
Extracted: {
  "age": 25,
  "email": "alice@email.com",
  "location": "NYC",
  "name": "Alice",
  "phone": "123-456"
}

Validation: Present keys {'name', 'email', 'phone', 'location', 'age'}, Missing keys set(), Extra keys set()
---
Chat: Hello, my name is Bob, I'm 30.
Extracted: {}

Validation: Present keys set(), Missing keys {'location', 'phone', 'email', 'name', 'age'}, Extra keys set()
---
Chat: Just chatting, no details.
Extracted: {}

Validation: Present keys set(), Missing keys {'location', 'phone', 'email', 'name', 'age'}, Extra keys set()
---
Chat: Hi, I'm Charlie, email: charlie@, age: thirty
Extracted: {
  "age": 30,
  "name": "Charlie"
}

Validation: Present keys {'age', 'name'}, Missing keys {'phone', 'email', 'location'}, Extra keys set()
---
Chat: Hey, my name is Dave from LA, let's talk about AI!
Extracted: {}

Validation: P