**Set up the API key required for the tasks:**

In [54]:
# Placeholder: the real key is redacted for security reasons. Replace it with a
# valid Groq API key before running. ConversationHistoryManager checks this
# global before it falls back to the GROQ_API_KEY environment variable.
GROQ_API_KEY = "insert_API_key"

# **TASK 1: Managing Conversation History with Summarization**

**1. Setup Libraries:-**

-- Imports only basic libraries; no high-level frameworks are used.

In [40]:
import json
import os
from typing import List, Dict, Any, Optional, Union
from datetime import datetime, timezone
import re
import requests

**2. Class Definitions and Function Definitions:-**

-- Defines the `ConversationHistoryManager` class and the display helpers `print_truncated` and `print_summary`. Together they handle conversation history storage, truncation, summarization, and formatted output.

In [41]:
class ConversationHistoryManager:
    """Store a chat transcript and shrink it by truncation or summarization.

    Each stored message is a dict with ``id``, ``role``, ``content``,
    ``timestamp`` (UTC, ISO-8601) and ``metadata`` keys.

    * Truncation is non-destructive: it returns a view of the most recent
      messages limited by message count, characters, or words.
    * Summarization produces a text summary, abstractive through the Groq
      chat-completions endpoint when an API key is available, extractive
      otherwise or whenever the API path fails.
    * Periodic summarization (every ``k`` calls to ``add_message``) replaces
      the live history with a single summary message and keeps the replaced
      messages in ``archive``.
    """

    def __init__(
        self,
        groq_api_key: Optional[str] = None,
        groq_base_url: str = "https://api.groq.com/openai/v1",
        verbose: bool = True,
        force_extractive: bool = False,
    ):
        """Create an empty manager.

        Args:
            groq_api_key: Key for the Groq API. When falsy, a module-level
                ``GROQ_API_KEY`` (if one exists) and then the ``GROQ_API_KEY``
                environment variable are tried, in that order.
            groq_base_url: Base URL that ``/chat/completions`` is appended to.
            verbose: When true, progress messages are printed.
            force_extractive: When true, the API is never called.
        """
        self.history: List[Dict[str, Any]] = []
        self.run_count: int = 0
        self.archive: List[Dict[str, Any]] = []

        # API config. The module-level constant is looked up with
        # globals().get so the class also works where it was never defined
        # (a bare name reference would raise NameError there).
        self.groq_api_key = (
            groq_api_key
            or globals().get("GROQ_API_KEY")
            or os.getenv("GROQ_API_KEY")
        )
        self.groq_base_url = groq_base_url
        self.verbose = bool(verbose)
        self.force_extractive = bool(force_extractive)

        # Truncation & periodic config (None means "disabled").
        self.max_turns: Optional[int] = None
        self.max_characters: Optional[int] = None
        self.max_words: Optional[int] = None
        self.summarization_interval: Optional[int] = None  # 'k' value: every k messages

        self._log("ConversationHistoryManager initialized.")
        self._log(f"Groq base URL: {self.groq_base_url}")
        if self.groq_api_key:
            self._log("Groq API key found. Using Abstractive summaries until forced otherwise.")
        else:
            self._log("No Groq API key found. Using Extractive fallback.")
        self._log("Assumption: k counts messages (add_message calls).")

    def _log(self, *args, **kwargs):
        """Print the arguments, but only when the manager is verbose."""
        if self.verbose:
            print(*args, **kwargs)

    # message handling
    def add_message(self, role: str, content: str, metadata: Optional[Dict] = None):
        """Append a message and run periodic summarization when it is due.

        ``run_count`` counts every call, so with an interval ``k`` the
        summarization fires on calls ``k``, ``2k``, ``3k`` and so on.
        """
        msg = {
            "id": len(self.history) + 1,
            "role": role,
            "content": content,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "metadata": metadata or {},
        }
        self.history.append(msg)
        self.run_count += 1
        self._log(f"Added {role} message (run #{self.run_count}): {content[:60]}...")

        if self.summarization_interval and self.run_count % self.summarization_interval == 0:
            self._log(f"Triggering periodic summarization at run #{self.run_count}")
            self._periodic_summarization()

    # config
    def configure_truncation_by_turns(self, max_turns: Optional[int]):
        """Limit truncated views to the last ``max_turns`` messages.

        ``None`` or a non-positive value disables turn-based truncation.
        Any length-based limits are cleared.
        """
        if max_turns is None or max_turns <= 0:
            self.max_turns = None
            self._log("Configured truncation by turns: disabled (max_turns <= 0 --- No truncation)")
        else:
            self.max_turns = int(max_turns)
            self._log(f"Configured truncation by turns: {self.max_turns} messages")
        self.max_characters = None
        self.max_words = None

    def configure_truncation_by_length(self, max_characters: Optional[int] = None, max_words: Optional[int] = None):
        """Limit truncated views by total characters and/or total words.

        Missing or non-positive limits are treated as "no limit". Any
        turn-based limit is cleared.
        """
        self.max_characters = int(max_characters) if max_characters and max_characters > 0 else None
        self.max_words = int(max_words) if max_words and max_words > 0 else None
        self.max_turns = None
        parts = []
        if self.max_characters:
            parts.append(f"{self.max_characters} chars")
        if self.max_words:
            parts.append(f"{self.max_words} words")
        self._log(f"Configured truncation by length: {', '.join(parts) if parts else 'none'}")

    def configure_periodic_summarization(self, k: Optional[int]):
        """Summarize on every ``k``-th ``add_message`` call; ``None`` or ``k <= 0`` disables it."""
        if k is None or k <= 0:
            self.summarization_interval = None
            self._log("Configured periodic summarization: disabled (k <= 0 --- No summarization)")
        else:
            self.summarization_interval = int(k)
            self._log(f"Configured periodic summarization: every {self.summarization_interval} messages")

    def get_history_stats(self) -> Dict[str, Any]:
        """Return message, character and word totals for the live history plus ``run_count``."""
        total_chars = sum(len(msg['content']) for msg in self.history)
        total_words = sum(len(msg['content'].split()) for msg in self.history)
        return {
            "total_messages": len(self.history),
            "total_characters": total_chars,
            "total_words": total_words,
            "run_count": self.run_count,
        }

    def display_history(self, title: str = "Current Conversation History"):
        """Print the live history (always, regardless of ``verbose``).

        Message content longer than 200 characters is shortened in the listing.
        """
        stats = self.get_history_stats()
        print("\n" + "="*60)
        print(title)
        print("="*60)
        print(f"Stats: {stats['total_messages']} messages, {stats['total_characters']} chars, {stats['total_words']} words")
        if self.history:
            print("First message timestamp (full):", self.history[0]['timestamp'])
            print("Last message timestamp (full):", self.history[-1]['timestamp'])
        print(f"Run count: {stats['run_count']}")
        print("-"*60)
        for i, msg in enumerate(self.history, 1):
            ts = msg['timestamp'][:19]  # drop fractional seconds and UTC offset
            content_preview = msg['content'] if len(msg['content']) <= 200 else msg['content'][:200] + "..."
            print(f"{i}. [{ts}] {msg['role'].upper()}: {content_preview}")
        print("="*60)

    # truncation
    def apply_truncation(self, verbose: Optional[bool] = None) -> List[Dict[str, Any]]:
        """Return the history view allowed by the configured truncation policy.

        The live history is never modified. With no policy configured a
        shallow copy of the full history is returned.

        Args:
            verbose: Overrides the manager's ``verbose`` setting for this call
                only; ``None`` keeps the manager's setting.
        """
        if verbose is None:
            return self._apply_truncation_policy()

        saved_verbose = self.verbose
        self.verbose = bool(verbose)
        try:
            return self._apply_truncation_policy()
        finally:
            # Restore even if a policy raises, so the override never leaks.
            self.verbose = saved_verbose

    def _apply_truncation_policy(self) -> List[Dict[str, Any]]:
        """Dispatch to the turn-based or length-based policy, or copy the history."""
        if self.max_turns:
            return self._truncate_by_turns()
        if self.max_characters or self.max_words:
            return self._truncate_by_length()
        return self.history.copy()

    def _truncate_by_turns(self) -> List[Dict[str, Any]]:
        """Return the last ``max_turns`` messages (or everything when within the limit)."""
        if self.max_turns is None:
            self._log("No truncation configured (max_turns is disabled).")
            return self.history.copy()
        if len(self.history) <= self.max_turns:
            self._log(f"No truncation needed ({len(self.history)} <= {self.max_turns})")
            return self.history.copy()
        truncated = self.history[-self.max_turns:]
        self._log(f"Truncated to last {self.max_turns} messages (removed {len(self.history) - self.max_turns})")
        return truncated

    def _truncate_by_length(self) -> List[Dict[str, Any]]:
        """Return the longest suffix of the history that fits the length limits.

        Messages are taken newest-first until the next one would exceed the
        character or word limit. The newest message is always kept, even
        when it exceeds a limit on its own.
        """
        kept_newest_first: List[Dict[str, Any]] = []
        total_chars = 0
        total_words = 0

        for msg in reversed(self.history):
            msg_chars = len(msg["content"])
            msg_words = len(msg["content"].split())

            would_exceed_chars = self.max_characters is not None and (total_chars + msg_chars > self.max_characters)
            would_exceed_words = self.max_words is not None and (total_words + msg_words > self.max_words)

            if would_exceed_chars or would_exceed_words:
                # Never return an empty view: keep the newest message anyway.
                if not kept_newest_first:
                    kept_newest_first.append(msg)
                break

            kept_newest_first.append(msg)
            total_chars += msg_chars
            total_words += msg_words

        # Appending and reversing once avoids repeated front insertions.
        truncated = kept_newest_first[::-1]

        removed_count = len(self.history) - len(truncated)
        if removed_count > 0:
            limit_desc = []
            if self.max_characters:
                limit_desc.append(f"{self.max_characters} chars")
            if self.max_words:
                limit_desc.append(f"{self.max_words} words")
            self._log(f"Truncated by length limits ({', '.join(limit_desc)}): removed {removed_count} messages")
        else:
            self._log("No truncation needed - within length limits")

        return truncated

    def summarize_history(self, history_to_summarize: Optional[List[Dict[str, Any]]] = None, use_truncated: bool = True) -> str:
        """Summarize the given messages, or the manager's own history.

        Args:
            history_to_summarize: Messages to summarize. When ``None`` the
                manager's history is used.
            use_truncated: Only consulted when ``history_to_summarize`` is
                ``None``: summarize the truncated view instead of the full
                history.

        Returns:
            ``"[EMPTY HISTORY]"`` for no messages, otherwise a string that
            starts with ``"EXTRACTIVE SUMMARY: "`` or ``"ABSTRACTIVE SUMMARY: "``.
        """
        if history_to_summarize is None:
            history_to_summarize = self.apply_truncation() if use_truncated else self.history

        if not history_to_summarize:
            return "[EMPTY HISTORY]"

        if self.force_extractive or not self.groq_api_key:
            self._log("Using extractive summarizer (forced or no API key).")
            return self._simple_extractive_summary(history_to_summarize)

        conversation_text = self._format_history_for_summarization(history_to_summarize)
        return self._groq_abstractive_summary(conversation_text, history_to_summarize)

    def _format_history_for_summarization(self, history: List[Dict[str, Any]]) -> str:
        """Render messages as ``Role: content`` lines joined by newlines."""
        return "\n".join(
            f"{msg['role'].capitalize()}: {msg['content'].strip()}" for msg in history
        )

    def _simple_extractive_summary(self, history: List[Dict[str, Any]]) -> str:
        """Build a summary from the transcript's own sentences (no API call).

        Histories of three messages or fewer are returned verbatim. Otherwise
        sentences are scored by word count plus a small bonus for appearing
        early, and the three best are joined. The result is capped at 1000
        characters.
        """
        if len(history) <= 3:
            body = self._format_history_for_summarization(history)
            return f"EXTRACTIVE SUMMARY: {body}"

        all_text = " ".join(msg["content"] for msg in history)
        sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", all_text) if s.strip()]

        scored = []
        for i, s in enumerate(sentences):
            if len(s) < 8:  # skip fragments such as "Yes." or "Windows."
                continue
            score = len(s.split()) + (1.0 / (i + 1))
            scored.append((score, s))

        scored.sort(reverse=True)
        top = [s for _, s in scored[:3]]
        summary = "EXTRACTIVE SUMMARY: " + " ".join(top)
        return (summary[:1000] + "...") if len(summary) > 1000 else summary

    @staticmethod
    def _extract_summary_text(data: Any) -> str:
        """Pull the generated text out of a decoded API response, or return ``""``.

        Tries the chat shape (``choices[0].message.content``), then the
        completion shape (``choices[0].text``), then top-level ``result`` /
        ``output`` keys.
        """
        lookup_errors = (KeyError, IndexError, TypeError, AttributeError)
        try:
            return data["choices"][0]["message"]["content"].strip()
        except lookup_errors:
            pass
        try:
            return data["choices"][0]["text"].strip()
        except lookup_errors:
            pass
        if isinstance(data, dict):
            return str(data.get("result") or data.get("output") or "").strip()
        return ""

    def _groq_abstractive_summary(self, conversation_text: str, history_ref: List[Dict[str, Any]]) -> str:
        """Ask the Groq chat-completions endpoint for a summary.

        Falls back to the extractive summary of ``history_ref`` when there is
        no API key, the request fails or times out, the status is not 200, no
        text can be read from the response, or the summary is longer than
        the (whitespace-normalized) source.
        """
        if not self.groq_api_key:
            self._log("No GROQ_API_KEY - falling back to extractive.")
            return self._simple_extractive_summary(history_ref)

        headers = {
            "Authorization": f"Bearer {self.groq_api_key}",
            "Content-Type": "application/json",
        }

        prompt = (
            "You will be given a short conversation between USER and ASSISTANT. "
            "Produce a concise, factual summary using ONLY the text given. "
            "Do NOT invent facts or add examples. Keep the summary under 120 words and under 600 characters. "
            "If the model cannot produce a faithful concise summary, return the most important sentences verbatim.\n\n"
            "### CONVERSATION START\n"
            f"{conversation_text}\n"
            "### CONVERSATION END\n\nSUMMARY:"
        )

        payload = {
            "model": "llama-3.1-8b-instant",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 140,
            "temperature": 0.2,
        }

        try:
            resp = requests.post(f"{self.groq_base_url}/chat/completions", headers=headers, json=payload, timeout=30)
            if resp.status_code != 200:
                self._log(f"Groq API returned status {resp.status_code}: {resp.text}")
                return self._simple_extractive_summary(history_ref)

            summary_text = self._extract_summary_text(resp.json())
            if not summary_text:
                self._log("Could not extract text from Groq response -> falling back to extractive.")
                return self._simple_extractive_summary(history_ref)

            # A "summary" longer than its source is not a compression; use
            # the extractive path instead.
            src_norm = re.sub(r'\s+', ' ', conversation_text).strip()
            sum_norm = re.sub(r'\s+', ' ', summary_text).strip()
            if len(sum_norm) > len(src_norm):
                self._log("Abstractive summary longer than (normalized) source -> falling back to extractive.")
                return self._simple_extractive_summary(history_ref)

            return f"ABSTRACTIVE SUMMARY: {summary_text}"

        except requests.exceptions.Timeout:
            self._log("Groq API request timed out — falling back to extractive summary.")
            return self._simple_extractive_summary(history_ref)
        except Exception as e:
            # Deliberately broad: summarization is best-effort and must not
            # make add_message fail because of a network or decoding error.
            self._log(f"Error calling Groq API: {e} — falling back to extractive summary.")
            return self._simple_extractive_summary(history_ref)

    def _periodic_summarization(self):
        """Replace the live history with one summary message and archive the originals.

        The summary is built from the truncated view, but the whole live
        history is replaced; the replaced messages remain available through
        ``get_archive``. Views of two messages or fewer always use the
        extractive summarizer.
        """
        self._log(f"\n--- PERIODIC SUMMARIZATION (every {self.summarization_interval} messages) ---")
        # display_history prints unconditionally, so it has to be guarded
        # here to keep a non-verbose manager silent.
        if self.verbose:
            self.display_history("History BEFORE Summarization (full)")
        history_for_summary = self.apply_truncation()
        preview = self._format_history_for_summarization(history_for_summary)
        self._log("\n--- Truncated view to be summarized (preview) ---")
        self._log(preview[:1200] + ("..." if len(preview) > 1200 else ""))

        if len(history_for_summary) <= 2:
            summary = self._simple_extractive_summary(history_for_summary)
            method = "extractive (forced for tiny view)"
        else:
            summary = self.summarize_history(history_for_summary, use_truncated=False)
            method = "extractive" if summary.startswith("EXTRACTIVE SUMMARY") else "abstractive"

        self._log("\nGenerated summary:")
        self._log("-" * 40)
        self._log(summary)
        self._log(f"(Method detected: {method})")
        self._log("-" * 40)

        original = self.history.copy()
        summary_message = {
            # The summary becomes the only message in the history, so it takes
            # id 1; add_message then continues with id 2.
            "id": 1,
            "role": "system",
            "content": f"[CONVERSATION SUMMARY] {summary}",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "metadata": {"type": "periodic_summary", "original_message_count": len(original), "method": method},
        }
        self.history = [summary_message]
        archive_entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "summary_message": summary_message,
            "original_history": original,
            "method": method,
            "summary_index": len(self.archive) + 1,
        }
        self.archive.append(archive_entry)
        self._log(f"History replaced with summary. New message count: {len(self.history)}")
        self._log("--- END PERIODIC SUMMARIZATION ---\n")

    def get_archive(self) -> List[Dict[str, Any]]:
        """Return a shallow copy of the archive entries, oldest first."""
        return self.archive.copy()

In [42]:
def print_truncated(title: str, truncated: List[Dict[str, Any]], char_limit: Optional[int] = None, word_limit: Optional[int] = None):
    """Print a numbered listing of ``truncated`` followed by a totals line.

    The footer reports how many messages, characters and words were kept and,
    when given, the character/word limits that were in force.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print("\n" + heavy_rule)
    print(title)
    print(light_rule)

    char_total = 0
    word_total = 0
    for position, message in enumerate(truncated, start=1):
        speaker = message['role'].upper()
        text = message['content']
        print(f"{position}. [{speaker}] {text}")
        char_total += len(text)
        word_total += len(text.split())

    # Only limits that were actually supplied appear in the footer.
    applied = []
    if char_limit:
        applied.append(f"{char_limit} chars")
    if word_limit:
        applied.append(f"{word_limit} words")
    suffix = f" ({', '.join(applied)})" if applied else ""

    print(light_rule)
    print(f"Kept {len(truncated)} messages, {char_total} chars, {word_total} words{suffix}")
    print(heavy_rule)

def print_summary(name: str, summary: str, conv_text: Optional[str] = None):
    """Print ``summary`` under a ``"<name> SUMMARY"`` heading.

    When ``conv_text`` is given, its character count is printed before the
    summary so the two sizes can be compared.
    """
    rule = "-" * 60
    print("\n" + rule)
    print(f"{name} SUMMARY")
    print(rule)
    if conv_text is not None:
        source_size = len(conv_text)
        print(f"Source chars: {source_size}")
    print(summary)
    print(rule)

**3. Test Case 1: Basic History storage and Display**

-- Demonstrates basic conversation storage and display using the manager class. Shows how messages are added and tracked.

[All the example chats used in the test cases are generated via ChatGPT]

In [43]:
def test_basic_history(verbose: bool = True, force_extractive: bool = False) -> ConversationHistoryManager:
    """Load a short tech-support chat into a new manager, display it, and return the manager."""
    conversation = (
        ("system", "You are a helpful AI assistant discussing technology trends."),
        ("user", "Hi, can you help me with Python installation?"),
        ("assistant", "Of course! Do you want to install it on Windows or Mac?"),
        ("user", "Windows."),
        ("assistant", "You can download the installer from python.org and run it."),
        ("user", "Which version should I choose?"),
        ("assistant", "The latest stable version, usually 3.x, is recommended."),
    )

    manager = ConversationHistoryManager(verbose=verbose, force_extractive=force_extractive)
    for role, text in conversation:
        manager.add_message(role, text)

    manager.display_history("Tech Support Conversation (Full)")
    history_stats = manager.get_history_stats()
    print("\nSanity checks:", history_stats)
    return manager

# Run the basic-history demo; the populated manager is reused by the summarization test.
mgr = test_basic_history()

ConversationHistoryManager initialized.
Groq base URL: https://api.groq.com/openai/v1
Groq API key found. Using Abstractive summaries until forced otherwise.
Assumption: k counts messages (add_message calls).
Added system message (run #1): You are a helpful AI assistant discussing technology trends....
Added user message (run #2): Hi, can you help me with Python installation?...
Added assistant message (run #3): Of course! Do you want to install it on Windows or Mac?...
Added user message (run #4): Windows....
Added assistant message (run #5): You can download the installer from python.org and run it....
Added user message (run #6): Which version should I choose?...
Added assistant message (run #7): The latest stable version, usually 3.x, is recommended....

Tech Support Conversation (Full)
Stats: 7 messages, 311 chars, 53 words
First message timestamp (full): 2025-09-16T15:42:39.829231+00:00
Last message timestamp (full): 2025-09-16T15:42:39.829301+00:00
Run count: 7
-----------------

**4. Test Case 2: Summarization**

-- Displays the full conversation and generates a summary.

In [44]:
def test_summarization(manager: Optional[ConversationHistoryManager] = None, verbose: bool = True):
    """Display a manager's history, summarize it, print the summary, and return it.

    A new (empty) manager is created when ``manager`` is not supplied;
    ``verbose`` only applies to that newly created manager.
    """
    if manager:
        target = manager
    else:
        target = ConversationHistoryManager(verbose=verbose)

    target.display_history("Conversation to Summarize")
    result = target.summarize_history()

    # The summarizer labels its output, so the prefix tells which path ran.
    if result.startswith("EXTRACTIVE"):
        label = "extractive"
    else:
        label = "abstractive"

    rule = "-" * 60
    print(f"\n(Summary method: {label})")
    print("\nGENERATED SUMMARY")
    print(rule)
    print(result)
    print(rule)
    return result

# Summarize the conversation held by `mgr` (created by the basic-history test).
summary = test_summarization(mgr)


Conversation to Summarize
Stats: 7 messages, 311 chars, 53 words
First message timestamp (full): 2025-09-16T15:42:39.829231+00:00
Last message timestamp (full): 2025-09-16T15:42:39.829301+00:00
Run count: 7
------------------------------------------------------------
1. [2025-09-16T15:42:39] SYSTEM: You are a helpful AI assistant discussing technology trends.
2. [2025-09-16T15:42:39] USER: Hi, can you help me with Python installation?
3. [2025-09-16T15:42:39] ASSISTANT: Of course! Do you want to install it on Windows or Mac?
4. [2025-09-16T15:42:39] USER: Windows.
5. [2025-09-16T15:42:39] ASSISTANT: You can download the installer from python.org and run it.
6. [2025-09-16T15:42:39] USER: Which version should I choose?
7. [2025-09-16T15:42:39] ASSISTANT: The latest stable version, usually 3.x, is recommended.

(Summary method: abstractive)

GENERATED SUMMARY
------------------------------------------------------------
ABSTRACTIVE SUMMARY: The user asked for help with Python installatio

**5. Test Case 3: Truncation policies**

-- Demonstrates truncation by character or word length. Ensures the most recent messages fit within specified limits.

In [45]:
def test_truncation(verbose: bool = True):
    """Show turn-, character- and word-based truncation on a 12-message chat.

    Each policy is applied at three limits and the resulting view is printed.
    Returns the manager, which still holds the full (untruncated) history.
    """
    conversation = (
        ("user", "Hey, what are some healthy breakfast options?"),
        ("assistant", "Oatmeal with fruits, yogurt with granola, or eggs with vegetables."),
        ("user", "I don’t like oatmeal."),
        ("assistant", "Then try yogurt parfaits with berries."),
        ("user", "Are smoothies healthy?"),
        ("assistant", "Yes, but avoid too much sugar. Use spinach, banana, and peanut butter."),
        ("user", "Can I drink coffee every day?"),
        ("assistant", "In moderation, yes. 1–2 cups is usually fine."),
        ("user", "Is skipping breakfast okay?"),
        ("assistant", "Some people do intermittent fasting, but a balanced breakfast is healthier for most."),
        ("user", "Can you make a one-week breakfast plan?"),
        ("assistant", "Sure! Day 1: eggs + toast, Day 2: yogurt parfait, Day 3: smoothie bowl, etc."),
    )

    mgr = ConversationHistoryManager(verbose=verbose)
    for role, text in conversation:
        mgr.add_message(role, text)

    # Keep only the last N messages.
    for turn_limit in (3, 5, 7):
        mgr.configure_truncation_by_turns(turn_limit)
        view = mgr.apply_truncation()
        print_truncated(f"Turn-based truncation: max {turn_limit} messages", view)

    # Keep the newest messages that fit a character budget.
    for char_budget in (100, 200, 400):
        mgr.configure_truncation_by_length(max_characters=char_budget)
        view = mgr.apply_truncation()
        print_truncated(f"Character-based truncation: max {char_budget} chars", view, char_limit=char_budget)

    # Keep the newest messages that fit a word budget.
    for word_budget in (30, 50, 80):
        mgr.configure_truncation_by_length(max_words=word_budget)
        view = mgr.apply_truncation()
        print_truncated(f"Word-based truncation: max {word_budget} words", view, word_limit=word_budget)

    return mgr

# Run the truncation demo; the returned manager is kept for inspection.
truncate = test_truncation()

ConversationHistoryManager initialized.
Groq base URL: https://api.groq.com/openai/v1
Groq API key found. Using Abstractive summaries until forced otherwise.
Assumption: k counts messages (add_message calls).
Added user message (run #1): Hey, what are some healthy breakfast options?...
Added assistant message (run #2): Oatmeal with fruits, yogurt with granola, or eggs with veget...
Added user message (run #3): I don’t like oatmeal....
Added assistant message (run #4): Then try yogurt parfaits with berries....
Added user message (run #5): Are smoothies healthy?...
Added assistant message (run #6): Yes, but avoid too much sugar. Use spinach, banana, and pean...
Added user message (run #7): Can I drink coffee every day?...
Added assistant message (run #8): In moderation, yes. 1–2 cups is usually fine....
Added user message (run #9): Is skipping breakfast okay?...
Added assistant message (run #10): Some people do intermittent fasting, but a balanced breakfas...
Added user message (run #11)

**6. Test Case 4: Periodic Summarization (k-th Run)**

-- Illustrates periodic summarization (every k messages). Older history is archived, replaced by a concise summary.

In [46]:
def test_periodic_summarization(k: int = 3, verbose: bool = True) -> ConversationHistoryManager:
    """Feed six messages to a manager that summarizes every ``k`` messages.

    With the default ``k=3`` the third and sixth messages each trigger a
    summarization. The history and archive size are printed after each group
    of three, followed by a preview of the first archived batch.
    """
    first_batch = (
        ("user", "I'm interested in learning about renewable energy sources."),
        ("assistant", "Renewable energy comes from naturally replenishing sources like solar, wind, hydroelectric, and geothermal power."),
        ("user", "What are the main benefits of solar power?"),
    )
    second_batch = (
        ("assistant", "Solar power benefits include: zero emissions during operation, reduced electricity bills over time, and low maintenance."),
        ("user", "Are there disadvantages?"),
        ("assistant", "High upfront cost and weather dependency."),
    )

    pm = ConversationHistoryManager(verbose=verbose)
    pm.configure_periodic_summarization(k=k)

    for role, text in first_batch:
        pm.add_message(role, text)
    pm.display_history("After 1st summarization")
    print("\nArchive length:", len(pm.get_archive()))

    for role, text in second_batch:
        pm.add_message(role, text)
    pm.display_history("After 2nd summarization")

    archived = pm.get_archive()
    print("\nArchive length:", len(archived))
    if archived:
        print("\nArchive preview (first entry original messages):")
        first_originals = archived[0]["original_history"][:3]
        print(json.dumps(first_originals, indent=2))
    return pm

# Run the periodic-summarization demo, summarizing after every third message.
periodic = test_periodic_summarization(k=3)

ConversationHistoryManager initialized.
Groq base URL: https://api.groq.com/openai/v1
Groq API key found. Using Abstractive summaries until forced otherwise.
Assumption: k counts messages (add_message calls).
Configured periodic summarization: every 3 messages
Added user message (run #1): I'm interested in learning about renewable energy sources....
Added assistant message (run #2): Renewable energy comes from naturally replenishing sources l...
Added user message (run #3): What are the main benefits of solar power?...
Triggering periodic summarization at run #3

--- PERIODIC SUMMARIZATION (every 3 messages) ---

History BEFORE Summarization (full)
Stats: 3 messages, 213 chars, 30 words
First message timestamp (full): 2025-09-16T15:42:51.768554+00:00
Last message timestamp (full): 2025-09-16T15:42:51.768586+00:00
Run count: 3
------------------------------------------------------------
1. [2025-09-16T15:42:51] USER: I'm interested in learning about renewable energy sources.
2. [2025-09

**7. Test Case 5: Multiple samples & cross-testing**

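-- Runs multiple conversation samples (programming and health) and compares their summaries. A summary is abstractive when the Groq call succeeds; it is extractive when that is forced, when no API key is set, when the API call fails, or when the abstractive result is longer than the source text.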

In [47]:
def test_multiple_samples(verbose: bool = True, force_extractive: bool = False):
    """Summarize two unrelated sample chats and report which method was used.

    Each chat is truncated to its last four messages before summarizing.
    Returns ``(prog, health, results)`` where ``results`` maps the lower-cased
    sample name to its summary, method label and source length in characters.
    """
    scripts = {
        "Programming": (
            ("system", "You are a coding mentor helping beginners."),
            ("user", "I'm new to Python programming. Where should I start?"),
            ("assistant", "Start with variables, data types, and basic operators."),
            ("user", "What's the difference between lists and dictionaries?"),
            ("assistant", "Lists are ordered; dictionaries are key-value pairs."),
        ),
        "Health": (
            ("system", "You are a wellness coach."),
            ("user", "I want to improve my health but don't know where to start."),
            ("assistant", "Start with consistent sleep, movement, and hydration."),
            ("user", "How important is diet compared to exercise?"),
            ("assistant", "Diet typically has more impact on weight management."),
        ),
    }

    # Build one manager per sample, in declaration order.
    managers = {}
    for label, script in scripts.items():
        manager = ConversationHistoryManager(verbose=verbose, force_extractive=False)
        for role, text in script:
            manager.add_message(role, text)
        managers[label] = manager

    for manager in managers.values():
        manager.configure_truncation_by_turns(4)

    rule = "-" * 60
    results = {}
    for label, manager in managers.items():
        view = manager.apply_truncation()
        source_text = manager._format_history_for_summarization(view)

        forced = force_extractive or manager.force_extractive
        if forced:
            summary = manager._simple_extractive_summary(view)
            method = "extractive (forced)"
        else:
            summary = manager.summarize_history(history_to_summarize=view, use_truncated=False)
            method = "abstractive" if summary.startswith("ABSTRACTIVE") else "extractive"

        print("\n" + rule)
        print(f"{label} SUMMARY (method: {method})")
        print(rule)
        print(summary)
        print(rule)
        results[label.lower()] = {"summary": summary, "method": method, "source_chars": len(source_text)}

    return managers["Programming"], managers["Health"], results

# Run the multi-sample demo; the result is a (prog, health, results) tuple.
m_samples = test_multiple_samples()

ConversationHistoryManager initialized.
Groq base URL: https://api.groq.com/openai/v1
Groq API key found. Using Abstractive summaries until forced otherwise.
Assumption: k counts messages (add_message calls).
Added system message (run #1): You are a coding mentor helping beginners....
Added user message (run #2): I'm new to Python programming. Where should I start?...
Added assistant message (run #3): Start with variables, data types, and basic operators....
Added user message (run #4): What's the difference between lists and dictionaries?...
Added assistant message (run #5): Lists are ordered; dictionaries are key-value pairs....
ConversationHistoryManager initialized.
Groq base URL: https://api.groq.com/openai/v1
Groq API key found. Using Abstractive summaries until forced otherwise.
Assumption: k counts messages (add_message calls).
Added system message (run #1): You are a wellness coach....
Added user message (run #2): I want to improve my health but don't know where to start....
A

# **TASK 2: JSON Schema Classification & Information Extraction**

**1. Setup Libraries, JSON Schema and Function Specifications:-**

-- Importing Basic Libraries without any high-level frameworks.

In [48]:
import json
import requests
from typing import Dict, Any, List, Optional
import jsonschema
from jsonschema import validate, ValidationError

-- Defines the structure of the extracted data so that the model output can be validated against strict rules.

In [49]:
# JSON Schema describing one extracted record.
# - "name" and "email" are required; "phone", "location" and "age" are optional.
# - Keys other than the five listed are not allowed ("additionalProperties": False).
# NOTE(review): presumably applied to the model's tool-call arguments via
# jsonschema.validate; the validation call itself is not in this section.
JSON_SCHEMA = {
    "title": "ChatInfoExtraction",
    "type": "object",
    "properties": {
        "name": {"type": "string", "minLength": 1, "description": "Full name of the person"},
        "email": {
            "type": "string",
            # local-part @ domain, ending in a dot and at least two letters
            "pattern": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
            "description": "Email address"
        },
        # No pattern: any string is accepted as a phone number.
        "phone": {"type": "string", "description": "Phone number in various formats"},
        "location": {"type": "string", "description": "City, state, or address"},
        # Whole years only, restricted to 1..120.
        "age": {"type": "integer", "minimum": 1, "maximum": 120, "description": "Age in years"}
    },
    "required": ["name", "email"],
    "additionalProperties": False
}

-- Tells the model how to call extract_chat_info and return the structured fields in a tool call response.

In [50]:
# Tool definition sent to the chat-completions API in the request's "tools" list.
# It declares a single function, "extract_chat_info", whose parameters name the
# same five fields as JSON_SCHEMA, without that schema's pattern, length and
# range constraints.
FUNCTION_SPEC = {
    "type": "function",
    "function": {
        "name": "extract_chat_info",
        "description": "Extract structured information from chat messages including name, email, phone, location, and age",
        "parameters": {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "Full name of the person mentioned in the chat"},
                "email": {"type": "string", "description": "Email address mentioned in the chat"},
                "phone": {"type": "string", "description": "Phone number mentioned in the chat"},
                "location": {"type": "string", "description": "Location, city, or address mentioned in the chat"},
                "age": {"type": "integer", "description": "Age of the person mentioned in the chat"}
            },
            # Same required fields as JSON_SCHEMA.
            "required": ["name", "email"]
        }
    }
}

-- Main class that runs the API call, parses tool calls, validates output with the schema, and returns the results.

In [51]:
class ChatInfoExtractor:
    """Extract structured contact details from chat text through a forced tool call.

    The extractor posts the chat text to ``{base_url}/chat/completions`` together
    with ``FUNCTION_SPEC``, forces the model to call ``extract_chat_info``, decodes
    the tool-call arguments, and validates them against ``JSON_SCHEMA``.

    None of the public methods raise on API or parsing problems; failures are
    reported through the returned dictionaries instead.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.groq.com/openai/v1", timeout: int = 30, verbose: bool = True):
        """Store connection settings and pre-build the request headers.

        Args:
            api_key: Bearer token sent in the ``Authorization`` header.
            base_url: API root; a trailing ``/`` is stripped.
            timeout: Timeout in seconds passed to ``requests.post``.
            verbose: When true, progress messages are printed via ``_log``.

        Raises:
            ValueError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("api_key is required for real API calls.")
        # Use the key the caller supplied. The previous code overwrote it with
        # the module-level GROQ_API_KEY, so the validated argument was ignored.
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        self.function_spec = FUNCTION_SPEC
        self.schema = JSON_SCHEMA
        self.timeout = timeout
        self.verbose = verbose

    def _log(self, *args, **kwargs):
        """Forward the arguments to ``print`` when ``self.verbose`` is truthy."""
        if self.verbose:
            print(*args, **kwargs)

    def extract_info_from_chat(self, chat_text: str) -> Dict[str, Any]:
        """Ask the model to extract fields from ``chat_text`` via a tool call.

        Args:
            chat_text: Free-form chat message to analyse.

        Returns:
            On success: ``{"success": True, "raw_response", "tool_call",
            "extracted_data"}`` where ``extracted_data`` is the decoded
            ``function.arguments`` JSON of the first tool call.
            On failure: ``{"success": False, "error", "raw_response"}`` (plus
            ``"tool_call"`` when the call was present but had no arguments).
        """
        prompt_system = (
            "You are an expert at extracting structured information from chat messages. "
            "Return the structured result by invoking the provided function 'extract_chat_info'. "
            "If a field is not present in the text, omit it from the function arguments."
        )
        payload = {
            "model": "llama-3.3-70b-versatile",
            "messages": [
                {"role": "system", "content": prompt_system},
                {"role": "user", "content": f"Extract information from this chat message: {chat_text}"}
            ],
            "tools": [self.function_spec],
            # Force the model to answer with a call to extract_chat_info.
            "tool_choice": {"type": "function", "function": {"name": "extract_chat_info"}},
            "temperature": 0.0
        }

        try:
            resp = requests.post(f"{self.base_url}/chat/completions", headers=self.headers, json=payload, timeout=self.timeout)
        except Exception as e:
            # Deliberately broad: this method reports transport failures in the
            # result dict rather than raising.
            return {"success": False, "error": f"Request error: {e}", "raw_response": None}

        self._log(f"API Response Status: {resp.status_code}")
        try:
            result = resp.json()
        except ValueError as e:
            # requests raises a ValueError subclass when the body is not JSON.
            return {"success": False, "error": f"Invalid JSON response: {e}", "raw_response": resp.text}

        if resp.status_code >= 400:
            # Surface the API's own error instead of the misleading
            # "No choices in response". Presumably the body follows the
            # OpenAI-style {"error": {"message": ...}} shape - TODO confirm.
            api_error = result.get("error") if isinstance(result, dict) else None
            detail = api_error.get("message") if isinstance(api_error, dict) else api_error
            return {
                "success": False,
                "error": f"API error (HTTP {resp.status_code}): {detail or 'no error detail in response'}",
                "raw_response": result
            }

        try:
            choices = result.get("choices", [])
            if not choices:
                return {"success": False, "error": "No choices in response", "raw_response": result}

            message = choices[0].get("message", {})
            # `or []` also covers an explicit null "tool_calls" value.
            tool_calls = message.get("tool_calls") or []
            if not tool_calls:
                return {"success": False, "error": "No tool_calls found in message", "raw_response": result}

            tool_call = tool_calls[0]
            args_json = tool_call.get("function", {}).get("arguments")
            if not args_json:
                return {"success": False, "error": "No function.arguments present in tool_call", "raw_response": result, "tool_call": tool_call}

            extracted_data = json.loads(args_json)
            return {"success": True, "raw_response": result, "tool_call": tool_call, "extracted_data": extracted_data}

        except Exception as e:
            # Any unexpected response shape or undecodable arguments end up here.
            return {"success": False, "error": f"Error parsing tool call: {e}", "raw_response": result}

    def validate_extracted_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate ``data`` against ``self.schema``.

        Returns:
            ``{"valid": True, "message", "validated_data": data}`` when the data
            conforms, otherwise ``{"valid": False, "message", "error_path",
            "validated_data": None}`` where ``error_path`` is the path to the
            offending element as a list (``None`` when the path is empty).
        """
        try:
            validate(instance=data, schema=self.schema)
            return {"valid": True, "message": "Data validation passed", "validated_data": data}
        except ValidationError as e:
            return {
                "valid": False,
                "message": f"Validation failed: {e.message}",
                "error_path": list(e.path) if e.path else None,
                "validated_data": None
            }

    def process_chat(self, chat_text: str) -> Dict[str, Any]:
        """Run extraction followed by schema validation for one chat message.

        Returns:
            A dict that always contains ``chat_text``, ``extraction_success``,
            ``raw_response`` and ``validation_result``. Failed extractions add
            ``extraction_error`` (and ``validation_result`` is ``None``);
            successful ones add ``raw_tool_call`` and ``extracted_data``.
        """
        self._log(f"Processing chat (preview): {chat_text[:80]}...")
        extraction = self.extract_info_from_chat(chat_text)
        if not extraction.get("success"):
            return {
                "chat_text": chat_text,
                "extraction_success": False,
                "extraction_error": extraction.get("error"),
                "raw_response": extraction.get("raw_response"),
                "validation_result": None
            }

        extracted = extraction["extracted_data"]
        validation = self.validate_extracted_data(extracted)
        return {
            "chat_text": chat_text,
            "extraction_success": True,
            "raw_tool_call": extraction.get("tool_call"),
            "extracted_data": extracted,
            "validation_result": validation,
            "raw_response": extraction.get("raw_response")
        }

-- Example chat texts (generated via ChatGPT), with cases such as complete, partial and varied, used to test extraction and validation behaviour.

In [52]:
# Demo inputs for the extraction pipeline; "description" states which fields
# each chat message is expected to contain.
SAMPLE_CHATS = [
    {
        "id": 1,
        "text": (
            "Hi there! I'm Sarah Johnson and you can reach me at sarah.johnson@gmail.com. "
            "I'm 28 years old and live in New York City. "
            "My phone number is +1-555-123-4567."
        ),
        "description": "Complete information - all fields present",
    },
    {
        "id": 2,
        "text": (
            "Hello, my name is Mike Chen, email: mike.c@company.com. "
            "I'm based in San Francisco."
        ),
        "description": "Partial information - missing phone and age",
    },
    {
        "id": 3,
        "text": (
            "Hey, I'm Alex Thompson. "
            "You can call me at 555-987-6543 or email alex@example.com. "
            "I'm 35 and currently in Boston, MA."
        ),
        "description": "Complete information with different formatting",
    },
]

-- Demonstrates the end-to-end flow: extract info from chats, validate the JSON, and print the results with a summary.

In [53]:
def extraction_pipeline(api_key: Optional[str] = None, verbose: bool = True) -> List[Dict[str, Any]]:
    """Run extraction and validation over every entry in ``SAMPLE_CHATS``.

    For each sample the original text, the raw tool call, the extracted JSON
    and the validation verdict are printed, followed by a summary of totals.

    Args:
        api_key: API key to use. When omitted, the module-level
            ``GROQ_API_KEY`` is tried, then the ``GROQ_API_KEY`` environment
            variable.
        verbose: Passed to ``ChatInfoExtractor`` to toggle its progress logging.

    Returns:
        The list of ``ChatInfoExtractor.process_chat`` results, one per sample,
        or an empty list when no API key could be resolved.
    """
    # Same resolution order as ConversationHistoryManager: explicit argument,
    # module constant, environment. The previous code always overwrote the
    # argument with GROQ_API_KEY and never consulted the environment, despite
    # the message below telling the user to set it.
    api_key = api_key or GROQ_API_KEY or os.getenv("GROQ_API_KEY")
    if not api_key:
        print("GROQ_API_KEY not found. To run the real API demonstration set the GROQ_API_KEY environment variable.")
        return []

    extractor = ChatInfoExtractor(api_key=api_key, verbose=verbose)
    results = []
    for i, chat in enumerate(SAMPLE_CHATS, start=1):
        print(f"\nSAMPLE CHAT {i}: {chat['description']}")
        print("-" * 60)
        print(f"Original: \"{chat['text']}\"\n")
        result = extractor.process_chat(chat["text"])
        results.append(result)

        if result["extraction_success"]:
            validation = result["validation_result"]
            print("Raw tool_call:")
            print(json.dumps(result["raw_tool_call"], indent=2))
            print("\nExtracted JSON:")
            print(json.dumps(result["extracted_data"], indent=2))
            print("\nValidation:")
            if validation["valid"]:
                print("PASS -", validation["message"])
            else:
                print("FAIL -", validation["message"], "Path:", validation.get("error_path"))
        else:
            print("Extraction failed:", result["extraction_error"])
        print("\n" + "=" * 60)

    total = len(results)
    successful = sum(1 for r in results if r.get("extraction_success"))
    # `or {}` guards against a present-but-None "validation_result", which is
    # what failed extractions carry.
    valid = sum(
        1
        for r in results
        if r.get("extraction_success") and (r.get("validation_result") or {}).get("valid")
    )
    print("\nSUMMARY:")
    print(f"Total chats: {total}")
    print(f"Successful extractions: {successful}")
    print(f"Valid extractions: {valid}")
    return results

# Run the demonstration with default arguments and keep the returned list of
# per-chat result dicts in `results`.
results = extraction_pipeline()


SAMPLE CHAT 1: Complete information - all fields present
------------------------------------------------------------
Original: "Hi there! I'm Sarah Johnson and you can reach me at sarah.johnson@gmail.com. I'm 28 years old and live in New York City. My phone number is +1-555-123-4567."

Processing chat (preview): Hi there! I'm Sarah Johnson and you can reach me at sarah.johnson@gmail.com. I'm...
API Response Status: 200
Raw tool_call:
{
  "id": "zwvhp3a8p",
  "type": "function",
  "function": {
    "name": "extract_chat_info",
    "arguments": "{\"age\":28,\"email\":\"sarah.johnson@gmail.com\",\"location\":\"New York City\",\"name\":\"Sarah Johnson\",\"phone\":\"+1-555-123-4567\"}"
  }
}

Extracted JSON:
{
  "age": 28,
  "email": "sarah.johnson@gmail.com",
  "location": "New York City",
  "name": "Sarah Johnson",
  "phone": "+1-555-123-4567"
}

Validation:
PASS - Data validation passed


SAMPLE CHAT 2: Partial information - missing phone and age
---------------------------------------