<a href="https://colab.research.google.com/github/Ciberchamp/Conversation-summerizer/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# INSTALLATION & SETUP CELL

# Install required packages
!pip install groq openai

# Import necessary libraries
import json
import textwrap
from groq import Groq
from typing import Dict, List, Optional, Any
import re
from google.colab import userdata

# Initialize Groq client with secret key from Colab
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
client = Groq(api_key=GROQ_API_KEY)

print("✅ Setup complete! Groq client initialized.")
print("🔑 API Key loaded from Colab secrets")
print("📦 All packages installed successfully")

In [19]:
# TASK 1 - Managing Conversation History with Summarization

import json
import textwrap
from groq import Groq
import os

class ConversationManager:
    """
    Manages conversation history with truncation and periodic summarization.
    """
    def __init__(self, client, k_summarize=3, model="llama-3.1-8b-instant"):
        """
        Initializes the ConversationManager.

        Args:
            client: The Groq client instance
            k_summarize (int): Summarize the conversation after every k-th run.
            model (str): The Groq model to use for chat and summarization.
        """
        self.history = []
        self.run_count = 0
        self.k_summarize = k_summarize
        self.model = model

        # Store the Groq client
        self.client = client

        # Start with a system prompt to set the context
        self.history.append({
            "role": "system",
            "content": "You are a helpful assistant."
        })

    def add_message(self, role, content):
        """Adds a message to the conversation history."""
        self.history.append({"role": role, "content": content})

    def _call_groq_api(self, messages, max_tokens=500):
        """Helper method to call Groq API with error handling."""
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.7
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error calling Groq API: {e}")
            return f"Sorry, I encountered an error: {str(e)}"

    def _summarize_history(self):
        """Internal method to summarize the conversation history using the Groq API."""
        print("\n" + "="*20 + " SUMMARIZING " + "="*20)

        # Convert history to a plain string for the summarization prompt
        history_str = "\n".join([f"{msg['role']}: {msg['content']}" for msg in self.history])

        prompt = [
            {
                "role": "system",
                "content": "You are an expert conversation summarizer. Create a concise summary of the following dialogue, retaining all key facts, context, and important details. The summary should be comprehensive enough to continue the conversation naturally."
            },
            {
                "role": "user",
                "content": f"Please summarize this conversation:\n\n{history_str}"
            }
        ]

        try:
            # Call Groq API for actual summarization
            summary = self._call_groq_api(prompt, max_tokens=300)
            print(f"Generated Summary: {summary}")

            # Replace history with the summary
            self.history = [
                {"role": "system", "content": f"This is a summary of the previous conversation: {summary}"}
            ]
            print("✅ History replaced with summary")
        except Exception as e:
            print(f"Error during summarization: {e}")
            # If summarization fails, keep the old history

        print("="*53 + "\n")

    def pretty_print_history(self, history=None, title="Current History"):
        """Pretty print conversation history with nice formatting."""
        if history is None:
            history = self.history

        print(f"\n{'='*60}")
        print(f" {title}")
        print(f"{'='*60}")

        for i, msg in enumerate(history, 1):
            role = msg['role'].upper()
            content = msg['content']

            # Color coding (works in most terminals)
            if role == "SYSTEM":
                color = "\033[95m"  # Magenta
            elif role == "USER":
                color = "\033[94m"   # Blue
            elif role == "ASSISTANT":
                color = "\033[92m"   # Green
            else:
                color = "\033[0m"    # Default

            reset_color = "\033[0m"

            print(f"\n{color}[{i}] {role}:{reset_color}")
            print("-" * 40)

            # Wrap long content to multiple lines (max 70 chars per line)
            wrapped_content = textwrap.fill(content, width=70,
                                          initial_indent="  ",
                                          subsequent_indent="  ")
            print(wrapped_content)

        print(f"\n{'='*60}\n")

    def simple_print_history(self, history=None, title="Current History"):
        """Simple pretty print without colors (for compatibility)."""
        if history is None:
            history = self.history

        print(f"\n{title}:")
        print("-" * len(title) + "-")

        for i, msg in enumerate(history, 1):
            role = msg['role'].upper()
            content = msg['content']

            print(f"\n[{i}] {role}:")
            # Wrap content nicely
            wrapped_content = textwrap.fill(content, width=70,
                                          initial_indent="    ",
                                          subsequent_indent="    ")
            print(wrapped_content)
        print()

    def chat(self, user_input):
        """
        Handles a single turn of conversation, including periodic summarization.
        """
        # 1. Add user message and increment run count
        self.add_message("user", user_input)
        self.run_count += 1

        # 2. Get assistant's response from Groq API
        assistant_response = self._call_groq_api(self.history)
        self.add_message("assistant", assistant_response)

        # 3. Return response first, then handle summarization externally
        return assistant_response

    def check_and_summarize(self):
        """Check if summarization is needed and perform it."""
        if self.run_count % self.k_summarize == 0:
            self._summarize_history()
            return True
        return False

    def get_truncated_history(self, max_turns=None, max_chars=None, max_words=None):
        """
        Returns a truncated version of the history.

        Args:
            max_turns (int): Keep only the last N turns. A turn is a user-assistant pair.
            max_chars (int): Keep only the most recent messages that fit within the character limit.
            max_words (int): Keep only the most recent messages that fit within the word limit.
        """
        if max_turns:
            # A turn is a user + assistant message, so 2*max_turns
            num_messages = max_turns * 2
            return self.history[-num_messages:]

        if max_chars:
            truncated = []
            current_chars = 0
            # Iterate backwards to get the most recent messages first
            for message in reversed(self.history):
                msg_len = len(message["content"])
                if current_chars + msg_len <= max_chars:
                    truncated.insert(0, message) # insert at the beginning to maintain order
                    current_chars += msg_len
                else:
                    break
            return truncated

        if max_words:
            truncated = []
            current_words = 0
            # Iterate backwards to get the most recent messages first
            for message in reversed(self.history):
                msg_words = len(message["content"].split())
                if current_words + msg_words <= max_words:
                    truncated.insert(0, message) # insert at the beginning to maintain order
                    current_words += msg_words
                else:
                    break
            return truncated

        return self.history

# --- Demonstration of Task 1 ---
print("## Task 1: Conversation Management Demonstration ##")

# Initialize manager to summarize after every 3rd run
manager = ConversationManager(client, k_summarize=3)

# --- Conversation Flow ---
print("--- Starting Conversation (Summarize every 3 runs) ---")

# Run 1
print(f"\n🗣️ [Run 1] User: What is the capital of France?")
response = manager.chat("What is the capital of France?")
print(f"🤖 [Run 1] Assistant: {response}")

# Run 2
print(f"\n🗣️ [Run 2] User: And what is its population?")
response = manager.chat("And what is its population?")
print(f"🤖 [Run 2] Assistant: {response}")

# Run 3 -> Summarization should trigger AFTER this run
print(f"\n🗣️ [Run 3] User: What's a famous landmark there?")
response = manager.chat("What's a famous landmark there?")
print(f"🤖 [Run 3] Assistant: {response}")

# Check and perform summarization AFTER printing the response
manager.check_and_summarize()

# Pretty print the history after summarization
manager.pretty_print_history(title="History after Run 3 (Summarized)")

# Run 4
print(f"\n🗣️ [Run 4] User: Thanks! Now, tell me about the capital of Japan.")
response = manager.chat("Thanks! Now, tell me about the capital of Japan.")
print(f"🤖 [Run 4] Assistant: {response}")
manager.check_and_summarize()

# Pretty print current history
manager.pretty_print_history(title="History after Run 4")

# --- Build More History for Truncation Demo ---
print("\n🔧 Building more conversation history for truncation demo...")

# Create a fresh manager with more conversations (no summarization to build up history)
demo_manager = ConversationManager(client, k_summarize=10)  # High number to avoid summarization

# Adding several more conversations to build up substantial history
more_conversations = [
    ("What's the weather like in Tokyo?", "Tokyo has a humid subtropical climate characterized by warm, wet summers and mild winters with occasional cold snaps. The city experiences a rainy season from June to mid-July, and typhoons are most common in September."),
    ("Tell me about Japanese cuisine", "Japanese cuisine, or washoku, is known for its emphasis on seasonality, quality ingredients, and presentation. Staple foods include rice, seafood, and pickled vegetables. Popular dishes include sushi, tempura, ramen, udon, soba, and donburi. Regional specialties vary across Japan's 47 prefectures."),
    ("What's the population of Tokyo?", "As of 2023, the Tokyo metropolitan area has a population of approximately 37.4 million people, making it the most populous metropolitan area in the world. The 23 special wards of Tokyo have about 9.7 million residents, while the broader prefecture has around 14 million people."),
    ("Any famous temples in Tokyo?", "Tokyo has several famous temples including Senso-ji in Asakusa (Tokyo's oldest temple), Meiji Shrine in Shibuya (dedicated to Emperor Meiji), Zojo-ji Temple next to Tokyo Tower, and Kanda Myojin Shrine in Chiyoda. These sites offer a glimpse into Japan's religious traditions and architectural heritage."),
    ("How about transportation in Tokyo?", "Tokyo has one of the most extensive and efficient public transportation systems in the world. It includes the JR East network (especially the Yamanote Line that circles central Tokyo), two subway systems (Tokyo Metro and Toei Subway), numerous private railways, and an extensive bus network. The system is known for its punctuality, cleanliness, and frequency of service.")
]

for i, (question, answer) in enumerate(more_conversations, 1):
    demo_manager.add_message("user", question)
    demo_manager.add_message("assistant", answer)
    demo_manager.run_count += 1

print(f"📊 Built history with {len(demo_manager.history)} total messages")

# --- Now Demonstrate Truncation ---
print("\n🔧 --- Demonstrating Truncation Options ---")

# Show original history first
print(f"\n📋 ORIGINAL FULL HISTORY ({len(demo_manager.history)} messages):")
demo_manager.simple_print_history(title="Full History")

# Calculate total characters and words in the full history
total_chars = sum(len(msg["content"]) for msg in demo_manager.history)
total_words = sum(len(msg["content"].split()) for msg in demo_manager.history)
print(f"Full history stats: {total_chars} characters, {total_words} words\n")

# a. Truncate by number of turns (last 2 turns only)
print(f"\n1️⃣ TRUNCATION BY TURNS (Last 2 turns only):")
truncated_by_turns = demo_manager.get_truncated_history(max_turns=2)
demo_manager.simple_print_history(truncated_by_turns, f"Truncated to 2 turns ({len(truncated_by_turns)} messages)")

# Calculate stats for truncated version
turn_chars = sum(len(msg["content"]) for msg in truncated_by_turns)
turn_words = sum(len(msg["content"].split()) for msg in truncated_by_turns)
print(f"Truncated stats: {turn_chars} characters, {turn_words} words (reduced by {total_chars-turn_chars} chars, {total_words-turn_words} words)")

# b. Truncate by character length (small limit to show truncation)
print(f"\n2️⃣ TRUNCATION BY CHARACTERS (Max 400 chars):")
truncated_by_chars = demo_manager.get_truncated_history(max_chars=400) # less than 400 shows 0 messages, fixing this for demo only
demo_manager.simple_print_history(truncated_by_chars, f"Truncated to ~400 chars ({len(truncated_by_chars)} messages)")

# Calculate stats for truncated version
char_chars = sum(len(msg["content"]) for msg in truncated_by_chars)
char_words = sum(len(msg["content"].split()) for msg in truncated_by_chars)
print(f"Truncated stats: {char_chars} characters, {char_words} words (reduced by {total_chars-char_chars} chars, {total_words-char_words} words)")

# c. Truncate by word length (small limit to show truncation)
print(f"\n3️⃣ TRUNCATION BY WORDS (Max 60 words):")
truncated_by_words = demo_manager.get_truncated_history(max_words=60) # less than this will display no message hence fixing 60 for demo
demo_manager.simple_print_history(truncated_by_words, f"Truncated to ~60 words ({len(truncated_by_words)} messages)")

# Calculate stats for truncated version
word_chars = sum(len(msg["content"]) for msg in truncated_by_words)
word_words = sum(len(msg["content"].split()) for msg in truncated_by_words)
print(f"Truncated stats: {word_chars} characters, {word_words} words (reduced by {total_chars-word_chars} chars, {total_words-word_words} words)")

print("\n✅ TRUNCATION DEMONSTRATION COMPLETE!")

## Task 1: Conversation Management Demonstration ##
--- Starting Conversation (Summarize every 3 runs) ---

🗣️ [Run 1] User: What is the capital of France?
🤖 [Run 1] Assistant: The capital of France is Paris.

🗣️ [Run 2] User: And what is its population?
🤖 [Run 2] Assistant: The population of Paris depends on whether you're referring to the city proper or the metropolitan area. 

As of the latest available data (2020), the city proper of Paris has a population of approximately 2.1 million people. 

The larger metropolitan area of Paris, which includes the city and its surrounding suburbs, has a population of around 12.2 million people.

🗣️ [Run 3] User: What's a famous landmark there?
🤖 [Run 3] Assistant: One of the most famous landmarks in Paris is the Eiffel Tower (La Tour Eiffel in French). It's a iconic iron lattice tower built for the 1889 World's Fair and stands at an impressive 324 meters (1,063 feet) tall. The Eiffel Tower is a symbol of Paris and one of the most recognizable l

In [20]:
# TASK 2 - INFORMATION EXTRACTION & DEMONSTRATION


class InformationExtractor:

    #Extracts structured information from chat conversations using Groq function calling.


    def __init__(self, model="llama-3.1-8b-instant"): # currently valid model in Groq
        self.model = model

        #Json logic for classification and extraction
        self.extraction_schema = {
            "name": "extract_user_information",
            "description": "Extract personal information from a chat conversation",
            "parameters": {
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Full name of the person"
                    },
                    "email": {
                        "type": "string",
                        "description": "Email address"
                    },
                    "phone": {
                        "type": "string",
                        "description": "Phone number"
                    },
                    "location": {
                        "type": "string",
                        "description": "Location/address/city mentioned"
                    },
                    "age": {
                        "type": "integer",
                        "description": "Age of the person"
                    }
                },
                "required": []  # Making all fields optional for flexibility
            }
        }

    def extract_information(self, chat_text: str) -> Dict[str, Any]:

        # To extract required information from chat text using function calling.

        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert at extracting personal information from chat conversations. Extract any mentioned personal details like name, email, phone, location, and age. If information is not mentioned or unclear, leave those fields empty/null."
                    },
                    {
                        "role": "user",
                        "content": f"Extract personal information from this chat:\n\n{chat_text}"
                    }
                ],
                tools=[{"type": "function", "function": self.extraction_schema}],
                tool_choice={"type": "function", "function": {"name": "extract_user_information"}}
            )

            # Parsing the result
            tool_call = response.choices[0].message.tool_calls[0]
            extracted_info = json.loads(tool_call.function.arguments)

            return {
                "status": "success",
                "extracted_info": extracted_info,
                "confidence": "high" if len([v for v in extracted_info.values() if v]) >= 3 else "medium"
            }

        except Exception as e:
            return {
                "status": "error",
                "error": str(e),
                "extracted_info": {}
            }

    def validate_extraction(self, extracted_info: Dict) -> Dict[str, Any]:

        # Validation of extracted information.

        validation_results = {
            "valid_fields": {},
            "invalid_fields": {},
            "overall_valid": True
        }

        # Email validation
        if extracted_info.get("email"):
            email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
            if re.match(email_pattern, extracted_info["email"]):
                validation_results["valid_fields"]["email"] = extracted_info["email"]
            else:
                validation_results["invalid_fields"]["email"] = "Invalid email format"
                validation_results["overall_valid"] = False

        # Phone validation (basic)
        if extracted_info.get("phone"):
            phone_pattern = r'[\+]?[\d\s\-\(\)]{10,}'
            if re.search(phone_pattern, extracted_info["phone"]):
                validation_results["valid_fields"]["phone"] = extracted_info["phone"]
            else:
                validation_results["invalid_fields"]["phone"] = "Invalid phone format"
                validation_results["overall_valid"] = False

        # Age validation
        if extracted_info.get("age"):
            if isinstance(extracted_info["age"], int) and 0 < extracted_info["age"] < 150:
                validation_results["valid_fields"]["age"] = extracted_info["age"]
            else:
                validation_results["invalid_fields"]["age"] = "Invalid age range"
                validation_results["overall_valid"] = False

        # Name and location (basic presence check)
        for field in ["name", "location"]:
            if extracted_info.get(field) and len(extracted_info[field].strip()) > 0:
                validation_results["valid_fields"][field] = extracted_info[field]

        return validation_results

    def pretty_print_results(self, chat_text: str, result: Dict, validation: Dict):

        print(f"\n{'='*80}")
        print(" INFORMATION EXTRACTION RESULTS")
        print(f"{'='*80}")

        print(f"\n Original Chat Text:")
        print("-" * 40)
        wrapped_chat = textwrap.fill(chat_text, width=75, initial_indent="  ", subsequent_indent="  ")
        print(wrapped_chat)

        print(f"\n🔍 Extraction Status: {result['status'].upper()}")
        if result['status'] == 'success':
            print(f"🎯 Confidence: {result['confidence'].upper()}")

        print(f"\n📊 Extracted Information:")
        print("-" * 40)
        for key, value in result['extracted_info'].items():
            if value:
                status_icon = "✅" if key in validation['valid_fields'] else "❌"
                print(f"  {status_icon} {key.capitalize()}: {value}")
            else:
                print(f"   {key.capitalize()}: Not found")

        print(f"\n🔒 Validation Results:")
        print("-" * 40)
        print(f"  Overall Valid: {' YES' if validation['overall_valid'] else ' NO'}")
        print(f"  Valid Fields: {len(validation['valid_fields'])}")
        print(f"  Invalid Fields: {len(validation['invalid_fields'])}")

        if validation['invalid_fields']:
            for field, issue in validation['invalid_fields'].items():
                print(f" invalid {field}: {issue}")



# TASK 2 DEMONSTRATION
print("\n" + "="*70 )
print(" TASK 2: INFORMATION EXTRACTION DEMONSTRATION")
print( "="*70 + "\n")

# Initialize extractor
extractor = InformationExtractor()

# Sample chats for information extraction
sample_chats = [
    {
        "id": 1,
        "title": "Customer Support Registration",
        "text": """
        User: Hi, I need help setting up my account.
        Agent: Sure! I can help you with that. Can I get your name first?
        User: Yes, my name is John Smith and I'm 28 years old.
        Agent: Thanks John. What's your email address?
        User: It's john.smith@gmail.com and my phone number is +1-555-123-4567.
        Agent: Great! And what city are you located in?
        User: I'm in New York City.
        Agent: Perfect, I have all your information now.
        """
    },
    {
        "id": 2,
        "title": "Event Registration",
        "text": """
        Organizer: Welcome to our event registration! Please provide your details.
        Participant: Hello! I'm Maria Garcia, I'm 35 years old.
        Organizer: Great! Can I get your contact information?
        Participant: Sure, you can reach me at maria.garcia@yahoo.com
        Organizer: And your phone number?
        Participant: It's 555-987-6543. I'm calling from Los Angeles.
        Organizer: Thank you Maria, your registration is complete!
        """
    },
    {
        "id": 3,
        "title": "Incomplete Information Chat",
        "text": """
        Support: How can I help you today?
        User: I'm having trouble with my order.
        Support: I'd be happy to help! Can you provide your email?
        User: It's robert@invalid-email
        Support: I also need your phone number for verification.
        User: I don't want to share that right now.
        Support: That's okay. What's your name?
        User: Just call me Rob. I live somewhere in California.
        """
    }
]

# Process each sample chat
for i, sample in enumerate(sample_chats, 1):
    print(f"\n🎯 PROCESSING SAMPLE CHAT #{sample['id']}: {sample['title']}")
    print("="*80)

    # Extract information
    result = extractor.extract_information(sample['text'])

    # Validate extraction
    validation = extractor.validate_extraction(result['extracted_info'])

    # Pretty print results for formatted output
    extractor.pretty_print_results(sample['text'], result, validation)

    print(f"\n⏱️ Processing complete for sample {i}/3")



print(f"\n TASK 2 COMPLETED SUCCESSFULLY!")


 TASK 2: INFORMATION EXTRACTION DEMONSTRATION


🎯 PROCESSING SAMPLE CHAT #1: Customer Support Registration

 INFORMATION EXTRACTION RESULTS

 Original Chat Text:
----------------------------------------
           User: Hi, I need help setting up my account.         Agent:
  Sure! I can help you with that. Can I get your name first?         User:
  Yes, my name is John Smith and I'm 28 years old.         Agent: Thanks
  John. What's your email address?         User: It's john.smith@gmail.com
  and my phone number is +1-555-123-4567.         Agent: Great! And what
  city are you located in?         User: I'm in New York City.
  Agent: Perfect, I have all your information now.

🔍 Extraction Status: SUCCESS
🎯 Confidence: HIGH

📊 Extracted Information:
----------------------------------------
  ✅ Age: 28
  ✅ Email: john.smith@gmail.com
  ✅ Location: New York City
  ✅ Name: John Smith
  ✅ Phone: +1-555-123-4567

🔒 Validation Results:
----------------------------------------
  Overall Valid