<a href="https://colab.research.google.com/github/BHOOMIJ256/Groq_conversation-assignment/blob/main/Conversation_Management_Classification_Groq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Task 1: Conversation Management

1. Maintain conversation history
2. Implement truncation (by turns/characters)
3. Periodic summarization every k conversations
4. Demonstrate with different settings

In [24]:
#Importing All necessary libraries

import json
import copy
from datetime import datetime
import re
from typing import List, Dict, Any, Optional
from dataclasses import dataclass, asdict


In [25]:
#Install required pacakages

!pip install openai



In [26]:
from openai import OpenAI

In [51]:
#Config and Setup

GROQ_API_KEY = ""
GROQ_BASE_URL="https://api.groq.com/openai/v1"

#initializing groq client with OpenAi Compatibility
client = OpenAI(
    api_key=GROQ_API_KEY,
    base_url=GROQ_BASE_URL
)

MODEL_NAME = "llama-3.1-8b-instant"


In [52]:
#Task 1 : Conversation management with summarization

@dataclass
class ConversationTurn:
  """ Represents a single conversation turn"""
  role : str
  content : str
  timestamp: str
  turn_id: int

class ConversationManager:
  """
  Manages Conversation hostory with summarization capabilities,
  -Maintains running converstaion history
  -Truncation by turns or character length
  - Periodic summarization after k-th run
  -Customizable summarization options

  """

  def __init__(self,
               max_turns: Optional[int] = None,
               max_chars: Optional[int]= None,
               summarize_every_k: int = 3):

    """
    Initialize conversation manager

    Args:
    max_turns : Maximum number of conversation turn to keep
    max_chars : Maximum character length for conversation history
    summarize_every_k : Summarize every k-th conversation


    """
    self.conversation_history : List[ConversationTurn] = []
    self.max_turns = max_turns
    self.max_chars = max_chars
    self.summarize_every_k = summarize_every_k
    self.conversation_count = 0
    self.summary_history : List[str] = []
    self.turn_counter = 0

  def add_turn(self, role:str, content:str) -> None:
    self.turn_counter += 1
    turn = ConversationTurn(
        role=role,
        content=content,
        timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        turn_id=self.turn_counter
    )

    self.conversation_history.append(turn)

    #Apply truncation if needed
    self._apply_truncation()

    #Check for periodic summarization
    if role == "assistant":
      self.conversation_count += 1
      if self.conversation_count % self.summarize_every_k == 0:
        self._perform_conversation()

  def _apply_truncation(self) -> None:
    """Apply truncation based on configured limits"""
    #Truncate by number of turns

    if self.max_turns and len(self.conversation_history) > self.max_turns:
      self.conversation_history = self.conversation_history[-self.max_turns:]

    #Truncate by character length
    if self.max_chars:
      total_chars = sum(len(turn.content) for turn in self.conversation_history)
      while total_chars > self.max_chars and len(self.conversation_history) > 1:
        removed_turn = self.conversation_history.pop(0)
        total_chars -= len(removed_turn.content)

  def _perform_conversation(self) -> None:
    """Perform summarization of conversation history"""

    if len(self.conversation_history) < 2:
      return



    # Prepare conversation text for summarization
    conversation_text = self._format_conversation_for_summary()

    try:
            # Call Groq API for summarization
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant that creates concise summaries of conversations. Summarize the key points, decisions, and important information from the conversation below. Keep it under 200 words."
                    },
                    {
                        "role": "user",
                        "content": f"Please summarize this conversation:\n\n{conversation_text}"
                    }
                ],
                max_tokens=250,
                temperature=0.3
            )

            summary = response.choices[0].message.content.strip()
            self.summary_history.append(summary)

            # Replace old conversation with summary
            self._replace_with_summary(summary)

            print(f"📝 Conversation summarized after {self.conversation_count} exchanges")
            print(f"Summary: {summary[:100]}...")

    except Exception as e:
            print(f"Error during summarization: {e}")

  def _format_conversation_for_summary(self) -> str:
        """Format conversation history for summarization"""
        formatted = []
        for turn in self.conversation_history:
            formatted.append(f"{turn.role.capitalize()}: {turn.content}")
        return "\n".join(formatted)

  def _replace_with_summary(self, summary: str) -> None:
        """Replace conversation history with summary"""
        # Keep the last 2 turns and add summary at the beginning
        recent_turns = self.conversation_history[-2:] if len(self.conversation_history) >= 2 else self.conversation_history

        # Create summary turn
        summary_turn = ConversationTurn(
            role="summary",
            content=f"[SUMMARY] {summary}",
            timestamp=datetime.now().isoformat(),
            turn_id=self.turn_counter + 1
        )

        self.conversation_history = [summary_turn] + recent_turns

  def get_conversation_history(self) -> List[Dict[str, Any]]:
        """Get formatted conversation history"""
        return [asdict(turn) for turn in self.conversation_history]

  def get_stats(self) -> Dict[str, Any]:
        """Get conversation statistics"""
        total_chars = sum(len(turn.content) for turn in self.conversation_history)
        return {
            "total_turns": len(self.conversation_history),
            "total_characters": total_chars,
            "conversation_count": self.conversation_count,
            "summaries_created": len(self.summary_history),
            "truncation_settings": {
                "max_turns": self.max_turns,
                "max_chars": self.max_chars,
                "summarize_every_k": self.summarize_every_k
            }
        }

#Task 2 : Information Extraction

1. JSON schema for 5 fields (name, email, phone, location, age)
2. Use function calling for structured extraction
3. Validate extracted data
4. Process multiple sample chats

In [53]:
# JSON Schema for Information Extraction
EXTRACTION_SCHEMA = {
    "name": "extract_user_information",
    "description": "Extract user information from chat conversations",
    "parameters": {
        "type": "object",
        "properties": {
            "name": {
                "type": "string",
                "description": "User's full name if mentioned"
            },
            "email": {
                "type": "string",
                "description": "User's email address if mentioned"
            },
            "phone": {
                "type": "string",
                "description": "User's phone number if mentioned"
            },
            "location": {
                "type": "string",
                "description": "User's location (city, state, country) if mentioned"
            },
            "age": {
                "type": "integer",
                "description": "User's age if mentioned"
            }
        },
        "required": []
    }
}

class InformationExtractor:
    """
    Extracts structured information from chat conversations
    Uses Groq API with OpenAI function calling for structured outputs
    """

    def __init__(self):
        self.extracted_data: List[Dict[str, Any]] = []

    def extract_information(self, chat_content: str) -> Dict[str, Any]:
        """
        Extract information from chat content using function calling

        Args:
            chat_content: The chat conversation text

        Returns:
            Dictionary containing extracted information
        """
        try:
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert information extractor. Extract user information from the conversation. Only extract information that is explicitly mentioned. If information is not present, don't include it in the response."
                    },
                    {
                        "role": "user",
                        "content": f"Extract user information from this conversation:\n\n{chat_content}"
                    }
                ],
                functions=[EXTRACTION_SCHEMA],
                function_call={"name": "extract_user_information"},
                temperature=0.1
            )

            # Parse the function call response
            function_call = response.choices[0].message.function_call
            if function_call and function_call.name == "extract_user_information":
                extracted_info = json.loads(function_call.arguments)

                # Clean up empty values
                cleaned_info = {k: v for k, v in extracted_info.items() if v is not None and v != ""}

                self.extracted_data.append({
                    "chat_content": chat_content,
                    "extracted_info": cleaned_info,
                    "extraction_time": datetime.now().isoformat()
                })

                return cleaned_info
            else:
                return {}

        except Exception as e:
            print(f"Error during information extraction: {e}")
            return {}

    def validate_extraction(self, extracted_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate extracted data against schema

        Args:
            extracted_data: The extracted information dictionary

        Returns:
            Validation results with errors and warnings
        """
        validation_result = {
            "is_valid": True,
            "errors": [],
            "warnings": []
        }

        schema_props = EXTRACTION_SCHEMA["parameters"]["properties"]

        for key, value in extracted_data.items():
            if key not in schema_props:
                validation_result["errors"].append(f"Unexpected field: {key}")
                validation_result["is_valid"] = False
                continue

            expected_type = schema_props[key]["type"]

            # Type validation
            if expected_type == "string" and not isinstance(value, str):
                validation_result["errors"].append(f"Field '{key}' should be string, got {type(value)}")
                validation_result["is_valid"] = False
            elif expected_type == "integer" and not isinstance(value, int):
                validation_result["errors"].append(f"Field '{key}' should be integer, got {type(value)}")
                validation_result["is_valid"] = False

            # Content validation
            if key == "email" and "@" not in str(value):
                validation_result["warnings"].append(f"Email format may be invalid: {value}")
            elif key == "phone" and len(str(value).replace(" ", "").replace("-", "").replace("(", "").replace(")", "")) < 10:
                validation_result["warnings"].append(f"Phone number may be incomplete: {value}")
            elif key == "age" and (value < 0 or value > 150):
                validation_result["warnings"].append(f"Age value seems unrealistic: {value}")

        return validation_result

    def validate_extraction(self, extracted_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate extracted data against schema

        Args:
            extracted_data: The extracted information dictionary

        Returns:
            Validation results with errors and warnings
        """
        validation_result = {
            "is_valid": True,
            "errors": [],
            "warnings": []
        }

        schema_props = EXTRACTION_SCHEMA["parameters"]["properties"]

        for key, value in extracted_data.items():
            if key not in schema_props:
                validation_result["errors"].append(f"Unexpected field: {key}")
                validation_result["is_valid"] = False
                continue

            expected_type = schema_props[key]["type"]

            # Type validation
            if expected_type == "string" and not isinstance(value, str):
                validation_result["errors"].append(f"Field '{key}' should be string, got {type(value)}")
                validation_result["is_valid"] = False
            elif expected_type == "integer" and not isinstance(value, int):
                validation_result["errors"].append(f"Field '{key}' should be integer, got {type(value)}")
                validation_result["is_valid"] = False

            # Content validation
            if key == "email" and "@" not in str(value):
                validation_result["warnings"].append(f"Email format may be invalid: {value}")
            elif key == "phone" and len(str(value).replace(" ", "").replace("-", "").replace("(", "").replace(")", "")) < 10:
                validation_result["warnings"].append(f"Phone number may be incomplete: {value}")
            elif key == "age" and (value < 0 or value > 150):
                validation_result["warnings"].append(f"Age value seems unrealistic: {value}")

        return validation_result

#Demonstration and Testing


In [54]:
#Demonstration and testing

def demonstrate_task1():
    """Demonstrate Task 1: Conversation Management with Summarization"""
    print("="*80)
    print("TASK 1: CONVERSATION MANAGEMENT WITH SUMMARIZATION")
    print("="*80)

    # Test different truncation settings
    settings_to_test = [
        {"max_turns": 6, "max_chars": None, "summarize_every_k": 3},
        {"max_turns": None, "max_chars": 500, "summarize_every_k": 2},
        {"max_turns": 4, "max_chars": 300, "summarize_every_k": 2}
    ]

    sample_conversations = [
        ("user", "Hi! I'm looking for help with my Python project. I need to build a web scraper."),
        ("assistant", "I'd be happy to help you build a web scraper! What specific website are you trying to scrape, and what kind of data are you looking to extract?"),
        ("user", "I want to scrape job listings from a job board. I need to get the job title, company name, salary, and location."),
        ("assistant", "Great! For scraping job listings, I recommend using Python with libraries like requests and BeautifulSoup, or Selenium if the site uses JavaScript. Here's a basic approach: 1) Send HTTP requests to get the HTML, 2) Parse HTML with BeautifulSoup, 3) Extract the data you need using CSS selectors or XPath, 4) Store the data in a structured format like CSV or database."),
        ("user", "That sounds good. Can you show me some sample code?"),
        ("assistant", "Absolutely! Here's a basic example using requests and BeautifulSoup: [code example would go here]. This code sends a GET request, parses the HTML, and extracts job information using CSS selectors."),
        ("user", "Thanks! How do I handle pagination and multiple pages?"),
        ("assistant", "For pagination, you can: 1) Look for 'Next' buttons or page numbers in the HTML, 2) Extract the next page URL, 3) Use a loop to go through all pages, 4) Add delays between requests to be respectful to the server."),
        ("user", "What about handling errors and rate limiting?"),
        ("assistant", "Good question! For robust scraping: 1) Use try-except blocks for HTTP errors, 2) Implement exponential backoff for retries, 3) Add random delays between requests, 4) Check robots.txt file, 5) Handle different response status codes appropriately.")
    ]

    for i, settings in enumerate(settings_to_test):
      print(f"\n Testing Configuration {i + 1}:")
      print(f" Max Turns: {settings['max_turns']}")
      print(f"Max Chars: {settings['max_chars']}")
      print(f"Summarize Every : {settings['summarize_every_k']} exchanges")
      print("-"* 60)

      #Create conversation manager with current settings
      manager = ConversationManager(**settings)

      #Add sample conversations
      for role, content in sample_conversations:
        manager.add_turn(role, content)

        stats = manager.get_stats()
        print(f"After adding {role} turn : {stats['total_turns']} turns, {stats['total_characters']} chars")

        final_stats = manager.get_stats()
        print(f"\n Final Statistics:")
        print(f"   Total Turns: {final_stats['total_turns']}")
        print(f"   Total Characters: {final_stats['total_characters']}")
        print(f"   Conversations: {final_stats['conversation_count']}")
        print(f"   Summaries Created: {final_stats['summaries_created']}")

        # Show conversation history
        print(f"\n Final Conversation History:")
        history = manager.get_conversation_history()
        for turn in history:
            role = turn['role']
            content = turn['content'][:100] + "..." if len(turn['content']) > 100 else turn['content']
            print(f"   {role.upper()}: {content}")

        print("\n" + "="*80)





In [57]:
def demonstrate_task2():
  """Demonstrate Task 2 : JSON Schema Classification & Information Extraction"""
  print("="*80)
  print("TASK 2: JSON SCHEMA CLASSIFICATION & INFORMATION EXTRACTION")
  print("="*80)

  sample_chats = [
        """
        User: Hi there! I'm Bhoomi Jain and I'm interested in your services.
        Assistant: Hello Bhoomi! I'd be happy to help. Could you tell me more about what you're looking for?
        User: I'm 20 years old and I live in Mumbai,Maharashtra. My email is bhoomi@gmai.com
        Assistant: Thanks for the information! I'll send you some details to bhoomi@gmail.com
        User: Great! You can also reach me at (555) 123-4567 if needed.
        """,

        """
        User: Hello, I need support with my account.
        Assistant: I can help you with that. Can I get some information to verify your account?
        User: Sure, I'm Sahil Katiyar, I'm 19 years old
        Assistant: Thank you Sahil. What's the best way to contact you?
        User: You can email me at sahil@company.com or call me at 555-987-6543
        User: I'm currently based in Austin, Texas
        """,

        """
        User: I want to sign up for your newsletter
        Assistant: Great! I'll need some basic information.
        User: My name is Mike Wilson
        Assistant: Thanks Mike! What's your email address?
        User: It's mike.wilson@gmail.com
        Assistant: Perfect! Are you comfortable sharing your location for localized content?
        User: I'm in New York, NY and I'm 42 years old
        """
    ]

    # Initialize information extractor
  extractor = InformationExtractor()

  print("Extracting Information from Sample Chats:\n")

  for i, chat in enumerate(sample_chats, 1):
        print(f"Processing Chat {i}:")
        print("-" * 40)

        # Extract information
        extracted_info = extractor.extract_information(chat.strip())

        # Validate extraction
        validation_result = extractor.validate_extraction(extracted_info)

        print(f"Chat Preview: {chat[:150]}...")
        print(f"\nExtracted Information:")
        if extracted_info:
            for key, value in extracted_info.items():
                print(f"   {key.capitalize()}: {value}")
        else:
            print("   No information extracted")

        print(f"\nValidation Results:")
        print(f"   Valid: {validation_result['is_valid']}")
        if validation_result['errors']:
            print(f"   Errors: {validation_result['errors']}")
        if validation_result['warnings']:
            print(f"   Warnings: {validation_result['warnings']}")

        print("\n" + "="*60)

    # Show overall extraction statistics
  print(f"\nExtraction Statistics:")
  print(f"   Total chats processed: {len(extractor.extracted_data)}")

    # Aggregate extracted fields
  all_fields = {}
  for record in extractor.extracted_data:
      for field in record['extracted_info'].keys():
          all_fields[field] = all_fields.get(field, 0) + 1

  print(f"   Fields extracted:")
  for field, count in all_fields.items():
      print(f"     {field.capitalize()}: {count} times")


def run_complete_demonstration():
    """Run both tasks with comprehensive demonstration"""
    print("Starting Complete Assignment Demonstration")
    print("Author: [Your Name]")
    print("Assignment: Conversation Management & Classification using Groq API")
    print("Date:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("\n")

    try:
        # Task 1 Demonstration
        demonstrate_task1()

        print("\n\n")

        # Task 2 Demonstration
        demonstrate_task2()

        print("\n" + "="*80)
        print("ASSIGNMENT COMPLETED SUCCESSFULLY!")
        print("="*80)
        print("\nSummary of Implementation:")
        print("✓ Task 1: Conversation Management with Summarization")
        print("  - Configurable truncation (turns/characters)")
        print("  - Periodic summarization after k exchanges")
        print("  - Multiple configuration testing")
        print("\n✓ Task 2: JSON Schema Information Extraction")
        print("  - OpenAI function calling with Groq API")
        print("  - Schema validation")
        print("  - Multiple chat processing")
        print("\n Technologies Used:")
        print("  - Groq API with OpenAI SDK compatibility")
        print("  - Python standard libraries")
        print("  - JSON schema validation")
        print("  - Function calling for structured outputs")

    except Exception as e:
        print(f"Error during demonstration: {e}")
        print("Please check your API key and internet connection")



#Code Execution

In [58]:
if __name__ == "__main__":
    run_complete_demonstration()

Starting Complete Assignment Demonstration
Author: [Your Name]
Assignment: Conversation Management & Classification using Groq API
Date: 2025-09-13 09:37:50


TASK 1: CONVERSATION MANAGEMENT WITH SUMMARIZATION

 Testing Configuration 1:
 Max Turns: 6
Max Chars: None
Summarize Every : 3 exchanges
------------------------------------------------------------
After adding user turn : 1 turns, 79 chars

 Final Statistics:
   Total Turns: 1
   Total Characters: 79
   Conversations: 0
   Summaries Created: 0

 Final Conversation History:
   USER: Hi! I'm looking for help with my Python project. I need to build a web scraper.

After adding assistant turn : 2 turns, 222 chars

 Final Statistics:
   Total Turns: 2
   Total Characters: 222
   Conversations: 1
   Summaries Created: 0

 Final Conversation History:
   USER: Hi! I'm looking for help with my Python project. I need to build a web scraper.
   ASSISTANT: I'd be happy to help you build a web scraper! What specific website are you trying t