# **Downloads and Imports**

In [29]:
!pip install --quiet requests openai jsonschema pydantic[email]

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/331.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m331.1/331.1 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [38]:
import json
from openai import OpenAI
from google.colab import userdata
from typing import List, Dict, Literal

# **Configurations and Model Instances**

In [4]:
CHAT_MODEL = "llama-3.3-70b-versatile"
STRUCTURED_OUTPUT_MODEL = "openai/gpt-oss-20b"

In [5]:
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key = userdata.get("GROQ_API_KEY")
)

# **Task 1: Managing Conversation History with Summarization**

- Message History Tracking
- Summarization
- Truncation

In [53]:
MESSAGE_HISTORY : List[dict] = []
SUMMARY = ""

## Function to add messages

In [None]:
summarization_interval = 8
turn_count = 0
def add_message(message):
    global turn_count
    role = message['role']
    MESSAGE_HISTORY.append(message)
    turn_count += 1
    print(f"Turn {turn_count}: Added {message}")

    if turn_count % summarization_interval == 0:
      print(f"Turn count {turn_count}, output: {turn_count % summarization_interval}")
      periodic_summarization()

## functionm to create summary

In [None]:
def generate_summary(messages):
    global SUMMARY
    global turn_count
    global MESSAGE_HISTORY

    if not MESSAGE_HISTORY:
        print("No messages to summarize.")
        return
    conversation_text = "\n".join([f"{message['role']}: {message['content']}" for message in MESSAGE_HISTORY])

    summary_prompt = f"""
        Please provide a concise summary of the following conversation, in the third person point of view. Focus on:
        1. Main topics discussed
        2. Key decisions or outcomes
        3. Important information exchanged
        4. Overall context and flow

        Conversation:
        {conversation_text}

        Summary:"""
    try:
        chat_bot_response = client.chat.completions.create(
          model=CHAT_MODEL,
            messages=[{"role":"user", "content":conversation_text}],
            max_tokens=150,
            temperature=0.3)
        summary = chat_bot_response.choices[0].message.content.strip()
        print(f"✅ Generated summary ({len(summary)} characters)")
        return summary
    except Exception as e:
        print(f"❌ Error generating summary: {e}")
        return "Summary generation failed"

## Function to create periodic summary

In [None]:
def periodic_summarization():
        global MESSAGE_HISTORY
        global SUMMARY

        # Only generate a new summary if there's enough new conversation
        if len(MESSAGE_HISTORY) >= 2:
            current_summary = generate_summary(MESSAGE_HISTORY)

            if SUMMARY:
                # Combine with previous summary
                combined_prompt = f"""
                Previous summary: {SUMMARY}

                New conversation summary: {current_summary}

                Please create a comprehensive summary that combines both:"""

                try:
                    response = client.chat.completions.create(
                        model=CHAT_MODEL,
                        messages=[{"role": "user", "content": combined_prompt}],
                        max_tokens= 300,
                        temperature=0.3
                    )

                    SUMMARY = response.choices[0].message.content.strip()

                except Exception as e:
                    print(f"❌ Error combining summaries: {e}")
                    SUMMARY = current_summary
            else:
                SUMMARY = current_summary

            # Replace history with summary
            MESSAGE_HISTORY = [
                {"role": "system", "content": CHAT_MODEL_SYSTEM_PROMPT},
                {"role": "system", "content": f"Previous conversation summary: {SUMMARY}"}
            ]

            print(f"Conversation summarized and history reset")
            print(f"Current summary length: {len(SUMMARY)} characters")

## Helper Functions

In [None]:
def count_words(text: str) -> int:
    return len(text.split())

def count_characters(messages: List[Dict]) -> int:
    return sum(len(msg["content"]) for msg in messages)

def count_total_words(messages: List[Dict]) -> int:
    return sum(count_words(msg["content"]) for msg in messages)

## Function to truncate conversation by turns

In [None]:
max_turns = 6
def truncate_by_turns(messages: List[Dict]) -> List[Dict]:
  if max_turns and len(messages) > max_turns:
      truncated = messages[-max_turns:]
      print(f"Truncated to last {max_turns} messages")
      return truncated
  return messages

## Truncate by length

In [None]:
max_characters = 300
max_words = 80
def truncate_by_length(messages: List[Dict]) -> List[Dict]:
    if not (max_characters or max_words):
        return messages

    result = []
    current_chars = 0
    current_words = 0

    # Start from the end to keep most recent messages
    for message in reversed(messages):
        msg_chars = len(message["content"])
        msg_words = count_words(message["content"])

        # Check if adding this message would exceed limits
        if (max_characters and current_chars + msg_chars > max_characters) or (max_words and current_words + msg_words > max_words):
            break

        result.insert(0, message)
        current_chars += msg_chars
        current_words += msg_words

    if len(result) < len(messages):
        removed = len(messages) - len(result)
        print(f"📝 Truncated {removed} messages due to length constraints")

    return result

## Get truncated history

In [None]:
def get_truncated_history(trunc_type:str) -> List[Dict]:
        messages = MESSAGE_HISTORY.copy()
        if not trunc_type:
          raise ValueError("Truncation type not provided")

        # Apply truncation strategies
        if trunc_type == "turns":
          return truncate_by_turns(messages)

        return truncate_by_length(messages)

## Get overall, used parameters

In [None]:
def get_statistics() -> Dict:
    """Get detailed statistics about the conversation."""
    stats = {
            "total_turns": turn_count,
            "current_messages": len(MESSAGE_HISTORY),
            "total_characters": count_characters(MESSAGE_HISTORY),
            "total_words": count_total_words(MESSAGE_HISTORY),
            "has_summary": bool(SUMMARY),
            "summary_length": len(SUMMARY) if SUMMARY else 0,
            "summarization_interval": summarization_interval,
            "truncation_limits": {
                "max_turns": max_turns,
                "max_characters": max_characters,
                "max_words": max_words
            }
        }
    return stats

In [None]:
get_statistics()

{'total_turns': 0,
 'current_messages': 8,
 'total_characters': 1676,
 'total_words': 279,
 'has_summary': True,
 'summary_length': 969,
 'summarization_interval': 8,
 'truncation_limits': {'max_turns': 6, 'max_characters': 300, 'max_words': 80}}

## **Test Conversation Manager**

## Try Conversation with LLM

In [None]:
while True:
    user_input = input("User: ")
    add_message({"role": "user", "content": user_input})

    try:
        chat_bot_response = client.chat.completions.create(
            model=CHAT_MODEL,
            messages=MESSAGE_HISTORY
        )
        response_content = chat_bot_response.choices[0].message.content
        add_message({"role": "assistant", "content": response_content})

        print(f"Bot: {response_content}")

    except Exception as e:
        print(f"An error occurred: {e}")

## Test with test message jsons

In [None]:
conversation_samples = [
        ("user", "Hello! I'm looking for advice on starting a small business."),
        ("assistant", "Great! I'd be happy to help. What type of business are you considering?"),
        ("user", "I'm thinking about opening a coffee shop in downtown Seattle."),
        ("assistant", "Excellent choice! Seattle has a great coffee culture. Have you done any market research?"),
        ("user", "Not yet. Where should I start with market research?"),
        ("assistant", "I recommend starting with competitor analysis, foot traffic studies, and customer surveys."),
        ("user", "That sounds comprehensive. What about financing options?"),
        ("assistant", "For coffee shops, you have several options: SBA loans, traditional bank loans, investors, or crowdfunding.")
    ]

In [None]:
# Clear previous history for a clean test
MESSAGE_HISTORY = []
turn_count = 0
SUMMARY = ""

print("--- Testing Truncation ---")

# fill message history with sample conversation
for role, content in conversation_samples:
    add_message({"role": role, "content": content})

print("\n--- Original Message History ---")
for msg in MESSAGE_HISTORY:
    print(f"{msg['role']}: {msg['content']}")
print(f"Total messages: {len(MESSAGE_HISTORY)}")

print("\n--- Truncating by Turns (max_turns=6) ---")
truncated_by_turns = get_truncated_history(trunc_type="turns")
for msg in truncated_by_turns:
    print(f"{msg['role']}: {msg['content']}")
print(f"Truncated messages: {len(truncated_by_turns)}")


print("\n--- Truncating by Length (max_characters=300, max_words=80) ---")
truncated_by_length = get_truncated_history(trunc_type="length")
for msg in truncated_by_length:
    print(f"{msg['role']}: {msg['content']}")
print(f"Truncated messages: {len(truncated_by_length)}")

print("\n--- Truncation Test Complete ---")

--- Testing Truncation ---
Turn 1: Added {'role': 'user', 'content': "Hello! I'm looking for advice on starting a small business."}
Turn 2: Added {'role': 'assistant', 'content': "Great! I'd be happy to help. What type of business are you considering?"}
Turn 3: Added {'role': 'user', 'content': "I'm thinking about opening a coffee shop in downtown Seattle."}
Turn 4: Added {'role': 'assistant', 'content': 'Excellent choice! Seattle has a great coffee culture. Have you done any market research?'}
Turn 5: Added {'role': 'user', 'content': 'Not yet. Where should I start with market research?'}
Turn 6: Added {'role': 'assistant', 'content': 'I recommend starting with competitor analysis, foot traffic studies, and customer surveys.'}
Turn 7: Added {'role': 'user', 'content': 'That sounds comprehensive. What about financing options?'}
Turn 8: Added {'role': 'assistant', 'content': 'For coffee shops, you have several options: SBA loans, traditional bank loans, investors, or crowdfunding.'}
Tur

# **Task 2: JSON Schema Classification & Information Extraction**

## Required JSON Schemas and Prompts

In [42]:
from pydantic import BaseModel, EmailStr
from typing import List, Dict, Literal, Optional

class UserInformationModel(BaseModel):
    name: Optional[str] = None
    email: Optional[EmailStr] = None
    phone: Optional[str] = None
    location: Optional[str] = None
    age: Optional[int] = None
    tech_stack: Optional[List[str]] = None

In [43]:
SYSTEM_PROMPT = system_prompt = """You are an expert information extractor. Your task is to carefully analyze chat conversations and extract specific user information when available.


FOLLOW THESE IMPORTANT GUIDELINES:
1. Extract only information that is explicitly mentioned in the conversation
2. Do not make assumptions or infer information not directly stated
3. For names: Extract full names when provided
4. For emails: Look for email addresses in standard formats, should contain '@'
5. For phones: Extract any phone number mentioned (any format, should be 10 digit)
6. For locations: Extract cities, states, addresses, or geographical references
7. For age: Extract numerical age when mentioned
8. For tech_stack: Extract a list of technical skills when mentioned

Cross check all information before giving an output.

If information is not clearly given, leave that field empty instead of guessing."""

## Function to extract information

In [45]:
def extract_information(chat_conversation: str | List[Dict]) -> dict:

  USER_PROMPT = f"""Please extract any available user information from this chat conversation:

{chat_conversation}

Use the extract_user_information function to provide the structured output."""
  try:
      chat_bot_response = client.chat.completions.create(
            model=STRUCTURED_OUTPUT_MODEL,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role":"user", "content":USER_PROMPT},
            ],
            response_format =  {
                "type": "json_schema",
                "json_schema":{
                    "name":"user_information",
                    "description":"User information extracted from the chat conversation",
                    "schema":UserInformationModel.model_json_schema()

                } }
        )
      return chat_bot_response.choices[0].message.content
  except Exception as e:
    raise Exception(f"Error extracting information: {e}")

## Test Information Extraction

In [49]:
SAMPLE_CHATS = [
    # Sample 1: Customer support chat with complete information, including tech stack
    """Customer: Hi, I'm having trouble with my order
Agent: I'd be happy to help! Can I get your name and email?
Customer: Sure, it's Sarah Johnson and my email is sarah.johnson@email.com
Agent: Thank you Sarah. Can I also get your phone number and address?
Customer: Yes, my phone is (555) 123-4567 and I live in Seattle, WA. I'm 28 years old by the way, does that matter for shipping?
Agent: Thank you for all that information! Just to confirm, what tech do you use the most?
Customer: Mostly Python and Django for my small business website.
Agent: Let me look up your order.""",

    # Sample 2: Registration chat with partial information & tech stack
    """User: I want to sign up for your newsletter
Bot: Great! What's your email address?
User: It's mike.chen@company.com
Bot: Thanks! And your name?
User: Mike Chen
Bot: Any location preference for local events?
User: I'm based in San Francisco.
Bot: Do you use any specific tech tools or frameworks at work?
User: Node.js and React.
Bot: Perfect, you're all set!""",

    # Sample 3: Survey chat with mixed information
    """Interviewer: Thank you for participating in our survey. Could you share some basic information?
Participant: Of course
Interviewer: What's your name and age?
Participant: I'm Jennifer Davis, 35 years old
Interviewer: Great! Do you have a preferred contact method?
Participant: You can reach me at jennifer.davis.work@gmail.com or call me at 555-987-6543
Interviewer: And which city are you located in?
Participant: I live in Austin, Texas
Interviewer: Do you work in any particular technology area?
Participant: Yes, mainly with AWS, Java, and Docker.
Interviewer: Perfect, that's all we need!""",

    # Sample 4: Job application chat with multiple tech skills
    """Recruiter: Welcome, could you please tell me your name and how to reach you?
Candidate: My name is Priya Sharma. You can email me at priya.sharma123@gmail.com.
Recruiter: And your phone number and current location?
Candidate: Sure, my phone is +91-9876543210, and I'm currently in Bengaluru.
Recruiter: What's your age and primary tech stacks?
Candidate: I'm 25. I primarily work with Vue.js, FastAPI, and PostgreSQL.
Recruiter: Noted. Do you have experience with cloud platforms?
Candidate: Yes, GCP and Azure are part of my daily workflow.""",

    # Sample 5: Casual onboarding chat with partial information
    """Mentor: Hi! Welcome aboard. What's your full name?
Newcomer: I'm Alex Martinez.
Mentor: Great, Alex. You can share your contact details when you're ready.
Newcomer: Sure! My email: alexmartinez@startup.io
Mentor: Where are you joining from?
Newcomer: I'm based in Boston. I usually build with Flutter and Firebase.
Mentor: Awesome! Age is optional, but it helps with mentorship pairing.
Newcomer: I'm 30.
Mentor: Thanks, Alex. Let's start!"""
]

In [52]:
print("--- Testing Information Extraction with Sample Chats ---")

for i, chat in enumerate(SAMPLE_CHATS):
    print(f"\n--- Sample Chat {i+1} ---")
    print(chat)
    try:
        extracted_data_json = extract_information(chat)

        extracted_data = json.loads(extracted_data_json)
        validated_data = UserInformation.model_validate(extracted_data)
        print("\n✅ Extracted and Validated Information:")
        print(validated_data.model_dump_json(indent=2))
    except Exception as e:
        print(f"\n❌ Error processing sample chat {i+1}: {e}")

print("\n--- Information Extraction Test Complete ---")

--- Testing Information Extraction with Sample Chats ---

--- Sample Chat 1 ---
Customer: Hi, I'm having trouble with my order
Agent: I'd be happy to help! Can I get your name and email?
Customer: Sure, it's Sarah Johnson and my email is sarah.johnson@email.com
Agent: Thank you Sarah. Can I also get your phone number and address?
Customer: Yes, my phone is (555) 123-4567 and I live in Seattle, WA. I'm 28 years old by the way, does that matter for shipping?
Agent: Thank you for all that information! Just to confirm, what tech do you use the most?
Customer: Mostly Python and Django for my small business website.
Agent: Let me look up your order.

✅ Extracted and Validated Information:
{
  "name": "Sarah Johnson",
  "email": "sarah.johnson@email.com",
  "phone": "(555) 123-4567",
  "location": "Seattle, WA",
  "age": 28,
  "tech_stack": [
    "Python",
    "Django"
  ]
}

--- Sample Chat 2 ---
User: I want to sign up for your newsletter
Bot: Great! What's your email address?
User: It's mi