In [19]:
# ================================================================
# Prakhar Saxena - Task 1 (Groq API Assignment)
# Conversation Management + Summarization
# ================================================================

!pip install openai -q

import os
import textwrap
from openai import OpenAI

# --- Setup (Groq API with OpenAI SDK compatibility) ---
client = OpenAI(
    api_key="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",   
    base_url="https://api.groq.com/openai/v1"
)

# --- Conversation Tracker with Auto-Summarization ---
class PrakharConversationTracker:
    def __init__(self, client, model="llama-3.1-8b-instant", summarize_every=3):
        self.client = client
        self.model = model
        self.messages = []
        self.summarize_every = summarize_every
        self.summaries = []

    def add_message(self, role, content):
        """Add a message to the conversation and auto-summarize if threshold reached."""
        self.messages.append({"role": role, "content": content})

        # If user sends a message → trigger assistant response
        if role == "user":
            response = self.client.chat.completions.create(
                model=self.model,
                messages=self.messages
            )
            reply = response.choices[0].message.content
            print(f"Assistant: {reply}")
            self.messages.append({"role": "assistant", "content": reply})

        # Check if we need to summarize
        if len(self.messages) % self.summarize_every == 0:
            self._summarize()

    def _summarize(self):
        """Ask the model to summarize the conversation so far."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=self.messages + [
                {"role": "system", "content": "Summarize the conversation so far in 2 sentences."}
            ]
        )
        summary = response.choices[0].message.content
        self.summaries.append(summary)
        print(f"\n--- Summary Added ---\n{summary}\n")

In [20]:
# Demo
conv = PrakharConversationTracker(client, summarize_every=3)

conv.add_message("user", "Hi, can you help me with Python basics?")
conv.add_message("assistant", "Of course! What specific topic are you struggling with?")
conv.add_message("user", "I want to understand loops.")

Assistant: I'd be happy to help you with Python basics. What specifically do you want to learn or need help with? Do you have any prior programming experience, or is this your first time with Python?

Here are some areas I can assist with:

1. **Basic syntax**: Understanding variables, data types, control structures (if-else statements, for loops, etc.), functions, and modules.
2. **Data types and operations**: Working with strings, lists, dictionaries, and other built-in data types in Python.
3. **File Input/Output and Persistence**: Reading and writing files, working with CSV and JSON data, and using databases like SQLite.
4. **Error Handling and Debugging**: Learning how to handle exceptions, use try-except blocks, and debug your code.
5. **Functions and Modules**: Creating reusable functions, importing libraries, and understanding the concept of modules in Python.

Let me know which topic you're interested in, and I'll provide you with examples and explanations to get you started!


In [23]:
# ------------------------------
# Task 2: JSON Schema Classification & Extraction
# Prakhar Saxena - Groq OpenAI-compatible function calling demo
# ------------------------------

# Install required libs
!pip install openai jsonschema -q

import os
import json
from openai import OpenAI
from jsonschema import validate, ValidationError

# Setup client (safe placeholder)
# Use env var GROQ_API_KEY in Colab (preferred). Keep placeholder for GitHub.
GROQ_KEY = os.getenv("GROQ_API_KEY", "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")
client = OpenAI(api_key=GROQ_KEY, base_url="https://api.groq.com/openai/v1")

# JSON Schema (strict) - extract 5 fields
person_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "minLength": 1},
        "email": {
            "type": "string",
            "format": "email"
        },
        "phone": {
            "type": "string",
            "pattern": r"^[\d\-\+\(\) ]{7,20}$"
        },
        "location": {"type": "string"},
        "age": {
            "type": "integer",
            "minimum": 0,
            "maximum": 130
        }
    },
    "required": ["name"],
    "additionalProperties": False
}

# Function declaration for function-calling
# This is the OpenAI-style "functions" parameter describing the schema
functions = [
    {
        "name": "extract_person_info",
        "description": "Extract contact and basic personal details from a user's chat message",
        "parameters": person_schema  # pass the JSON Schema as "parameters" (OpenAI style)
    }
]

# Helper: call model with function-calling
def call_extraction_model(chat_text, model="llama-3.1-8b-instant"):
    """
    Calls Groq via OpenAI-compatible client.chat.completions.create
    with a single user message and a function descriptor.
    Returns parsed JSON (dict) if function call present; otherwise returns None.
    """
    # Build messages
    messages = [
        {"role": "system", "content": "You are an information extractor. Output only a function call with JSON arguments that match the function schema."},
        {"role": "user", "content": chat_text}
    ]

    # Call the API with functions + automatic function calling
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        functions=functions,
        function_call="auto",  # prefer function call
        max_tokens=300
    )

    # Inspect returned structure: try to get function call content
    choice = response.choices[0]
    # Depending on SDK version, structure might be in choice.message.function_call or choice.message.get("function_call")
    function_call = None
    try:
        # Newer object-style
        function_call = choice.message.function_call
    except Exception:
        function_call = None

    # Fallback: some SDKs expose it in raw dict
    if not function_call:
        # try dict access
        try:
            function_call = choice["message"]["function_call"]
        except Exception:
            function_call = None

    if not function_call:
        # Model did not return function_call; maybe it printed content.
        # Try to parse the content as JSON (lenient fallback).
        content_text = ""
        try:
            content_text = choice.message.content
        except Exception:
            content_text = choice["message"].get("content", "")
        # Try to find JSON inside content_text
        try:
            parsed = json.loads(content_text.strip())
            return parsed, response
        except Exception:
            return None, response

    # function_call.arguments is usually a JSON string
    arguments_str = None
    try:
        arguments_str = function_call.arguments
    except Exception:
        try:
            arguments_str = function_call.get("arguments")
        except Exception:
            arguments_str = None

    if not arguments_str:
        return None, response

    # Parse JSON arguments
    try:
        parsed_args = json.loads(arguments_str)
        return parsed_args, response
    except Exception:
        # If the model returned something non-JSON, attempt manual cleaning
        try:
            # attempt to find first {...} JSON substring
            start = arguments_str.find("{")
            end = arguments_str.rfind("}") + 1
            parsed_args = json.loads(arguments_str[start:end])
            return parsed_args, response
        except Exception:
            return None, response

# Helper: validate against schema
def validate_output(parsed):
    try:
        validate(instance=parsed, schema=person_schema)
        return True, None
    except ValidationError as e:
        return False, str(e)

# Three sample chats to parse (varying styles)
sample_chats = [
    # Simple explicit
    "Hi, I'm Rajeev Kumar. My email is rajeev.kumar@example.com and my phone is +91 98765-43210. I live in Aligarh and I'm 20 years old.",
    # Casual, with missing fields and punctuation
    "Hey! It's Priya — priya1999@gmail.com. 27 years. Based in Delhi. Tel: 9876543210",
    # Scattered info, possible noisy text
    "Hello, this is Arun. Could you save my contact? arun_123@workmail.co  (phone): (0522) 123-4567. Age: twenty five. Location: Uttar Pradesh."
]

# Run extraction + validation for samples
results = []
for i, chat in enumerate(sample_chats, start=1):
    print(f"\n--- Sample Chat #{i} ---")
    print(chat)
    parsed, raw_resp = call_extraction_model(chat)
    if parsed is None:
        print("➡️ Model did not return structured data. Raw model output (for debugging):")
        # Print useful raw output pieces (safe)
        try:
            print(json.dumps(raw_resp, default=str, indent=2))
        except Exception:
            print(raw_resp)
        results.append({"sample": i, "parsed": None, "valid": False, "error": "No structured output"})
        continue

    print("\nParsed result (raw):")
    print(json.dumps(parsed, indent=2))

    # Convert age to int if it's string with number words sometimes (best-effort)
    if "age" in parsed and isinstance(parsed["age"], str):
        # try to extract digits
        import re
        m = re.search(r"\d{1,3}", parsed["age"])
        if m:
            parsed["age"] = int(m.group())
        else:
            # attempt word->number for simple cases (twenty five -> 25) - minimal
            words_to_digits = {
                "twenty five": 25, "twenty-five": 25, "twenty": 20, "twentyone":21
            }
            cleaned = parsed["age"].lower().strip()
            if cleaned in words_to_digits:
                parsed["age"] = words_to_digits[cleaned]

    # Validate
    ok, err = validate_output(parsed)
    if ok:
        print("\n Validation: PASSED")
        results.append({"sample": i, "parsed": parsed, "valid": True, "error": None})
    else:
        print("\n Validation: FAILED")
        print("Validation error:", err)
        results.append({"sample": i, "parsed": parsed, "valid": False, "error": err})

# ------------------------------
# Summary of results
# ------------------------------
print("\n\n=== SUMMARY ===")
for r in results:
    print(f"Sample {r['sample']}: valid={r['valid']}; error={r['error']}")
    if r["parsed"]:
        print(json.dumps(r["parsed"], indent=2))



--- Sample Chat #1 ---
Hi, I'm Rajeev Kumar. My email is rajeev.kumar@example.com and my phone is +91 98765-43210. I live in Aligarh and I'm 20 years old.

Parsed result (raw):
{
  "age": 20,
  "email": "rajeev.kumar@example.com",
  "location": "Aligarh",
  "name": "Rajeev Kumar",
  "phone": "+91 98765-43210"
}

 Validation: PASSED

--- Sample Chat #2 ---
Hey! It's Priya — priya1999@gmail.com. 27 years. Based in Delhi. Tel: 9876543210
➡️ Model did not return structured data. Raw model output (for debugging):
"ChatCompletion(id='chatcmpl-99f1715e-ac83-452f-8066-2a3ed092c396', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='<function=extract_person_info>{\"name\": \"Priya\",\"email\": \"priya1999@gmail.com\",\"age\": 27,\"location\": \"Delhi\",\"phone\": \"9876543210\"}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None))], created=1757769216, model='llama-3.1-8b-instant', object='chat.