In [1]:
import os
import json
import time
from typing import List, Dict, Any, Optional
from dataclasses import dataclass, field
from jsonschema import validate, ValidationError

In [None]:
import openai

def create_openai_client_for_groq():
    """
    Create an OpenAI-compatible client for Groq API.
    Requires GROQ_API_KEY in your environment.
    """
    base_url = "https://api.groq.com/openai/v1"
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("Set GROQ_API_KEY as an environment variable before using this cell.")

    client = OpenAI(api_key=api_key, base_url=base_url)
    return client



def groq_chat_request_raw(
    messages: List[Dict[str, str]],
    model: str = "llama-3.3-70b-versatile",
    max_tokens: int = 512,
):
    """
    Send raw POST request to Groq OpenAI-compatible endpoint.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("Set GROQ_API_KEY in env before running.")

    endpoint = "https://api.groq.com/openai/v1/chat/completions"
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": 0.0,
    }
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    resp = requests.post(endpoint, headers=headers, json=payload)
    resp.raise_for_status()
    return resp.json()


In [4]:
@dataclass
class ConversationHistory:
    raw_messages: List[Dict[str, str]] = field(default_factory=list)
    summary: Optional[str] = None
    runs_since_last_summary: int = 0

    def append(self, role: str, content: str):
        self.raw_messages.append({"role": role, "content": content})
        self.runs_since_last_summary += 1

    def truncate_by_turns(self, n: int) -> List[Dict[str,str]]:
        """Keep only last n messages (turns = messages)."""
        return self.raw_messages[-n:]

    def truncate_by_chars(self, max_chars: int) -> List[Dict[str,str]]:
        """Keep as many trailing messages until char limit reached."""
        out = []
        total = 0
        for msg in reversed(self.raw_messages):
            l = len(msg['content'])
            if total + l > max_chars and out:
                break
            out.append(msg)
            total += l
        return list(reversed(out))

    def truncate_by_words(self, max_words: int) -> List[Dict[str,str]]:
        out = []
        total = 0
        for msg in reversed(self.raw_messages):
            w = len(msg['content'].split())
            if total + w > max_words and out:
                break
            out.append(msg)
            total += w
        return list(reversed(out))

    def maybe_summarize(self, k: int, summarizer_fn):
        """
        If runs_since_last_summary >= k, call summarizer_fn(conversation) -> summary string,
        store summary and replace earlier history (we'll keep the summary as a system message).
        """
        if self.runs_since_last_summary >= k:
            # create combined text to summarize
            combined = "\n".join([f"{m['role']}: {m['content']}" for m in self.raw_messages])
            new_summary = summarizer_fn(combined)
            # Replace the whole history with a system summary + keep last few messages (configurable).
            # Here we keep the summary as a system message and last 3 messages to preserve context.
            tail = self.truncate_by_turns(3)
            self.raw_messages = [{"role": "system", "content": f"SUMMARY: {new_summary}"}] + tail
            self.summary = new_summary
            self.runs_since_last_summary = 0
            return True
        return False


In [6]:
def naive_summarizer(text: str, max_sentences: int = 3) -> str:
    # Very simple: split into sentences and return first k sentences.
    import re
    sents = re.split(r'(?<=[.!?])\s+', text.strip())
    return " ".join(sents[:max_sentences])

def groq_summarizer(text: str, model: str = "gpt-4o-mini"):
    # Make a conversation request to the model asking for a short summary.
    client = None
    try:
        client = create_openai_client_for_groq()
    except RuntimeError:
        # If no GROQ_API_KEY present, return a mock summary for demonstration.
        return "[MOCK SUMMARY: set GROQ_API_KEY to run real summarization]"
    messages = [
        {"role": "system", "content": "You are a concise summarizer. Produce a short paragraph summary (max 60 words)."},
        {"role": "user", "content": f"Summarize the following conversation:\n\n{text}"}
    ]
    # Using raw requests example to ensure compatibility.
    resp = groq_chat_request_raw(messages, model=model, max_tokens=150)
    # Extract text depending on response format; adapt for Groq response shape.
    # For OpenAI-style responses:
    try:
        return resp['choices'][0]['message']['content'].strip()
    except Exception:
        return "[UNABLE TO PARSE GROQ RESPONSE]"


In [7]:
# Create history and feed multiple messages (simulate)
history = ConversationHistory()

sample_msgs = [
    ("user", "Hi, I'm Abhijit. I'm looking for AI/ML backend roles."),
    ("assistant", "Great — what's your primary tech stack?"),
    ("user", "Python, FastAPI, HuggingFace, Qdrant, Docker."),
    ("assistant", "Do you have experience with AWS and Kafka?"),
    ("user", "Yes, some with AWS and Kafka; built a small pipeline."),
    ("assistant", "What's your preferred location?"),
    ("user", "India or fully remote."),
    ("assistant", "Thanks — I'll search and prepare tailored applications.")
]

for role, text in sample_msgs:
    history.append(role, text)

print("Full history (raw):")
print(json.dumps(history.raw_messages, indent=2))

print("\nTruncate by last 4 turns:")
print(json.dumps(history.truncate_by_turns(4), indent=2))

print("\nTruncate by chars (max 120 chars):")
print(json.dumps(history.truncate_by_chars(120), indent=2))

print("\nTruncate by words (max 40 words):")
print(json.dumps(history.truncate_by_words(40), indent=2))

# Demonstrate k-th summarization after every 3 runs:
print("\nDemonstrate periodic summarization (k=3) using naive summarizer.")
# Simulate additional runs to trigger summarization:
for i in range(3):
    history.append("user", f"Extra message {i+1}")
triggered = history.maybe_summarize(3, lambda t: naive_summarizer(t, max_sentences=2))
print("Summarization triggered:", triggered)
print("History after summarization:")
print(json.dumps(history.raw_messages, indent=2))
print("Stored summary:", history.summary)


Full history (raw):
[
  {
    "role": "user",
    "content": "Hi, I'm Abhijit. I'm looking for AI/ML backend roles."
  },
  {
    "role": "assistant",
    "content": "Great \u2014 what's your primary tech stack?"
  },
  {
    "role": "user",
    "content": "Python, FastAPI, HuggingFace, Qdrant, Docker."
  },
  {
    "role": "assistant",
    "content": "Do you have experience with AWS and Kafka?"
  },
  {
    "role": "user",
    "content": "Yes, some with AWS and Kafka; built a small pipeline."
  },
  {
    "role": "assistant",
    "content": "What's your preferred location?"
  },
  {
    "role": "user",
    "content": "India or fully remote."
  },
  {
    "role": "assistant",
    "content": "Thanks \u2014 I'll search and prepare tailored applications."
  }
]

Truncate by last 4 turns:
[
  {
    "role": "user",
    "content": "Yes, some with AWS and Kafka; built a small pipeline."
  },
  {
    "role": "assistant",
    "content": "What's your preferred location?"
  },
  {
    "role": "us

In [8]:
# Define the JSON schema to extract 5 fields: name, email, phone, location, age
info_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "email": {"type": "string", "format": "email"},
        "phone": {"type": "string"},
        "location": {"type": "string"},
        "age": {"type": ["integer", "null"], "minimum": 0, "maximum": 120}
    },
    "required": ["name"],   # at least name required
    "additionalProperties": False
}


In [9]:
# Prepare a "function" description as OpenAI's function-calling style.
# We'll ask the model to return only JSON conforming to the schema.

function_description = {
    "name": "extract_user_info",
    "description": "Extract user's personal info (name, email, phone, location, age) from a chat message. Return JSON strictly according to the provided schema.",
    "parameters": info_schema  # re-use our schema as the function parameters
}

def call_groq_for_extraction(user_chat: str, model="gpt-4o-mini"):
    """
    Example request to Groq that asks the model to produce a JSON object conforming to the schema.
    The model may support function-calling; if not, we instruct the model to output JSON only.
    """
    try:
        client = create_openai_client_for_groq()
    except RuntimeError:
        # If no API key set, return mocked responses (for testing offline)
        return {"name":"Abhijit", "email":"abhijit@example.com", "phone":"[redacted]", "location":"India", "age":30}

    # For maximum compatibility, instruct the model to respond ONLY with JSON following schema
    messages = [
        {"role":"system", "content":"You must output a JSON object and nothing else. Follow the schema precisely."},
        {"role":"user", "content": f"Chat: ```{user_chat}```\n\nExtract name, email, phone, location, age if present. If not present, use null."}
    ]

    # Use raw request (OpenAI-compatible endpoint)
    resp = groq_chat_request_raw(messages, model=model, max_tokens=200)
    # Attempt to extract the model output (varies by provider response shape)
    try:
        content = resp['choices'][0]['message']['content']
        # parse JSON (model should return JSON)
        parsed = json.loads(content)
        return parsed
    except Exception as e:
        # If parsing fails, return error message
        return {"error": "Unable to parse model response", "raw_response": resp}


In [10]:
sample_chats = [
    "Hi, this is Abhijit Rajkumar. My email is abhijit.raj@example.com and my phone is +91-9876543210. I'm 30 and based in Bengaluru, India.",
    "Hello, I'm Aisha. Reach me at aisha@example.co. I'm currently in Mumbai.",
    "This is a short message — no contact info here."
]

for chat in sample_chats:
    print("Chat:", chat)
    extracted = call_groq_for_extraction(chat)
    print("Extracted:", extracted)
    # Validate against schema if possible
    try:
        validate(instance=extracted, schema=info_schema)
        print("Validation: OK ✅")
    except ValidationError as ve:
        print("Validation: FAILED ❌ -", str(ve))
    print("-"*60)


Chat: Hi, this is Abhijit Rajkumar. My email is abhijit.raj@example.com and my phone is +91-9876543210. I'm 30 and based in Bengaluru, India.


HTTPError: 404 Client Error: Not Found for url: https://api.groq.com/openai/v1/chat/completions

In [14]:
import requests
import os

endpoint = "https://api.groq.com/openai/v1/chat/completions"
api_key = os.environ.get("GROQ_API_KEY")
headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {api_key}"
}
data = {
  "model": "llama-3.3-70b-versatile",   # <-- changed from llama3-8b-8192
  "messages": [
    {"role": "user", "content": "Test"}
  ],
  "temperature": 0.0
}

resp = requests.post(endpoint, headers=headers, json=data)
print(resp.status_code, resp.text)


200 {"id":"chatcmpl-bf2371de-c6be-472a-8fd8-7a65f4b69fde","object":"chat.completion","created":1757906820,"model":"llama-3.3-70b-versatile","choices":[{"index":0,"message":{"role":"assistant","content":"It looks like you're testing our connection. Everything seems to be working fine. How can I assist you today?"},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":0.050493941,"prompt_tokens":36,"prompt_time":0.01039972,"completion_tokens":24,"completion_time":0.048741851,"total_tokens":60,"total_time":0.059141571},"usage_breakdown":null,"system_fingerprint":"fp_3f3b593e33","x_groq":{"id":"req_01k55pjqknfnxs76tecy6xk9kn"},"service_tier":"on_demand"}

