<a href="https://colab.research.google.com/github/DwarakaMadhu/Data-sts-cohere-api/blob/main/groq_assignment_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# === Setup ===
import json
from typing import Any, Dict, List, Optional

# Mock Groq client (replace with actual Groq API client for real use)
class GroqClientMock:
    """Offline stand-in for a Groq client that returns canned results.

    Nothing here calls a model: ``summarize`` just truncates its input and
    ``classify_and_extract`` fires fixed substring rules.
    """

    def summarize(self, text: str) -> str:
        """Return a fake summary: the first 60 characters of *text* in a fixed frame."""
        return "Summary: {}...".format(text[:60])

    def classify_and_extract(self, text: str) -> Dict[str, Any]:
        """Return a record of canned field values triggered by substrings of *text*.

        The result always has the keys ``name``, ``email``, ``phone``,
        ``location`` and ``age`` (in that order); a field stays ``None`` when
        its trigger substring is absent. The values are fixed constants, not
        parsed from the text -- e.g. any ``"@"`` yields ``"john@example.com"``.
        """
        # (trigger substring, output field, canned value), in output-key order.
        rules = (
            ("John", "name", "John Doe"),
            ("@", "email", "john@example.com"),
            ("12345", "phone", "12345"),
            ("NYC", "location", "New York"),
            ("25", "age", 25),
        )
        record: Dict[str, Any] = {field: None for _trigger, field, _value in rules}
        for trigger, field, value in rules:
            if trigger in text:
                record[field] = value
        return record

# Single shared client instance; the later cells pass it to ConversationManager
# and call classify_and_extract() on it.
client = GroqClientMock()
print(" Mock Groq client ready")

 Mock Groq client ready


## Task 1: Managing Conversation History with Summarization

In [None]:
# Conversation Manager
class ConversationManager:
    """Keep a running chat history and periodically collapse it into a summary.

    Every ``k``-th call to :meth:`add_message` replaces the whole history with
    a single ``system`` message whose text is ``client.summarize(transcript)``.

    Args:
        client: Object exposing ``summarize(text: str) -> str``.
        k: Summarize after every ``k`` added messages. Must be at least 1.

    Raises:
        ValueError: If ``k`` is less than 1 (``k == 0`` would otherwise fail
            later with ``ZeroDivisionError`` on the first added message).
    """

    def __init__(self, client, k: int = 3):
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.client = client
        self.history: List[Dict[str, str]] = []
        self.turn_count = 0
        self.k = k  # periodic summarization threshold

    @staticmethod
    def _format(messages: List[Dict[str, str]]) -> str:
        """Render messages as newline-separated ``role: text`` lines."""
        return "\n".join(f"{m['role']}: {m['text']}" for m in messages)

    def add_message(self, role: str, text: str):
        """Append one message and summarize the history on every ``k``-th call."""
        self.history.append({"role": role, "text": text})
        self.turn_count += 1

        # turn_count keeps growing across summaries, so this fires on turns
        # k, 2k, 3k, ... regardless of how short the history currently is.
        if self.turn_count % self.k == 0:
            self.summarize_history()

    def summarize_history(self):
        """Replace the history with one ``system`` message summarizing it.

        Does nothing when the history is empty, so no summary of an empty
        transcript is ever fabricated.
        """
        if not self.history:
            return
        summary = self.client.summarize(self._format(self.history))
        self.history = [{"role": "system", "text": summary}]

    def get_history(self, last_n: Optional[int] = None, max_chars: Optional[int] = None) -> str:
        """Return the history as text, optionally truncated.

        Args:
            last_n: Keep only the most recent ``last_n`` messages. ``None``
                keeps everything; ``0`` or a negative value keeps none.
            max_chars: Cut the rendered text to at most this many characters.
                ``None`` means no limit; ``0`` or a negative value yields ``""``.

        Returns:
            Newline-separated ``role: text`` lines after both limits are applied.
        """
        messages = self.history
        if last_n is not None:
            # A plain [-last_n:] slice would return *everything* for 0, so
            # non-positive counts are handled explicitly.
            messages = messages[-last_n:] if last_n > 0 else []
        text = self._format(messages)
        if max_chars is not None:
            # Clamp so a negative limit cannot silently chop the tail instead.
            text = text[:max(max_chars, 0)]
        return text

# Demo conversation
# With k=3, the third add_message() call triggers summarize_history(), which
# replaces the whole history with a single "system" summary message.
conv = ConversationManager(client, k=3)
conv.add_message("user", "Hi, I need help with my subscription.")
conv.add_message("assistant", "Sure, can you provide your order ID?")
conv.add_message("user", "Order 12345. I was charged twice.")

print("After 3rd message (summarized):")
print(conv.get_history())

# The 4th message is appended after the summary, so the history now holds
# two entries: the summary and this reply.
conv.add_message("assistant", "Thanks. We'll issue a refund.")
print("\nAfter 4th message:")
print(conv.get_history())

# Only two entries exist at this point, so last_n=2 prints the same text as
# the unrestricted call above.
print("\nTruncated to last 2 messages:")
print(conv.get_history(last_n=2))

After 3rd message (summarized):
system: Summary: user: Hi, I need help with my subscription.
assistant: Sure,...

After 4th message:
system: Summary: user: Hi, I need help with my subscription.
assistant: Sure,...
assistant: Thanks. We'll issue a refund.

Truncated to last 2 messages:
system: Summary: user: Hi, I need help with my subscription.
assistant: Sure,...
assistant: Thanks. We'll issue a refund.


## Task 2: JSON Schema Classification & Information Extraction

In [None]:
# Expected fields of an extraction result, mapped to the Python type each
# non-null value must have. This is a plain field -> type mapping (checked
# with isinstance by validate_schema), not a JSON Schema document.
schema = {
    "name": str,
    "email": str,
    "phone": str,
    "location": str,
    "age": int,
}

def validate_schema(data: Dict[str, Any], schema: Dict[str, Any]) -> bool:
    """Check that *data* has every field in *schema* with a value of the right type.

    Args:
        data: Record to validate. Keys not listed in *schema* are ignored.
        schema: Mapping of required field name to the type (or tuple of types)
            its value must be an instance of.

    Returns:
        ``True`` when every schema key is present in *data* and each value is
        either ``None`` or an instance of the declared type; ``False`` otherwise.
        A ``bool`` value is rejected for an ``int`` field unless ``bool`` is
        also explicitly allowed.
    """
    for key, typ in schema.items():
        if key not in data:
            return False
        value = data[key]
        if value is None:
            # None marks "not extracted" and is accepted for any field.
            continue
        allowed = typ if isinstance(typ, tuple) else (typ,)
        # bool subclasses int, so isinstance(True, int) is True; without this
        # guard a stray True/False would validate as an integer field.
        if isinstance(value, bool) and int in allowed and bool not in allowed:
            return False
        if not isinstance(value, typ):
            return False
    return True

# Sample chats
# Each message contains a different subset of the substrings the mock
# extractor reacts to ("John", "@", "12345", "NYC", "25").
sample_chats = [
    "Hello, I'm John and my email is john@example.com. I live in NYC and I'm 25 years old.",
    "My name is Alice. Contact me at alice@mail.com, phone 12345.",
    "This is Bob. I'm from NYC."
]

# Run every sample through the extractor, validate the result against
# `schema`, and print both. enumerate(..., 1) numbers the chats from 1.
for i, chat in enumerate(sample_chats, 1):
    extracted = client.classify_and_extract(chat)
    valid = validate_schema(extracted, schema)
    print(f"--- Chat {i} ---")
    print("Extracted:", json.dumps(extracted, indent=2))
    print("Schema valid?", valid)

--- Chat 1 ---
Extracted: {
  "name": "John Doe",
  "email": "john@example.com",
  "phone": null,
  "location": "New York",
  "age": 25
}
Schema valid? True
--- Chat 2 ---
Extracted: {
  "name": null,
  "email": "john@example.com",
  "phone": "12345",
  "location": null,
  "age": null
}
Schema valid? True
--- Chat 3 ---
Extracted: {
  "name": null,
  "email": null,
  "phone": null,
  "location": "New York",
  "age": null
}
Schema valid? True


In [5]:
import base64
import os
import requests

def push_to_github(filepath='/mnt/data/groq_assignment_notebook.ipynb', repo_env='GITHUB_REPO', timeout=30):
    """Upload a local file to the root of a GitHub repository via the contents API.

    The token is read from the ``GITHUB_TOKEN`` environment variable and the
    target repository from the variable named by *repo_env*.

    Args:
        filepath: Path of the local file to upload. It is stored in the
            repository under its base name.
        repo_env: Name of the environment variable holding the repository,
            which is interpolated into ``/repos/{repo}/contents/...``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A ``(status_code, json_body)`` tuple taken from the PUT response.

    Raises:
        EnvironmentError: If ``GITHUB_TOKEN`` or the *repo_env* variable is
            unset or empty.
    """
    token = os.getenv('GITHUB_TOKEN')
    repo = os.getenv(repo_env)
    if not token or not repo:
        # Name the variable actually consulted, not a hard-coded GITHUB_REPO.
        raise EnvironmentError(f'Set GITHUB_TOKEN and {repo_env} environment variables first')
    with open(filepath, 'rb') as f:
        content = base64.b64encode(f.read()).decode('utf-8')
    url = f'https://api.github.com/repos/{repo}/contents/{os.path.basename(filepath)}'
    headers = {'Authorization': f'token {token}'}
    data = {
        'message': 'Add Groq assignment notebook',
        'content': content
    }
    # Updating an existing file requires the current blob sha; without it the
    # API rejects the PUT. Look it up and include it when the file exists.
    existing = requests.get(url, headers=headers, timeout=timeout)
    if existing.status_code == 200:
        payload = existing.json()
        if isinstance(payload, dict) and 'sha' in payload:
            data['sha'] = payload['sha']
    # requests never times out on its own, so always pass an explicit limit.
    resp = requests.put(url, json=data, headers=headers, timeout=timeout)
    return resp.status_code, resp.json()

# Only confirms that this cell ran; nothing is uploaded until push_to_github() is called.
print('push_to_github function ready')

push_to_github function ready
