<a href="https://colab.research.google.com/github/DwarakaMadhu/Data-sts-cohere-api/blob/main/groq_task_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Groq APIs assignment — Colab notebook

This notebook implements two core tasks using Groq APIs with OpenAI-SDK-compatible call patterns:

1. Conversational data management & summarization.
2. Chat classification & structured extraction.

The notebook includes runnable code, a mock Groq client for offline testing (every example below runs against this mock, so no API key is required), sample chat data, and optional GitHub upload code.


In [44]:
# Install required libraries (run in Colab)
!pip install --quiet tiktoken sentencepiece nbformat --upgrade

import os, json
from typing import Dict


In [45]:
# Save as helper (can be moved to a separate file if needed)
class GroqClientMock:
    """Offline stand-in for a Groq client, driven by simple keyword rules.

    No network calls are made: every method is a deterministic function of
    the input text, so the notebook can run without an API key.
    """

    def __init__(self):
        pass

    def summarize(self, text: str) -> str:
        """Return the first 20 whitespace-separated words of ``text``.

        "..." is appended when the text had more than 20 words.
        """
        words = text.split()
        return " ".join(words[:20]) + ("..." if len(words) > 20 else "")

    def classify(self, text: str) -> dict:
        """Assign ``text`` to one category using keyword rules.

        Rules are checked in order (billing, feature request, greeting) and
        the first match wins; anything else is "general".

        Returns:
            A dict with ``categories`` (a one-element list) and ``confidence``
            (a fixed score per rule).
        """
        low = text.lower()
        # Whole-word tokens: punctuation is turned into spaces before splitting.
        # "hi" must be matched as a word, otherwise it fires on "this", "which",
        # "shipping", ... and mislabels ordinary questions as greetings.
        tokens = set("".join(ch if ch.isalnum() else " " for ch in low).split())
        if "refund" in low or "charged" in low or "payment" in low:
            return {"categories": ["billing_issue"], "confidence": 0.9}
        elif "export" in low or "import" in low:
            return {"categories": ["feature_request"], "confidence": 0.85}
        elif "hello" in low or "hi" in tokens:
            return {"categories": ["greeting"], "confidence": 0.8}
        else:
            return {"categories": ["general"], "confidence": 0.5}

    def classify_and_extract(self, text: str) -> dict:
        """Run ``classify`` and ``summarize`` and merge the results.

        Returns:
            A dict with ``categories``, ``confidence`` and ``summary``.
        """
        classification = self.classify(text)
        summary = self.summarize(text)
        return {
            "categories": classification["categories"],
            "confidence": classification["confidence"],
            "summary": summary
        }


In [46]:


# Source of the helper module. It is defined in this cell so the cell runs on a
# fresh kernel instead of depending on a `helper_code` variable left over from
# an earlier session (which raises NameError after Restart & Run All).
helper_code = '''"""Keyword-based mock of a Groq client for offline testing."""


class GroqClientMock:
    def __init__(self):
        pass

    def summarize(self, text: str) -> str:
        """Very simple summarizer"""
        words = text.split()
        return " ".join(words[:20]) + ("..." if len(words) > 20 else "")

    def classify(self, text: str) -> dict:
        """Very simple classifier with keywords"""
        low = text.lower()
        # Whole-word tokens, so the "hi" greeting rule does not fire on "this".
        tokens = set("".join(ch if ch.isalnum() else " " for ch in low).split())
        if "refund" in low or "charged" in low or "payment" in low:
            return {"categories": ["billing_issue"], "confidence": 0.9}
        elif "export" in low or "import" in low:
            return {"categories": ["feature_request"], "confidence": 0.85}
        elif "hello" in low or "hi" in tokens:
            return {"categories": ["greeting"], "confidence": 0.8}
        else:
            return {"categories": ["general"], "confidence": 0.5}

    def classify_and_extract(self, text: str) -> dict:
        """Combine classification and summarization"""
        classification = self.classify(text)
        summary = self.summarize(text)
        return {
            "categories": classification["categories"],
            "confidence": classification["confidence"],
            "summary": summary
        }
'''

# Use no leading folder — writes next to your notebook
helper_path = 'groq_task_helpers.py'
with open(helper_path, 'w', encoding='utf-8') as f:
    f.write(helper_code)
print('Wrote groq_task_helpers.py to', helper_path)


Wrote groq_task_helpers.py to groq_task_helpers.py


In [47]:
import importlib
import sys

# groq_task_helpers.py is created by this notebook after the kernel has
# started, so the import system's cached directory listing may not include it
# yet; invalidate_caches() makes the finder look at the directory again.
importlib.invalidate_caches()
if 'groq_task_helpers' in sys.modules:
    # Cell re-run after the file was regenerated: refresh the cached module so
    # the import below does not hand back the stale version.
    importlib.reload(sys.modules['groq_task_helpers'])
from groq_task_helpers import GroqClientMock

client = GroqClientMock()
print('GroqClientMock loaded')


GroqClientMock loaded


In [48]:
from typing import Dict
# Two short hand-written conversations used as fixtures by the cells below.
# Each chat has an 'id' and an ordered list of 'messages'; each message has a
# 'role' and its 'text'.
sample_chats = [
    dict(
        id="chat_001",
        messages=[
            dict(role="user", text="Hi, I need help with my subscription. I was charged twice."),
            dict(role="assistant", text="Sorry to hear that — can you share the order id?"),
            dict(role="user", text="Order 12345. I also want to know if I can get a refund."),
        ],
    ),
    dict(
        id="chat_002",
        messages=[
            dict(role="user", text="Does your product support CSV export and bulk import?"),
            dict(role="assistant", text="Yes — CSV export is available in settings."),
        ],
    ),
]

print("Sample chats prepared ")


Sample chats prepared 


In [49]:
from typing import Dict, List

class GroqClientMock:
    """Keyword-driven mock client returning canned summaries and extractions.

    Defining this class rebinds the name ``GroqClientMock`` in the notebook
    namespace, so clients created after this cell use this implementation.
    """

    @staticmethod
    def _detect_topic(text: str) -> str:
        """Return "billing", "export" or "general" for ``text``.

        Matching is case-insensitive so that "Refund", "csv" or "Export" are
        recognised as well; billing keywords take precedence over export ones.
        """
        low = text.lower()
        if "refund" in low or "payment" in low:
            return "billing"
        if "csv" in low or "export" in low:
            return "export"
        return "general"

    def summarize(self, text: str) -> str:
        """Return a fixed one-sentence summary chosen by the detected topic."""
        topic = self._detect_topic(text)
        if topic == "billing":
            return "User has a billing issue about refund/payment."
        if topic == "export":
            return "User is asking about product CSV export features."
        return "General user query."

    def classify_and_extract(self, text: str) -> Dict:
        """Return a canned classification record for ``text``.

        Returns:
            A dict with ``categories`` (list of str), ``summary`` (str),
            ``entities`` (list of str; a fixed list per topic, not the
            keywords actually found) and ``confidence`` (float).
        """
        topic = self._detect_topic(text)
        if topic == "billing":
            return {
                "categories": ["billing_issue"],
                "summary": "User has a billing issue about refund/payment.",
                "entities": ["refund", "payment"],
                "confidence": 0.9
            }
        if topic == "export":
            return {
                "categories": ["feature_request"],
                "summary": "User is asking about CSV export feature.",
                "entities": ["CSV", "export"],
                "confidence": 0.85
            }
        return {
            "categories": ["general"],
            "summary": "General user question.",
            "entities": [],
            "confidence": 0.7
        }



In [50]:

client = GroqClientMock()


def chat_to_text(chat: Dict) -> str:
    """Flatten a chat record into one string, one "role: text" line per message.

    Defined here because ``classify_and_extract_chat`` calls it; without a
    definition in the notebook the cell fails with NameError on a fresh kernel.
    Missing 'messages', 'role' or 'text' keys are tolerated.
    """
    lines = []
    for message in chat.get('messages', []):
        lines.append(f"{message.get('role', 'unknown')}: {message.get('text', '')}")
    return "\n".join(lines)


def classify_and_extract_chat(chat: Dict, client: GroqClientMock) -> Dict:
    """Classify a whole chat by passing its flattened text to ``client``.

    Args:
        chat: A chat record with a 'messages' list.
        client: Object exposing ``classify_and_extract(text)``.

    Returns:
        Whatever ``client.classify_and_extract`` returns for the chat text.
    """
    text = chat_to_text(chat)
    return client.classify_and_extract(text)


# Smoke test over the sample data: print each result and check that the two
# keys the rest of the notebook relies on are present.
for c in sample_chats:
    out = classify_and_extract_chat(c, client)
    print("Output for", c['id'], ":", out)
    assert 'categories' in out and 'summary' in out

print('Basic checks passed')








Output for chat_001 : {'categories': ['billing_issue'], 'summary': 'User has a billing issue about refund/payment.', 'entities': ['refund', 'payment'], 'confidence': 0.9}
Output for chat_002 : {'categories': ['feature_request'], 'summary': 'User is asking about CSV export feature.', 'entities': ['CSV', 'export'], 'confidence': 0.85}
Basic checks passed


In [51]:
# Quick checks: the sample data is a list and every chat yields a result that
# carries both the 'categories' and the 'summary' key.
assert isinstance(sample_chats, list)
for c in sample_chats:
    out = classify_and_extract_chat(c, client)
    assert all(key in out for key in ('categories', 'summary'))
print('Basic checks passed ')




Basic checks passed 


## GitHub upload (optional)

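To push this notebook to GitHub programmatically, set the environment variables `GITHUB_TOKEN` (a token with write access to the repository) and `GITHUB_REPO` (in `owner/name` form), then call `push_to_github()` from the cell below. Use with caution: the call commits the file directly to the repository.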

In [52]:
import base64
import os
import requests

def push_to_github(filepath='/mnt/data/groq_task_notebook.ipynb', repo_env='GITHUB_REPO', timeout=30):
    """Upload a file to a GitHub repository through the contents API.

    The file is stored at the repository root under its base name. If a file
    with that name already exists, its blob ``sha`` is looked up first and sent
    along, because the API rejects updates that do not include it.

    Args:
        filepath: Local path of the file to upload.
        repo_env: Name of the environment variable holding the target
            repository in ``owner/name`` form.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        ``(status_code, parsed_json_body)`` of the PUT response.

    Raises:
        EnvironmentError: If ``GITHUB_TOKEN`` or the repository variable is
            unset or empty.
        OSError: If ``filepath`` cannot be read.
    """
    token = os.getenv('GITHUB_TOKEN')
    repo = os.getenv(repo_env)
    if not token or not repo:
        # Name the variable that was actually looked up, not a fixed one.
        raise EnvironmentError(f'Set GITHUB_TOKEN and {repo_env} environment variables first')
    with open(filepath, 'rb') as f:
        content = base64.b64encode(f.read()).decode('utf-8')
    url = f'https://api.github.com/repos/{repo}/contents/{os.path.basename(filepath)}'
    headers = {'Authorization': f'token {token}'}
    data = {
        'message': 'Add Groq task notebook',
        'content': content
    }
    # An update (as opposed to a create) must carry the current blob sha.
    existing = requests.get(url, headers=headers, timeout=timeout)
    if existing.status_code == 200:
        info = existing.json()
        # A directory at this path yields a list; only a file dict has a sha.
        if isinstance(info, dict) and info.get('sha'):
            data['sha'] = info['sha']
    # Without a timeout a stalled connection would block the notebook forever.
    resp = requests.put(url, json=data, headers=headers, timeout=timeout)
    return resp.status_code, resp.json()

print('push_to_github function ready')


push_to_github function ready
