In [1]:
import os

os.chdir("../")
%pwd

'/Users/tapankheni/Developer/POC-SWE-RAG'

In [3]:
# Load environment variables before any later cell reads them:
# parse_developer_query looks up GROQ_API_KEY through os.getenv().
# NOTE(review): presumably load_dotenv() sources them from a .env file found
# from the current working directory — confirm against python-dotenv.
from dotenv import load_dotenv

load_dotenv()

True

In [11]:
import json
import re
from groq import Groq
import httpx

In [16]:
groq_chat_url = "https://api.groq.com/openai/v1/chat/completions"

def parse_developer_query(user_input):
    """
    Ask the Groq chat-completions endpoint to split a developer message into
    structured components and return them as a dict.

    The message is embedded in a prompt that instructs the model to answer with
    a JSON object holding ``user_query``, ``context`` and ``error``; the model's
    reply is then decoded with ``json.loads``.

    Args:
        user_input (str): The user's query, which may contain code snippets, error messages, etc.

    Returns:
        dict: The decoded JSON reply (expected keys: user_query, context, error).
        If the model's reply is not valid JSON, a dict with the single key
        ``"error"`` describing the parse failure is returned instead.

    Raises:
        ValueError: If the GROQ_API_KEY environment variable is unset or empty.
        Exception: Any other failure (HTTP/transport errors raised by httpx,
            an unexpected response shape, ...) is logged and re-raised unchanged.
    """

    try:
        # Fail fast instead of sending the literal header "Bearer None".
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            raise ValueError(
                "GROQ_API_KEY is not set; export it or add it to the .env file "
                "before calling parse_developer_query"
            )

        system_prompt = """
        You are an AI assistant specialized in analyzing developer messages. Your task is to parse technical 
        questions that may contain code snippets, error messages, or other technical information.

        You must extract or construct three key components:
        1. user_query: The natural language question the user is asking. If there's no explicit question, construct 
        one based on the code and errors. This should be a clear, concise representation of what the user wants to know.
        2. context: Any code snippets that provide context for the question. Extract ALL code blocks completely WITHOUT including markdown syntax like ```python or ```. Only include the actual code.
        3. error: Any error messages or stack traces present in the input. Extract these verbatim.

        Return ONLY a valid JSON object with these three fields, nothing else. Do not include explanations or additional text.

        Do NOT fabricate or add any information not present in the original input. If a field has no relevant content, return an empty string for that field.

        Example input:
        Modeling's `separability_matrix` does not compute separability correctly for nested CompoundModels Consider the following model: ```python from astropy.modeling import models as m from astropy.modeling.separable import separability_matrix cm = m.Linear1D(10) & m.Linear1D(5) ``` It's separability matrix as you might expect is a diagonal: ```python >>> separability_matrix(cm) array([[ True, False], [False, True]]) ``` If I make the model more complex: ```python >>> separability_matrix(m.Pix2Sky_TAN() & m.Linear1D(10) & m.Linear1D(5)) array([[ True, True, False, False], [ True, True, False, False], [False, False, True, False], [False, False, False, True]]) ``` The output matrix is again, as expected, the outputs and inputs to the linear models are separable and independent of each other. If however, I nest these compound models: ```python >>> separability_matrix(m.Pix2Sky_TAN() & cm) array([[ True, True, False, False], [ True, True, False, False], [False, False, True, True], [False, False, True, True]]) ``` Suddenly the inputs and outputs are no longer separable? This feels like a bug to me, but I might be missing something?

        Example output:
        {
        "user_query": "Is there a bug in the `separability_matrix` function that causes it to compute separability incorrectly for nested CompoundModels?",
        "context": "from astropy.modeling import models as m\nfrom astropy.modeling.separable import separability_matrix\ncm = m.Linear1D(10) & m.Linear1D(5)\n\nseparability_matrix(cm)\narray([[ True, False], [False, True]])\n\nseparability_matrix(m.Pix2Sky_TAN() & m.Linear1D(10) & m.Linear1D(5))\narray([[ True, True, False, False], [ True, True, False, False], [False, False, True, False], [False, False, False, True]])\n\nseparability_matrix(m.Pix2Sky_TAN() & cm)\narray([[ True, True, False, False], [ True, True, False, False], [False, False, True, True], [False, False, True, True]])",
        "error": ""
        }
        """

        user_prompt = f"""
        Analyze the following developer message and extract the key components as specified:
        
        {user_input}
        
        Return a JSON with user_query, context, and error fields.
        """

        messages = [
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": user_prompt,
            }
        ]

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        payload = {
            "model": "gemma2-9b-it",
            "messages": messages,
            "response_format": {
                "type": "json_object"
            },
            "temperature": 0.1,
            "max_completion_tokens": 2048,
            "stream": False,
        }

        # TLS certificate verification stays at httpx's default (enabled): the
        # request carries the API key, so it must not go to an unverified peer.
        # The explicit timeout gives the completion more room than httpx's
        # short default.
        with httpx.Client(timeout=30.0) as client:
            response = client.post(groq_chat_url, headers=headers, json=payload)
            response.raise_for_status()
            response_data = response.json()["choices"][0]["message"]["content"]

        print(f'response_data: {response_data}')
        print(f'response_data type: {type(response_data)}')

        # Only the decoding of the model's reply is guarded here, and the
        # handler logs the text already in hand rather than re-parsing the
        # HTTP body (which could itself raise inside the handler).
        try:
            return json.loads(response_data)
        except json.JSONDecodeError:
            print(f'failed to decode the LLM response as JSON: {response_data}')
            # NOTE: this fallback reuses the "error" key, which on success holds
            # the error text extracted from the user's message; the shape is
            # kept as-is for backward compatibility.
            return {
                "error" :  "Failed to parse LLM response into valid JSON"
            }

    except Exception as e:
        print(f"An error occurred while parsing the developer query: {e}")
        # Bare raise keeps the original traceback intact.
        raise

In [18]:
# Case 1: a markdown-formatted issue report with fenced ```python snippets and
# interpreter output, but no error message or traceback.
test_input = """
Modeling's `separability_matrix` does not compute separability correctly for nested CompoundModels
Consider the following model:

```python
from astropy.modeling import models as m
from astropy.modeling.separable import separability_matrix

cm = m.Linear1D(10) & m.Linear1D(5)
```

It's separability matrix as you might expect is a diagonal:

```python
>>> separability_matrix(cm)
array([[ True, False],
[False, True]])
```

If I make the model more complex:
```python
>>> separability_matrix(m.Pix2Sky_TAN() & m.Linear1D(10) & m.Linear1D(5))
array([[ True, True, False, False],
[ True, True, False, False],
[False, False, True, False],
[False, False, False, True]])
```

The output matrix is again, as expected, the outputs and inputs to the linear models are separable and independent of each other.

If however, I nest these compound models:
```python
>>> separability_matrix(m.Pix2Sky_TAN() & cm)
array([[ True, True, False, False],
[ True, True, False, False],
[False, False, True, True],
[False, False, True, True]])
```
Suddenly the inputs and outputs are no longer separable?

This feels like a bug to me, but I might be missing something?
"""

preprocessed_user_input = parse_developer_query(user_input = test_input)
print(preprocessed_user_input)

response_data: {
  "user_query": "Is there a bug in the `separability_matrix` function that causes it to compute separability incorrectly for nested CompoundModels?",
  "context": "from astropy.modeling import models as m\nfrom astropy.modeling.separable import separability_matrix\ncm = m.Linear1D(10) & m.Linear1D(5)\n\nseparability_matrix(cm)\narray([[ True, False],\n[False, True]])\n\nseparability_matrix(m.Pix2Sky_TAN() & m.Linear1D(10) & m.Linear1D(5))\narray([[ True, True, False, False],\n[ True, True, False, False],\n[False, False, True, False],\n[False, False, False, True]])\n\nseparability_matrix(m.Pix2Sky_TAN() & cm)\narray([[ True, True, False, False],\n[ True, True, False, False],\n[False, False, True, True],\n[False, False, True, True]])",
  "error": ""
}
response_data type: <class 'str'>
{'user_query': 'Is there a bug in the `separability_matrix` function that causes it to compute separability incorrectly for nested CompoundModels?', 'context': 'from astropy.modeling import

In [20]:
# Case 2: unfenced code plus an explicit FileNotFoundError message and a
# direct question. Note this rebinds test_input / preprocessed_user_input.
test_input = """
I'm trying to read a CSV file with pandas but I'm getting an error:

import pandas as pd
df = pd.read_csv('data.csv')
print(df.head())

but got the following error:
FileNotFoundError: [Errno 2] No such file or directory: 'data.csv'

how do I fix this?
"""

preprocessed_user_input = parse_developer_query(user_input = test_input)
print(preprocessed_user_input)

response_data: {
  "user_query": "How do I fix the FileNotFoundError when trying to read a CSV file with pandas?",
  "context": "import pandas as pd\ndf = pd.read_csv('data.csv')\nprint(df.head())",
  "error": "FileNotFoundError: [Errno 2] No such file or directory: 'data.csv'"
}
response_data type: <class 'str'>
{'user_query': 'How do I fix the FileNotFoundError when trying to read a CSV file with pandas?', 'context': "import pandas as pd\ndf = pd.read_csv('data.csv')\nprint(df.head())", 'error': "FileNotFoundError: [Errno 2] No such file or directory: 'data.csv'"}


In [21]:
# Case 3: a bare natural-language request with no code and no error text.
# Note this rebinds test_input / preprocessed_user_input.
test_input = "implement a function to call the API and return the response"

preprocessed_user_input = parse_developer_query(user_input = test_input)
print(preprocessed_user_input)

response_data: {
  "user_query": "How do I implement a function to call an API and return the response?",
  "context": "",
  "error": ""
}
response_data type: <class 'str'>
{'user_query': 'How do I implement a function to call an API and return the response?', 'context': '', 'error': ''}
