In [None]:
!pip install langgraph langchain pandas faiss-cpu langchain-groq sentence-transformers langchain_community

In [2]:
from google.colab import userdata
import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langgraph.graph import END, StateGraph
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from typing import TypedDict, List

# Pull the Groq credential from Colab secrets and read both CSV inputs
groq_api = userdata.get("groq_api_key")
df = pd.read_csv("/content/defects.csv")  # Defect descriptions with their known solutions
test_cases_df = pd.read_csv("/content/test_cases.csv")  # Existing test cases, filtered by module later

# One retrievable Document per defect row; rows missing either the
# description or the solution are skipped.
docs = [
    Document(
        page_content=defect["Description"],
        metadata={"solution": defect["Solution"], "module": defect["Module"]},
    )
    for _, defect in df.iterrows()
    if pd.notna(defect["Description"]) and pd.notna(defect["Solution"])
]

# Embed the descriptions, index them with FAISS, and expose a retriever that
# returns only the single closest match (k=1).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_documents(docs, embeddings)
retriever = vector_store.as_retriever(search_kwargs={"k": 1})

# Define state for the agent
class AgentState(TypedDict):
    """State dictionary passed between the workflow nodes."""

    # Error/defect text supplied by the caller.
    input: str
    # Retrieved matches. NOTE(review): annotated as dicts, but retrieve() stores
    # whatever retriever.invoke() returns and the validation node reads
    # `.metadata` from the first item — presumably Document objects; confirm
    # and tighten the annotation.
    context: List[dict]
    # Final formatted answer (or an error message) produced by the validation node.
    response: str

# Initialize LLM
# Shared Groq chat model used for every explanation, test-case and
# alternative-solution prompt in this file; it authenticates with the
# `groq_api` key read from Colab secrets.
llm = ChatGroq(
    groq_api_key=groq_api,
    temperature=0.3,
    model_name="gemma2-9b-it",
)

# Step 1: Retrieve solution for the defect
def retrieve(state: AgentState):
    """Look up the stored defect closest to the reported error text.

    Args:
        state: Current agent state; only ``state["input"]`` is read.

    Returns:
        A partial state update ``{"context": results}`` holding whatever the
        retriever returned, or ``{"context": []}`` if retrieval raised.
    """
    import logging

    try:
        return {"context": retriever.invoke(state["input"])}
    except Exception:
        # Stay best-effort (the next node reports an empty context to the
        # user), but record the real cause instead of discarding it.
        logging.getLogger(__name__).exception("Defect retrieval failed")
        return {"context": []}

# Step 2: Validate solution with test cases or generate new ones
def validate_or_generate_test_cases(state: AgentState):
    """Build the final response for a retrieved defect.

    Takes the first retrieved document, asks the LLM to explain why its stored
    solution addresses the reported error, then attaches test cases: the rows
    of ``test_cases_df`` whose ``Module`` equals the defect's module when any
    exist, otherwise two LLM-generated ones.

    Args:
        state: Agent state; reads ``state["context"]`` and ``state["input"]``.

    Returns:
        ``{"response": text}`` with the formatted answer. When the context is
        empty, or any step raises, the response holds an error message instead.
    """
    try:
        if not state["context"]:
            return {"response": "**Error**: The defect could not be found in the database."}

        context = state["context"][0]
        error_message = state["input"]
        solution = context.metadata["solution"]
        module = context.metadata["module"]

        # Generate an explanation for the solution
        explanation_prompt = """
        [INST] Explain why this solution fixes the following error:
        Error: {error}
        Solution: {solution}
        [/INST]
        """
        explanation = llm.invoke(ChatPromptTemplate.from_template(explanation_prompt).format(
            error=error_message,
            solution=solution
        )).content.strip()

        # Check if test cases exist
        matching_test_cases = test_cases_df[test_cases_df["Module"] == module]
        if not matching_test_cases.empty:
            test_cases = "\n".join([
                f"**Test Scenario (Fetched from CSV)**: {row['Test_Scenario']}\n"
                f"**Test Steps**: {row['Test_Steps']}\n"
                f"**Pre Requisites**: {row['Pre_Requisites']}\n"
                f"**Expected Result**: {row['Expected_Result']}\n"
                f"**Pass/Fail Criteria**: {row['Pass_Fail_Criteria']}\n"
                for _, row in matching_test_cases.iterrows()
            ])
            response_template = """**Error:**\n{Error}\n\n**Solution (Fetched from CSV):**\n{Solution}\n\n**Explanation:**\n{Explanation}\n\n**Test Cases (Fetched from CSV):**\n{TestCases}"""
            return {"response": response_template.format(
                Error=error_message,
                Solution=solution,
                Explanation=explanation,
                TestCases=test_cases
            )}

        # No stored test cases for this module: generate new ones
        prompt_template = """
        [INST] Given this error and known solution:
        Error: {error}
        Solution: {solution}
        Generate **exactly** 2 structured test cases to validate that solution fixes the issue:
        - 1 Positive Test Case(where solution works correctly)
        - 1 Negative Test Case(where solution fails or is misconfigured)
        Each test case should be unique and structured in this format:
        1. **Test Scenario**: Describe what is being tested
        2. **Test Steps**: Step-by-step actions to perform
        3. **Pre Requisites**: What should be done before the test
        4. **Expected Results**: What should happen if the solution is correct
        5. **Pass/Fail Criteria**: How to determine if the test passes or fails
        [/INST]
        """
        generated_test_cases = llm.invoke(ChatPromptTemplate.from_template(prompt_template).format(
            error=error_message,
            solution=solution
        )).content
        # The solution still comes from the retrieved CSV row in this branch;
        # only the test cases are produced by the LLM, so label it accordingly.
        response_template = """**Error:**\n{Error}\n\n**Solution (Fetched from CSV):**\n{Solution}\n\n**Explanation:**\n{Explanation}\n\n**Generated Test Cases (Generated by Agent):**\n{TestCases}"""
        return {"response": response_template.format(
            Error=error_message,
            Solution=solution,
            Explanation=explanation,
            TestCases=generated_test_cases.strip()
        )}
    except Exception as e:
        # Surface the failure in the response rather than aborting the graph run.
        return {"response": f"Error processing request: {str(e)}"}

# Define the workflow
# Two-node linear graph over AgentState:
#   retrieve -> validate_or_generate_test_cases -> END
workflow = StateGraph(AgentState)
workflow.add_node("retrieve", retrieve)
workflow.add_node("validate_or_generate_test_cases", validate_or_generate_test_cases)
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "validate_or_generate_test_cases")
workflow.add_edge("validate_or_generate_test_cases", END)

# Compile the agent
# `agent` is the compiled graph; get_solution_with_feedback runs it via agent.invoke(...).
agent = workflow.compile()

# Feedback storage
# In-memory log of every rating given in this session. It starts empty, so the
# CSV written by collect_feedback only ever contains the current session's rows.
feedback_df = pd.DataFrame(columns=["Error", "Solution", "Rating"])

# Function to collect feedback
def collect_feedback(error_message, solution, rating, path="/content/feedback.csv"):
    """Append one feedback row to ``feedback_df`` and rewrite the CSV at ``path``.

    Args:
        error_message: The error text the user asked about.
        solution: The solution/response text that is being recorded.
        rating: The rating to store alongside it.
        path: Destination CSV file; defaults to the original Colab location.
            The file is rewritten in full from ``feedback_df`` on every call.
    """
    global feedback_df
    new_feedback = pd.DataFrame({
        "Error": [error_message],
        "Solution": [solution],
        "Rating": [rating]
    })
    if feedback_df.empty:
        # Concatenating onto an empty frame triggers a pandas FutureWarning
        # (empty-entry dtype handling is deprecated); the first row simply
        # becomes the log.
        feedback_df = new_feedback
    else:
        feedback_df = pd.concat([feedback_df, new_feedback], ignore_index=True)
    feedback_df.to_csv(path, index=False)

# Helper: keep asking until the user enters a whole number from 1 to 5
def _prompt_for_rating():
    """Prompt on stdin until a valid 1-5 rating is entered, then return it."""
    while True:
        try:
            rating = int(input("Rate the solution (1-5): "))
        except ValueError:
            print("Invalid input. Please enter a number between 1 and 5.")
            continue
        if 1 <= rating <= 5:
            return rating
        print("Invalid rating. Please enter a number between 1 and 5.")

# Function to invoke the agent and handle feedback
def get_solution_with_feedback(error_message):
    """Run the agent for an error, print its answer, and collect a rating.

    The agent is invoked exactly once. The user is then asked for a 1-5
    rating (re-prompted until valid), which is stored via ``collect_feedback``.
    For ratings below 3 the LLM is asked for an alternative solution plus two
    test cases; both are printed and the alternative is stored as well.

    Args:
        error_message: Free-text description of the defect/error.
    """
    # Invoke once up front: a mistyped rating must only repeat the prompt,
    # not re-run retrieval and the LLM calls or re-print the answer.
    result = agent.invoke({"input": error_message.strip()})
    print(result["response"])

    rating = _prompt_for_rating()
    collect_feedback(error_message, result["response"], rating)

    if rating >= 3:
        print("Thank you for your feedback!")
        return

    # Rating below 3: generate an alternative solution and test cases
    print("\nGenerating an alternative solution...\n")

    # Prompt text (including its leading whitespace) is kept exactly as the
    # model has been receiving it.
    alternative_prompt = """
            [INST] Provide a concise and actionable alternative solution for the following error:
            Error: {error}
            Ensure the solution is clear and does not include any follow-up questions or requests for clarification.
            [/INST]
            """
    alternative_solution = llm.invoke(ChatPromptTemplate.from_template(alternative_prompt).format(
        error=error_message
    )).content.strip()

    # Generate test cases for the alternative solution
    test_case_prompt = """
            [INST] Given this error and the new solution:
            Error: {error}
            Solution: {solution}
            Generate **exactly** 2 structured test cases to validate that solution fixes the issue:
            - 1 Positive Test Case(where solution works correctly)
            - 1 Negative Test Case(where solution fails or is misconfigured)
            Each test case should be unique and structured in this format:
            1. **Test Scenario**: Describe what is being tested
            2. **Test Steps**: Step-by-step actions to perform
            3. **Pre Requisites**: What should be done before the test
            4. **Expected Results**: What should happen if the solution is correct
            5. **Pass/Fail Criteria**: How to determine if the test passes or fails
            [/INST]
            """
    generated_test_cases = llm.invoke(ChatPromptTemplate.from_template(test_case_prompt).format(
        error=error_message,
        solution=alternative_solution
    )).content.strip()

    # Print the alternative solution and test cases
    print(f"**Alternative Solution (Generated by Agent):**\n{alternative_solution}")
    print(f"\n**Generated Test Cases (Generated by Agent):**\n{generated_test_cases}")

    # Store feedback for the alternative solution.
    # NOTE(review): this records the rating the user gave the ORIGINAL answer
    # against the alternative, which was never rated itself — confirm that
    # this is the intended meaning of the row.
    collect_feedback(error_message, alternative_solution, rating)

# Example usage
# Runs the interactive flow (agent answer, rating prompt, optional alternative)
# for one sample defect description.
if __name__ == "__main__":
    print("=== Exact Error ===")
    get_solution_with_feedback("Search results not displaying correctly")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

=== Exact Error ===
**Error:**
Search results not displaying correctly

**Solution (Fetched from CSV):**
Updated Elasticsearch query logic to handle special characters.

**Explanation:**
The solution fixes the "Search results not displaying correctly" error by addressing a potential issue with how Elasticsearch interprets special characters in search queries. 

Here's why:

* **Special characters can cause problems:**  Characters like spaces, punctuation marks, or symbols often have special meanings in search queries (e.g., "AND", "OR", quotation marks for phrase searching). If Elasticsearch doesn't handle these characters correctly, it might misinterpret the search intent, leading to inaccurate or incomplete results.
* **Updated query logic:** The "Updated Elasticsearch query logic" part of the solution implies that the code responsible for constructing the search queries has been modified. This update likely involves:
    * **Escaping special characters:**  Adding special characters 

In [3]:
# Second interactive run with a free-form description of a date-format defect
get_solution_with_feedback("dates format is showing different for asia and for africa")

**Error:**
dates format is showing different for asia and for africa

**Solution (Fetched from CSV):**
Updated date formatting rules for regional settings.

**Explanation:**
The solution fixes the error by addressing the root cause: **inconsistent date formatting based on region**. 

Here's a breakdown:

* **Problem:** Different regions (like Asia and Africa) have varying conventions for displaying dates (e.g., day/month/year vs. month/day/year). This leads to the "dates format is showing different" error.
* **Solution:** Updating the date formatting rules for regional settings ensures that the system uses the appropriate date format based on the user's location. This means dates will be displayed consistently within each region, resolving the inconsistency issue.


Essentially, the solution customizes the date display to match local preferences, preventing confusion and ensuring accurate date representation across different regions.

**Test Cases (Fetched from CSV):**
**Test Scenario 