In [None]:
from langchain_groq import ChatGroq
from sentence_transformers import SentenceTransformer
import pinecone
from pinecone import Pinecone
import json


# Function to search MCQs based on user query across multiple namespaces
def search_mcqs_by_query(index, query, namespaces, top_k=50):
    """
    Search several namespaces of a vector index for MCQs similar to ``query``.

    The query text is embedded once with the 'all-mpnet-base-v2'
    SentenceTransformer model, every namespace is queried with that vector,
    and the merged matches are ranked by score.

    Args:
        index: Object exposing ``query(vector=..., namespace=..., top_k=...,
            include_metadata=True)`` whose response has a "matches" sequence
            (presumably a Pinecone index handle; verify against caller).
        query: Text to embed and search for.
        namespaces: Iterable of namespace names to search.
        top_k: Maximum number of matches requested per namespace, and also the
            maximum number of merged results returned.

    Returns:
        A list of at most ``top_k`` dicts with the keys "id", "metadata",
        "score" and "namespace", sorted by descending score. A namespace whose
        query raises is reported on stdout and skipped (best effort).
    """
    model = _get_embedding_model('all-mpnet-base-v2')
    # The vector is the same for every namespace, so convert it to a list once.
    query_vector = model.encode(query).tolist()
    all_results = []

    for namespace in namespaces:
        try:
            response = index.query(
                vector=query_vector,
                namespace=namespace,
                top_k=top_k,
                include_metadata=True,
            )
            for match in response["matches"]:
                all_results.append({
                    "id": match["id"],
                    "metadata": match["metadata"],
                    "score": match["score"],
                    "namespace": namespace
                })
        except Exception as e:
            # Deliberately best-effort: one failing namespace must not abort the search.
            print(f"Error searching namespace '{namespace}': {e}")

    # Sort all results by score in descending order
    all_results.sort(key=lambda x: x["score"], reverse=True)

    return all_results[:top_k]  # Return top_k best matches across all namespaces


# Loaded embedding models keyed by model name, so repeated searches reuse them.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name):
    """Return the SentenceTransformer for ``model_name``, loading it on first use."""
    if model_name not in _EMBEDDING_MODELS:
        _EMBEDDING_MODELS[model_name] = SentenceTransformer(model_name)
    return _EMBEDDING_MODELS[model_name]


# Function to generate a quiz using ChatGroq (LLaMA model)
def generate_quiz_with_groq(llm, retrieved_data, query, num_questions):
    """
    Generate a quiz with the LLM from retrieved MCQs, allowing augmentation.

    Args:
        llm: Object exposing ``invoke(prompt)`` whose result has a ``content``
            attribute holding the model's text reply.
        retrieved_data: Dict with an "mcqs" list; each MCQ must provide the keys
            "question_no", "question_text", "question_img_link", "options",
            "correct_option" and "topic".
        query: The user's query, quoted in the prompt to steer the quiz.
        num_questions: Number of questions the prompt asks the model to produce.

    Returns:
        Whatever ``clean_response`` returns for the model's reply.

    Raises:
        KeyError: If "mcqs" or one of the required MCQ keys is missing.
    """
    # Serialize each MCQ with json.dumps instead of hand-built f-strings so that
    # quotes, backslashes and newlines inside question text are escaped and the
    # options are rendered as JSON rather than Python repr. ``default=str`` keeps
    # the old "stringify anything" tolerance for unexpected value types.
    formatted_mcqs = ",\n".join(
        json.dumps(
            {
                "question_no": mcq["question_no"],
                "question_text": mcq["question_text"],
                "question_img_link": mcq["question_img_link"],
                "options": mcq["options"],
                "correct_option": mcq["correct_option"],
                "topic": mcq["topic"],
            },
            indent=4,
            ensure_ascii=False,
            default=str,
        )
        for mcq in retrieved_data['mcqs']
    )

    groq_prompt = f"""
    Based on the following retrieved MCQ data, create a quiz with exactly {num_questions} multiple-choice questions.
    Align the quiz with the user query: "{query}". 
    Use the provided MCQs as much as possible, but if necessary, generate additional relevant questions to complete the quiz. Ensure that all questions are relevant and derived from the retrieved data.

    The retrieved MCQs are as follows:
    {formatted_mcqs}
    Instructions:
    1. Rewrite or enhance questions if needed to ensure clarity, conciseness, and relevance.
    2. Ensure all questions are formatted consistently in terms of structure and wording.
    Please return the quiz in the following exact JSON format starting from json format without any line in starting and '''json string:
    {{
        "quiz_title": "CPU Scheduling Quiz",
        "quiz_description": "A quiz on CPU Scheduling algorithms and concepts",
        "questions": [
            {{
                "question_no": "Question49",
                "question_text": "The question text here",
                "question_img_link": "The image URL",
                "options": [
                    {{
                        "option": "Option A",
                        "is_correct": true
                    }},
                    {{
                        "option": "Option B",
                        "is_correct": false
                    }},
                    {{
                        "option": "Option C",
                        "is_correct": false
                    }},
                    {{
                        "option": "Option D",
                        "is_correct": false
                    }}
                ],
                "correct_answer": "The correct answer",
                "correct_answer_explanation": "Explanation for the correct answer",
                "topic": "Cpu Scheduling"
            }},
            ...
        ]
    }}
    """

    # Get the response from the LLaMA model
    response = llm.invoke(groq_prompt)

    # Parse the model's text reply into a Python object
    return clean_response(response.content)
    


# Function to integrate Pinecone query with quiz generation
def generate_quiz_from_pinecone(query, namespaces, top_k=50, num_questions=10):
    """
    Generate a quiz from MCQs retrieved out of the "mcq-index" Pinecone index.

    Credentials are read from the environment variables ``PINECONE_API_KEY``
    and ``GROQ_API_KEY``; API keys must never be committed to source control.

    Args:
        query: User query used both for retrieval and to steer the quiz.
        namespaces: Namespace names to search in the index.
        top_k: Maximum number of MCQs retrieved and passed to the LLM.
        num_questions: Number of questions requested in the quiz.

    Returns:
        The string "No MCQs found for the given query." when retrieval yields
        nothing; otherwise the result of ``generate_quiz_with_groq``.

    Raises:
        RuntimeError: If a required API-key environment variable is unset or
            empty. Raised before any client is constructed.
    """
    import os  # local import so this block does not depend on file-level imports

    credentials = {
        name: os.environ.get(name)
        for name in ("PINECONE_API_KEY", "GROQ_API_KEY")
    }
    missing = [name for name, value in credentials.items() if not value]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    # Initialize Pinecone
    pc = Pinecone(api_key=credentials["PINECONE_API_KEY"], environment="us-west1-gcp")
    index_name = "mcq-index"
    index = pc.Index(index_name)

    # Retrieve the MCQs based on the user query
    mcq_results = search_mcqs_by_query(index, query, namespaces, top_k)
    if not mcq_results:
        return "No MCQs found for the given query."

    # Keep only the metadata fields the quiz prompt uses; absent fields become None.
    mcq_fields = (
        "topic",
        "question_no",
        "question_text",
        "question_img_link",
        "options",
        "correct_option",
    )
    retrieved_mcqs = [
        {field: match["metadata"].get(field) for field in mcq_fields}
        for match in mcq_results
    ]

    retrieved_data = {
        "query": query,
        "mcqs": retrieved_mcqs
    }

    # Initialize ChatGroq LLM
    # NOTE(review): confirm "llama-3.1-70b-versatile" is still served by Groq.
    llm = ChatGroq(
        temperature=0,
        groq_api_key=credentials["GROQ_API_KEY"],
        model_name="llama-3.1-70b-versatile"
    )

    # Generate quiz using ChatGroq
    return generate_quiz_with_groq(llm, retrieved_data, query, num_questions)
    


# Example usage: interactive driver that builds one quiz and prints it
namespaces = ["computer_organization", "operating_system"]  # Namespaces searched for MCQs; edit as needed
top_k = 30  # Upper bound on the number of retrieved matches
query = input("Enter your search query: ")
num_questions = int(input("Enter the number of MCQs you want in the quiz: "))

# Retrieve matching MCQs and have the LLM turn them into a quiz
quiz = generate_quiz_from_pinecone(
    query,
    namespaces,
    top_k=top_k,
    num_questions=num_questions,
)
print(quiz)


In [12]:
def clean_response(response):
    """
    Extract and parse the JSON object contained in an LLM text reply.

    Anything before the first "{" (introductory sentences, a "```json" or
    plain "```" fence) and anything after the end of the object (a closing
    fence, trailing remarks) is ignored.

    Args:
        response: The raw reply text. ``None`` is treated like an empty string.

    Returns:
        The parsed JSON object as a dict, or ``None`` when the reply contains
        no "{" or the text starting at the first "{" is not valid JSON. In
        both failure cases a diagnostic is printed to stdout.
    """
    cleaned_response = (response or "").strip()

    json_start_index = cleaned_response.find("{")
    if json_start_index == -1:
        print("The cleaned response doesn't start with a valid JSON object.")
        print("Cleaned Response:", cleaned_response)
        return None

    try:
        # raw_decode parses one JSON value starting at the given index and
        # tolerates trailing text, so a closing fence no longer breaks parsing.
        # Only the first "{" is tried: falling back to later braces could
        # return a nested fragment of a truncated reply as if it were the quiz.
        parsed_response, _ = json.JSONDecoder().raw_decode(
            cleaned_response, json_start_index
        )
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        print("Cleaned Response:", cleaned_response)
        return None

    return parsed_response


In [None]:
# Show the runtime type of `quiz`: generate_quiz_from_pinecone returns a str
# message when no MCQs are found, otherwise the result of clean_response.
print(type(quiz))