In [1]:
import os 
import json
import pandas as pd
import traceback

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
key = os.getenv("GROQ_API_KEY")

In [4]:
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
import PyPDF2


In [5]:
RESPONSE_JSON = {
  "quiz_info": {
    "title": "Quiz Title",
    "subject": "Subject Name",
    "difficulty": "easy/medium/hard",
    "total_questions": 3
  },
  "questions": [
    {
      "id": 1,
      "question": "What is the main topic discussed in the text?",
      "options": {
        "A": "Option A text",
        "B": "Option B text",
        "C": "Option C text",
        "D": "Option D text"
      },
      "correct_answer": "A",
      "explanation": "Brief explanation of why A is correct"
    },
    {
      "id": 2,
      "question": "Which of the following statements is true?",
      "options": {
        "A": "Option A text",
        "B": "Option B text",
        "C": "Option C text",
        "D": "Option D text"
      },
      "correct_answer": "B",
      "explanation": "Brief explanation of why B is correct"
    },
    {
      "id": 3,
      "question": "What would be the best approach for this scenario?",
      "options": {
        "A": "Option A text",
        "B": "Option B text",
        "C": "Option C text",
        "D": "Option D text"
      },
      "correct_answer": "C",
      "explanation": "Brief explanation of why C is correct"
    }
  ]
}

In [6]:
TEMPLATE = """
SYSTEM: You are a JSON-only MCQ generator. You must output ONLY valid JSON.

INPUT TEXT:
{text}

TASK: Create {number} MCQs for {subject} students ({tone} tone)

OUTPUT: ONLY valid JSON in this exact format:

{response_json}

CRITICAL JSON RULES:
✓ MUST start with {{ and end with }}
✓ All keys in double quotes: "key"
✓ All string values in double quotes: "value"  
✓ Separate with commas: "key": "value",
✓ No trailing commas before }} or ]
✓ Every question has: "id", "question", "options", "correct_answer", "explanation"
✓ Options object has: "A", "B", "C", "D" keys
✓ Correct answer is: "A", "B", "C", or "D"

FORBIDDEN:
✗ No text before or after JSON
✗ No markdown formatting
✗ No comments or explanations
✗ No single quotes
✗ No trailing commas
✗ Do NOT start with "quiz_info" - must start with {{

RESPOND WITH ONLY THE JSON OBJECT STARTING WITH {{ AND ENDING WITH }}.
"""

In [7]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template=TEMPLATE
    )

In [8]:
llm = ChatGroq(
    model="llama3-8b-8192",
    groq_api_key=key,
    temperature=0.3,  # Lower temperature for consistent, factual responses
    max_tokens=2048,  # Higher limit for detailed MCQ explanations
    top_p=0.8,       # Slightly lower for more focused responses
    stop=["\n\nQuestion:", "\n\n---", "\n\n###"]  # Stop at question boundaries
)



                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


In [9]:
quiz_chain=LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)

  quiz_chain=LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)


In [10]:
TEMPLATE2 = """
# Quiz Review and Enhancement Instructions

## Input Quiz
{quiz}

## Task Description
You are an expert English grammarian and writer. Given a Multiple Choice Quiz for {subject} students, you need to evaluate the complexity of the questions and provide a complete analysis of the quiz.

## Review Requirements
1. **Complexity Analysis**: Evaluate if questions match student cognitive abilities (max 50 words)
2. **Quality Assessment**: Check grammar, clarity, and educational value
3. **Difficulty Adjustment**: Update questions that don't match student level
4. **Tone Optimization**: Adjust tone to perfectly fit student abilities
5. **Content Validation**: Ensure questions are appropriate for {subject} students

## Analysis Guidelines
- **Cognitive Level**: Assess if questions are too easy, too hard, or just right
- **Language Complexity**: Check vocabulary and sentence structure
- **Question Clarity**: Ensure questions are unambiguous and well-written
- **Option Quality**: Verify distractors are plausible but clearly incorrect
- **Educational Value**: Confirm questions promote learning and understanding

## Enhancement Instructions
- If questions are too complex: Simplify language and reduce difficulty
- If questions are too simple: Increase complexity and add analytical elements
- If tone is inappropriate: Adjust to match student age and subject level
- If grammar is poor: Correct all grammatical and punctuation errors
- If explanations are unclear: Improve clarity and educational value

## Output Format
Provide your analysis and any necessary updates in the following structure:

### COMPLEXITY ANALYSIS
[Your 50-word complexity assessment]

### QUALITY ASSESSMENT
[Grammar, clarity, and educational value review]

### ENHANCED QUIZ
[Updated quiz with improved questions, tone, and difficulty level]

### CHANGES MADE
[List of specific improvements and modifications]

## Instructions
- Be thorough but concise in your analysis
- Focus on educational appropriateness and student engagement
- Maintain the original quiz structure while improving quality
- Ensure all changes enhance learning outcomes
"""

In [11]:
quiz_evaluation_prompt=PromptTemplate(input_variables=["subject", "quiz"], template=TEMPLATE2)

In [12]:
review_chain=LLMChain(llm=llm, prompt=quiz_evaluation_prompt, output_key="review", verbose=True)

In [13]:
generate_evaluate_chain=SequentialChain(chains=[quiz_chain, review_chain], input_variables=["text", "number", "subject", "tone", "response_json"],
                                        output_variables=["quiz", "review"], verbose=False,)

In [14]:
file_path = "D:\MCQ_Generator\data.txt"

In [15]:
with open(file_path, 'r') as file:
    TEXT = file.read()

In [16]:
NUMBER= 7
SUBJECT="computer vision"
TONE="simple"

In [17]:
response=generate_evaluate_chain(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject":SUBJECT,
            "tone": TONE,
            "response_json": json.dumps(RESPONSE_JSON)
        }
        )

  response=generate_evaluate_chain(
Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


Prompt after formatting:
[32;1m[1;3m
SYSTEM: You are a JSON-only MCQ generator. You must output ONLY valid JSON.

INPUT TEXT:
Computer vision tasks include methods for acquiring, processing, analyzing, and understanding digital images, and extraction of high-dimensional data from the real world in order to produce numerical or symbolic information, e.g. in the form of decisions.[1][2][3][4] "Understanding" in this context signifies the transformation of visual images (the input to the retina) into descriptions of the world that make sense to thought processes and can elicit appropriate action. This image understanding can be seen as the disentangling of symbolic information from image data using models constructed with the aid of geometry, physics, statistics, and learning theory.

The scientific discipline of computer vision is concerned with the theory behind artificial systems that extract information from images. Image data can take many forms, such as video sequences, views from

Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")



[1m> Finished chain.[0m
Prompt after formatting:
[32;1m[1;3m
# Quiz Review and Enhancement Instructions

## Input Quiz
{
"quiz_info": {
"title": "Computer Vision Quiz",
"subject": "Computer Vision",
"difficulty": "easy",
"total_questions": 7
},
"questions": [
{
"id": 1,
"question": "What is the main topic discussed in the text?",
"options": {
"A": "Acquiring, processing, and analyzing digital images",
"B": "Understanding digital images",
"C": "Extracting high-dimensional data from the real world",
"D": "Applying computer vision theories to construction"
},
"correct_answer": "B",
"explanation": "The text discusses the scientific discipline of computer vision and its concern with the theory behind artificial systems that extract information from images."
},
{
"id": 2,
"question": "What is the term for the transformation of visual images into descriptions of the world?",
"options": {
"A": "Image understanding",
"B": "Scene reconstruction",
"C": "Object detection",
"D": "Activity reco

In [18]:
quiz = (response["quiz"])
type(quiz)

str

In [354]:
import pandas as pd
from datetime import datetime

In [19]:
# Check what variables you have
print("Available variables:")
print([var for var in dir() if not var.startswith('_')])

# If you have a response, check its name
# Common names might be: response, result, mcqs, mcq_response, etc.

Available variables:
['ChatGroq', 'In', 'LLMChain', 'NUMBER', 'Out', 'PromptTemplate', 'PyPDF2', 'RESPONSE_JSON', 'SUBJECT', 'SequentialChain', 'TEMPLATE', 'TEMPLATE2', 'TEXT', 'TONE', 'exit', 'file', 'file_path', 'generate_evaluate_chain', 'get_ipython', 'json', 'key', 'llm', 'load_dotenv', 'open', 'os', 'pd', 'quit', 'quiz', 'quiz_chain', 'quiz_evaluation_prompt', 'quiz_generation_prompt', 'response', 'review_chain', 'traceback']


In [356]:
# Check the type and structure of your response
print(f"Response type: {type(response)}")
print(f"Response content: {response}")

# If it's a dictionary, check its keys
if isinstance(response, dict):
    print(f"Dictionary keys: {list(response.keys())}")
    # Common keys might be: 'content', 'text', 'response', 'result', etc.

Response type: <class 'dict'>
Response content: {'text': 'Computer vision tasks include methods for acquiring, processing, analyzing, and understanding digital images, and extraction of high-dimensional data from the real world in order to produce numerical or symbolic information, e.g. in the form of decisions.[1][2][3][4] "Understanding" in this context signifies the transformation of visual images (the input to the retina) into descriptions of the world that make sense to thought processes and can elicit appropriate action. This image understanding can be seen as the disentangling of symbolic information from image data using models constructed with the aid of geometry, physics, statistics, and learning theory.\n\nThe scientific discipline of computer vision is concerned with the theory behind artificial systems that extract information from images. Image data can take many forms, such as video sequences, views from multiple cameras, multi-dimensional data from a 3D scanner, 3D poin

In [20]:
# Extract the quiz content from your response
quiz_content = response['quiz']
print("🔍 YOUR MCQ QUIZ:")
print("=" * 80)
print(quiz_content)
print("=" * 80)

🔍 YOUR MCQ QUIZ:
{
"quiz_info": {
"title": "Computer Vision Quiz",
"subject": "Computer Vision",
"difficulty": "easy",
"total_questions": 7
},
"questions": [
{
"id": 1,
"question": "What is the main topic discussed in the text?",
"options": {
"A": "Acquiring, processing, and analyzing digital images",
"B": "Understanding digital images",
"C": "Extracting high-dimensional data from the real world",
"D": "Applying computer vision theories to construction"
},
"correct_answer": "B",
"explanation": "The text discusses the scientific discipline of computer vision and its concern with the theory behind artificial systems that extract information from images."
},
{
"id": 2,
"question": "What is the term for the transformation of visual images into descriptions of the world?",
"options": {
"A": "Image understanding",
"B": "Scene reconstruction",
"C": "Object detection",
"D": "Activity recognition"
},
"correct_answer": "A",
"explanation": "Image understanding is the transformation of visual imag

In [358]:
import pandas as pd
from datetime import datetime

# Function to save MCQs to CSV
def save_mcqs_to_csv(quiz_data, filename=None):
    """Save MCQs to CSV format"""
    if not quiz_data or 'questions' not in quiz_data:
        print("No quiz data to save")
        return
    
    # Create rows for CSV
    rows = []
    for question in quiz_data['questions']:
        row = {
            'Question_Number': question.get('id', ''),
            'Question': question.get('question', ''),
            'Option_A': question.get('options', {}).get('A', ''),
            'Option_B': question.get('options', {}).get('B', ''),
            'Option_C': question.get('options', {}).get('C', ''),
            'Option_D': question.get('options', {}).get('D', ''),
            'Correct_Answer': question.get('correct_answer', ''),
            'Explanation': question.get('explanation', '')
        }
        rows.append(row)
    
    # Create DataFrame
    df = pd.DataFrame(rows)
    
    # Generate filename if not provided
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        subject = quiz_data.get('quiz_info', {}).get('subject', 'quiz').replace(' ', '_').lower()
        filename = f"{subject}_mcqs_{timestamp}.csv"
    
    # Save to CSV
    df.to_csv(filename, index=False, encoding='utf-8')
    print(f"✅ MCQs saved to CSV: {filename}")
    print(f"📊 Total questions saved: {len(rows)}")
    
    return filename

# Save your current MCQs to CSV
if quiz_data:
    csv_filename = save_mcqs_to_csv(quiz_data)
    
    # Display preview of the CSV
    print(f"\n�� CSV Preview:")
    print("=" * 80)
    df_preview = pd.read_csv(csv_filename)
    print(df_preview.head())
    
    print(f"\n📁 File saved as: {csv_filename}")
else:
    print("No quiz data available to save")

No quiz data available to save


In [370]:
quiz = response.get("quiz")
print(type(quiz))
print(quiz)

<class 'str'>
{
"quiz_info": {
"title": "Computer Vision Quiz",
"subject": "Computer Vision",
"difficulty": "easy",
"total_questions": 10
},
"questions": [
{
"id": 1,
"question": "What is the main topic discussed in the text?",
"options": {
"A": "Acquiring, processing, analyzing, and understanding digital images",
"B": "Machine learning and deep learning",
"C": "Image recognition and object detection",
"D": "Scene reconstruction and 3D modeling"
},
"correct_answer": "A",
"explanation": "The text discusses the main topic of computer vision, which is acquiring, processing, analyzing, and understanding digital images."
},
{
"id": 2,
"question": "What is the scientific discipline of computer vision concerned with?",
"options": {
"A": "Applying computer vision theories to real-world problems",
"B": "The theory behind artificial systems that extract information from images",
"C": "Developing new computer vision algorithms",
"D": "Creating computer vision systems for medical applications"
},


In [26]:
from groq import Groq
GROQ_API_KEY = key

# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)

# Define the prompt template
fixing_string = PromptTemplate(
    input_variables=["quiz"],
    template="""
You are a helpful assistant that fixes and formats broken JSON strings. 

The user will provide you with a string that is intended to be JSON but may contain syntax errors like:
- Missing commas, quotes, or brackets
- Wrongly escaped characters
- Trailing commas or unbalanced braces

Your task is to:  
✅ Parse and repair the string into valid JSON  
✅ Add any missing punctuation (commas, colons, quotes, braces, brackets)  
✅ Escape special characters properly  
✅ Return the result as pretty-printed JSON (indented and easy to read).  

🚨 Do not modify the actual data or add fake fields. Only fix syntax and formatting issues.  
If the string is already valid JSON, just pretty-print it.

Here is the broken JSON string:{quiz}
"""
)

In [27]:
def fix_json(broken_json: str) -> str:
    """
    Takes a broken JSON string, sends it to LLaMA 3 on Groq API,
    and returns the fixed, pretty-printed JSON string.
    """
    # Format the prompt with the malformed JSON
    prompt = fixing_string.format(quiz=broken_json)

    # Call Groq API
    response = client.chat.completions.create(
        model="llama3-70b-8192",  # Or "mixtral-8x7b-32768" if you prefer faster
        messages=[
            {"role": "system", "content": "You are a JSON fixing assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=4096,
        temperature=0  # Deterministic output
    )

    # Extract the fixed JSON
    fixed_json = response.choices[0].message.content.strip()
    return fixed_json

In [None]:
quiz_json = fix_json(quiz)


In [29]:
print(quiz_json)

Here is the fixed and pretty-printed JSON:

```
{
  "quiz_info": {
    "title": "Computer Vision Quiz",
    "subject": "Computer Vision",
    "difficulty": "easy",
    "total_questions": 7
  },
  "questions": [
    {
      "id": 1,
      "question": "What is the main topic discussed in the text?",
      "options": {
        "A": "Acquiring, processing, and analyzing digital images",
        "B": "Understanding digital images",
        "C": "Extracting high-dimensional data from the real world",
        "D": "Applying computer vision theories to construction"
      },
      "correct_answer": "B",
      "explanation": "The text discusses the scientific discipline of computer vision and its concern with the theory behind artificial systems that extract information from images."
    },
    {
      "id": 2,
      "question": "What is the term for the transformation of visual images into descriptions of the world?",
      "options": {
        "A": "Image understanding",
        "B": "Scene r