In [1]:
import pandas as pd

In [2]:
from gemini_llm import GeminiChatModel
from langchain_core.messages import HumanMessage
from langchain.chains import LLMChain, SequentialChain
from langchain_core.prompts import PromptTemplate
import PyPDF2
import json

llm = GeminiChatModel()



In [3]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here"
        },
        "correct": "correct answer"
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here"
        },
        "correct": "correct answer"
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here"
        },
        "correct": "correct answer"
    }
}

In [4]:
TEMPLATE = """
Text: {text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the questions to be conforming to the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}
"""

In [5]:
TEMPLATE2 = """
You are an expert English grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity.\
If the quiz is not at par with the cognitive and analytical abilities of the students,\
update the quiz questions which need to be changed and change the tone such that it perfectly fits the student abilities.
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [6]:
mcq_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template=TEMPLATE
)

evaluation_prompt = PromptTemplate(
    input_variables=["quiz", "subject"],
    template=TEMPLATE2
)

In [7]:
mcq_chain = LLMChain(
    llm=llm,
    prompt=mcq_prompt,
    output_key="quiz"
)

  mcq_chain = LLMChain(


In [8]:
evaluation_chain = LLMChain(
    llm=llm,
    prompt=evaluation_prompt,
    output_key="review"
)

In [9]:
final_chain = SequentialChain(
    chains=[mcq_chain, evaluation_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True
)

In [10]:
raw_result = final_chain({
    "text": "Python is a versatile language used for automation, data analysis, and AI applications.",
    "number": 3,
    "subject": "Computer Science",
    "tone": "educational",
    "response_json": RESPONSE_JSON
})

  raw_result = final_chain({




[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [11]:
# ---------------- Post-process: Clean JSON ---------------- #
def safe_json_parse(raw_text):
    """Clean Gemini output and safely parse JSON."""
    try:
        cleaned = raw_text.strip("```json").strip("```").strip()
        return json.loads(cleaned)
    except json.JSONDecodeError:
        print("‚ö†Ô∏è Model output is not valid JSON, returning raw text.")
        return raw_text


# Parse both raw outputs
generated_quiz = safe_json_parse(raw_result["quiz"])
reviewed_quiz = safe_json_parse(raw_result["review"])

‚ö†Ô∏è Model output is not valid JSON, returning raw text.


In [12]:
print("\nüß© Original Generated MCQs:\n", json.dumps(generated_quiz, indent=2))
print("\n‚úçÔ∏è Final Reviewed MCQs (JSON):\n", json.dumps(reviewed_quiz, indent=2))


üß© Original Generated MCQs:
 {
  "1": {
    "mcq": "According to the provided text, what is a primary characteristic of the Python language?",
    "options": {
      "a": "It is primarily used for hardware programming.",
      "b": "It is a versatile language.",
      "c": "It is an outdated scripting language.",
      "d": "It is exclusively a low-level language."
    },
    "correct": "b"
  },
  "2": {
    "mcq": "Which of the following application areas is explicitly mentioned in the text as a use case for Python?",
    "options": {
      "a": "Mobile game development",
      "b": "Operating system kernel development",
      "c": "Data analysis",
      "d": "Web browser design"
    },
    "correct": "c"
  },
  "3": {
    "mcq": "The text states that Python is utilized for which specific set of applications?",
    "options": {
      "a": "Frontend web development, database administration, and graphic design.",
      "b": "Automation, data analysis, and AI applications.",
      "c"

In [13]:
file_path=r"C:\Users\ADMIN\Desktop\mcqgen\data.txt"

In [14]:
with open(file_path, 'r') as file:
    TEXT = file.read()

In [15]:
print(TEXT)

Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalise to unseen data, and thus perform tasks without explicit instructions.[1] Within a subdiscipline in machine learning, advances in the field of deep learning have allowed neural networks, a class of statistical algorithms, to surpass many previous machine learning approaches in performance.

ML finds application in many fields, including natural language processing, computer vision, speech recognition, email filtering, agriculture, and medicine. The application of ML to business problems is known as predictive analytics.

Statistics and mathematical optimisation (mathematical programming) methods comprise the foundations of machine learning. Data mining is a related field of study, focusing on exploratory data analysis (EDA) via unsupervised learning.[3][4]

From a theoretical viewpoint, probably approximately corr

In [16]:
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [17]:
# ============= Token Tracking Setup ============= #
print("Step 1: Importing GeminiUsageCallback...")
from gemini_callback import GeminiUsageCallback
print("  ‚úì Imported")

print("Step 2: Creating callback...")
callback = GeminiUsageCallback()
print("  ‚úì Callback created")

print("Step 3: Creating LLM with callbacks...")
llm_with_callbacks = GeminiChatModel(callbacks=[callback])
print("  ‚úì LLM created")

print("Step 4: Creating MCQ chain...")
mcq_chain_tracked = LLMChain(
    llm=llm_with_callbacks,
    prompt=mcq_prompt,
    output_key="quiz"
)
print("  ‚úì MCQ chain created")

print("Step 5: Creating evaluation chain...")
evaluation_chain_tracked = LLMChain(
    llm=llm_with_callbacks,
    prompt=evaluation_prompt,
    output_key="review"
)
print("  ‚úì Evaluation chain created")

print("Step 6: Creating final sequential chain...")
final_chain_tracked = SequentialChain(
    chains=[mcq_chain_tracked, evaluation_chain_tracked],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=False
)
print("  ‚úì Final chain created")

print("\n‚úÖ Token tracking chains created successfully!")

Step 1: Importing GeminiUsageCallback...
  ‚úì Imported
Step 2: Creating callback...
  ‚úì Callback created
Step 3: Creating LLM with callbacks...
  ‚úì LLM created
Step 4: Creating MCQ chain...
  ‚úì MCQ chain created
Step 5: Creating evaluation chain...
  ‚úì Evaluation chain created
Step 6: Creating final sequential chain...
  ‚úì Final chain created

‚úÖ Token tracking chains created successfully!


In [18]:
# ============= Execute Final Chain with Token Tracking ============= #
import time

print("‚è≥ Running final chain with token tracking...\n")
start_time = time.time()

try:
    raw_result = final_chain_tracked({
        "text": "Python is a versatile language used for automation, data analysis, and AI applications.",
        "number": 3,
        "subject": "Computer Science",
        "tone": "educational",
        "response_json": RESPONSE_JSON
    })
    
    elapsed = time.time() - start_time
    print(f"\n‚úÖ Chain execution complete! (took {elapsed:.2f}s)")
    
except Exception as e:
    print(f"‚ùå Error during chain execution: {str(e)}")
    import traceback
    traceback.print_exc()

‚è≥ Running final chain with token tracking...


‚úÖ Chain execution complete! (took 21.44s)


In [19]:
# ============= Create Quiz Table ============= #
# Make sure raw_result exists
if 'raw_result' not in locals():
    print("‚ùå Error: Chain has not been executed yet!")
    print("Please run the 'Execute Final Chain' cell first.")
else:
    # Parse the quiz if not already done
    if not isinstance(generated_quiz, dict):
        generated_quiz = safe_json_parse(raw_result["quiz"])
    
    quiz_table_data = []
    for key, value in generated_quiz.items():
        question = value["mcq"]
        options = "|".join(
            [
                f"{option}: {option_value}"
                for option, option_value in value['options'].items() 
            ]
        )
        correct_answer = value['correct']
        quiz_table_data.append({
            "MCQ": question,
            "Choices": options,
            "Correct": correct_answer
        })
    
    # Display as DataFrame
    df_quiz = pd.DataFrame(quiz_table_data)
    print("\n" + "="*60)
    print("üìã QUIZ TABLE")
    print("="*60)
    print(df_quiz.to_string(index=False))


üìã QUIZ TABLE
                                                                                                   MCQ                                                                                                                                                                                                                                                                 Choices Correct
              According to the provided text, what is a primary characteristic of the Python language?                                                                                                    a: It is primarily used for hardware programming.|b: It is a versatile language.|c: It is an outdated scripting language.|d: It is exclusively a low-level language.       b
Which of the following application areas is explicitly mentioned in the text as a use case for Python?                                                                                                                                   

In [20]:
# ============= Display Results & Token Usage ============= #
print("\n" + "="*60)
print("üìä TOKEN USAGE SUMMARY")
print("="*60)
callback.print_summary()

print("\n" + "="*60)
print("üß© GENERATED MCQs")
print("="*60)
generated_quiz = safe_json_parse(raw_result["quiz"])
print(json.dumps(generated_quiz, indent=2))

print("\n" + "="*60)
print("‚úçÔ∏è REVIEWED MCQs")
print("="*60)
reviewed_quiz = safe_json_parse(raw_result["review"])
print(json.dumps(reviewed_quiz, indent=2))


üìä TOKEN USAGE SUMMARY
üßÆ Gemini Usage Summary:
Prompt chars: 2741
Completion chars: 4739
Total chars: 7480

üß© GENERATED MCQs
{
  "1": {
    "mcq": "Based on the provided text, what primary characteristic is attributed to Python?",
    "options": {
      "a": "It is a highly specialized language for a single domain.",
      "b": "It is a versatile language with diverse applications.",
      "c": "It is an outdated language primarily used for legacy systems.",
      "d": "It is a language exclusively for graphical user interface development."
    },
    "correct": "b"
  },
  "2": {
    "mcq": "Which of the following applications is explicitly mentioned in the text as a use case for Python?",
    "options": {
      "a": "Mobile app development",
      "b": "Video game design",
      "c": "Data analysis",
      "d": "Operating system programming"
    },
    "correct": "c"
  },
  "3": {
    "mcq": "According to the text, Python is utilized for a combination of which three specific 

In [21]:
# ============= Export Quiz to CSV ============= #
from datetime import datetime
import os

print("\n" + "="*60)
print("üíæ EXPORTING QUIZ TO CSV")
print("="*60)

# Create quiz DataFrame from generated_quiz
quiz_export_data = []
for key, value in generated_quiz.items():
    question = value["mcq"]
    options = " | ".join(
        [
            f"{option.upper()}: {option_value}"
            for option, option_value in value['options'].items() 
        ]
    )
    correct_answer = value['correct']
    quiz_export_data.append({
        "Question_Number": key,
        "Question": question,
        "Option_A": value['options'].get('a', ''),
        "Option_B": value['options'].get('b', ''),
        "Option_C": value['options'].get('c', ''),
        "Option_D": value['options'].get('d', ''),
        "Correct_Answer": correct_answer
    })

df_export = pd.DataFrame(quiz_export_data)

# Create export path with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
export_dir = r"C:\Users\ADMIN\Desktop\mcqgen"
csv_filename = f"generated_quiz_{timestamp}.csv"
csv_path = os.path.join(export_dir, csv_filename)

# Export to CSV
df_export.to_csv(csv_path, index=False)

print(f"\n‚úÖ Quiz exported successfully!")
print(f"üìÑ File: {csv_filename}")
print(f"üìÅ Location: {csv_path}")
print(f"\nüìä Quiz Summary:")
print(f"   Total Questions: {len(df_export)}")
print(f"   File Size: {os.path.getsize(csv_path)} bytes")

# Display preview
print(f"\nüìã CSV Preview:")
print(df_export.to_string(index=False))


üíæ EXPORTING QUIZ TO CSV

‚úÖ Quiz exported successfully!
üìÑ File: generated_quiz_20251229_144612.csv
üìÅ Location: C:\Users\ADMIN\Desktop\mcqgen\generated_quiz_20251229_144612.csv

üìä Quiz Summary:
   Total Questions: 3
   File Size: 938 bytes

üìã CSV Preview:
Question_Number                                                                                               Question                                                 Option_A                                              Option_B                                                         Option_C                                                               Option_D Correct_Answer
              1                       Based on the provided text, what primary characteristic is attributed to Python? It is a highly specialized language for a single domain. It is a versatile language with diverse applications.    It is an outdated language primarily used for legacy systems. It is a language exclusively for graphical user inte