In [1]:
import os
import json
import pandas as pd
import traceback

In [2]:
pip install PyPDF2

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import getpass
import os

if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")


In [5]:
pip install langchain

Defaulting to user installation because normal site-packages is not writeable
Collecting langchain
  Using cached langchain-0.3.27-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-core<1.0.0,>=0.3.72 (from langchain)
  Using cached langchain_core-0.3.72-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.9 (from langchain)
  Using cached langchain_text_splitters-0.3.9-py3-none-any.whl.metadata (1.9 kB)
Collecting langsmith>=0.1.17 (from langchain)
  Using cached langsmith-0.4.8-py3-none-any.whl.metadata (15 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith>=0.1.17->langchain)
  Using cached orjson-3.11.1-cp312-cp312-win_amd64.whl.metadata (43 kB)
Using cached langchain-0.3.27-py3-none-any.whl (1.0 MB)
Using cached langchain_core-0.3.72-py3-none-any.whl (442 kB)
Using cached langchain_text_splitters-0.3.9-py3-none-any.whl (33 kB)
Using cached langsmith-0.4.8-py3-none-any.whl (367 kB)
Using cached orjson-3.11.1-cp312-cp312-win_amd64.whl (131 kB)
I

In [6]:
from langchain.callbacks.base import BaseCallbackHandler
import re

class GeminiTokenUsageSimulator(BaseCallbackHandler):
    def __init__(self):
        self.prompt_tokens = 0
        self.completion_tokens = 0
        self.total_tokens = 0

    def on_llm_start(self, serialized, prompts, **kwargs):
        self.prompt_tokens = sum(self.estimate_tokens(prompt) for prompt in prompts)

    def on_llm_end(self, response, **kwargs):
        output = response.generations[0][0].text
        self.completion_tokens = self.estimate_tokens(output)
        self.total_tokens = self.prompt_tokens + self.completion_tokens
        print(f"\n🔹 Prompt tokens: {self.prompt_tokens}")
        print(f"🔹 Completion tokens: {self.completion_tokens}")
        print(f"🔹 Total tokens: {self.total_tokens}")

    def estimate_tokens(self, text):
        return len(re.findall(r'\w+', text))  # Simple estimation: count words


In [8]:
pip install langchain-google-genai


Defaulting to user installation because normal site-packages is not writeable
Collecting langchain-google-genai
  Using cached langchain_google_genai-2.1.8-py3-none-any.whl.metadata (7.0 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Using cached filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain-google-genai)
  Using cached google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0,>=1.34.1 (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0,>=1.34.1->google-ai-generativelanguage<0.7.0,>=0.6.18->langchain-google-genai)
  Using cached google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)
Collecting google-auth!=2.24.0,!=2.25.0,<3.0.0,>=2.14.1 (from google-ai-generativelanguage<0.7.0,>=

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.37.1 requires protobuf<6,>=3.20, but you have protobuf 6.31.1 which is incompatible.


In [36]:
from langchain_google_genai import ChatGoogleGenerativeAI

token_logger = GeminiTokenUsageSimulator()
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.7
)


In [37]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
import PyPDF2

In [38]:
TEMPLATE = """
text : {text}
You are an expert in making quizzes.Given the text above, generate a quiz with {number} questions on the topic of {subject}.
The quiz should be in the {tone} tone.Make Sure questions should not be repeated.Make sure to format your response your format like RESPONSE_JSON below and use
it as a guide.
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}
"""

In [39]:
response_json = {
    "1":{
        "mcq" : "Multiple Choice Question ",
        "options" : [
            "Option A",
            "Option B",
            "Option C",
            "Option D"
        ],
        "answer" : "correct answer"
    },
    "2":{
        "mcq" : "Multiple Choice Question 2",
        "options" : [
            "Option A",
            "Option B",
            "Option C",
            "Option D"
        ],
        "answer" : "correct answer"
    },
    "3":{
        "mcq" : "Multiple Choice Question 3",
        "options" : [
            "Option A",
            "Option B",
            "Option C",
            "Option D"
        ],
        "answer" : "correct answer"
    },
    "4":{
        "mcq" : "Multiple Choice Question 4",
        "options" : [
            "Option A",
            "Option B",
            "Option C",
            "Option D"
        ],
        "answer" : "correct answer"
    }
}

In [40]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template = TEMPLATE
    )

In [41]:
quiz_chain = LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)

In [42]:
TEMPLATE2 = """
You are an expert in english grammarian and writer. Given a Multiple Choice Quiz (MCQ) for {subject} students.You need to
evaluare the quiz  question complexeity and give a complete analysis of the quiz.Only use atmost 30 words to describe the complexeity of quiz
update the quiz question which needs to be change the tone such that it perfectly fits the statement abilities
Quiz MCQs {quiz}. 

Check from an expert of the above quiz
"""

In [43]:
quiz_evaluation_prompt = PromptTemplate(
    input_variables=["quiz", "subject"],
    template=TEMPLATE2)

In [44]:
quiz_eval = LLMChain(llm=llm, prompt=quiz_evaluation_prompt, output_key="review", verbose=True)

In [45]:
quiz_chain.prompt.input_variables

['number', 'response_json', 'subject', 'text', 'tone']

In [46]:
print(quiz_chain)

verbose=True prompt=PromptTemplate(input_variables=['number', 'response_json', 'subject', 'text', 'tone'], input_types={}, partial_variables={}, template='\ntext : {text}\nYou are an expert in making quizzes.Given the text above, generate a quiz with {number} questions on the topic of {subject}.\nThe quiz should be in the {tone} tone.Make Sure questions should not be repeated.Make sure to format your response your format like RESPONSE_JSON below and use\nit as a guide.\nEnsure to make {number} MCQs\n### RESPONSE_JSON\n{response_json}\n') llm=ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x00000173CE3B0560>, default_metadata=(), model_kwargs={}) output_key='quiz' output_parser=StrOutputParser() llm_kwargs={}


In [47]:
generate_eval_chain = SequentialChain(chains=[quiz_chain, quiz_eval], input_variables=["text", "number", "subject", "tone", "response_json"],
                                      output_variables=["quiz", "review"],
                                      verbose=True)
                                      

In [48]:
file_path = "E:\GenAI\MCQ_GENERATOR\data.txt"

  file_path = "E:\GenAI\MCQ_GENERATOR\data.txt"


In [49]:
file_path

'E:\\GenAI\\MCQ_GENERATOR\\data.txt'

In [50]:
with open(file_path, 'r') as file:
    text = file.read()


In [51]:
print(text)

In machine learning, deep learning focuses on utilizing multilayered neural networks to perform tasks such as classification, regression, and representation learning. The field takes inspiration from biological neuroscience and is centered around stacking artificial neurons into layers and "training" them to process data. The adjective "deep" refers to the use of multiple layers (ranging from three to several hundred or thousands) in the network. Methods used can be supervised, semi-supervised or unsupervised.[2]

Some common deep learning network architectures include fully connected networks, deep belief networks, recurrent neural networks, convolutional neural networks, generative adversarial networks, transformers, and neural radiance fields. These architectures have been applied to fields including computer vision, speech recognition, natural language processing, machine translation, bioinformatics, drug design, medical image analysis, climate science, material inspection and boar

In [52]:
json.dumps(response_json)


'{"1": {"mcq": "Multiple Choice Question ", "options": ["Option A", "Option B", "Option C", "Option D"], "answer": "correct answer"}, "2": {"mcq": "Multiple Choice Question 2", "options": ["Option A", "Option B", "Option C", "Option D"], "answer": "correct answer"}, "3": {"mcq": "Multiple Choice Question 3", "options": ["Option A", "Option B", "Option C", "Option D"], "answer": "correct answer"}, "4": {"mcq": "Multiple Choice Question 4", "options": ["Option A", "Option B", "Option C", "Option D"], "answer": "correct answer"}}'

In [53]:
response = generate_eval_chain.invoke({
    "text": text,
    "number":5,
    "subject": "Deep learning",
    "tone":"simple",
    "response_json":json.dumps(response_json, indent=4)
})



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
text : In machine learning, deep learning focuses on utilizing multilayered neural networks to perform tasks such as classification, regression, and representation learning. The field takes inspiration from biological neuroscience and is centered around stacking artificial neurons into layers and "training" them to process data. The adjective "deep" refers to the use of multiple layers (ranging from three to several hundred or thousands) in the network. Methods used can be supervised, semi-supervised or unsupervised.[2]

Some common deep learning network architectures include fully connected networks, deep belief networks, recurrent neural networks, convolutional neural networks, generative adversarial networks, transformers, and neural radiance fields. These architectures have been applied to fields including computer vision, speech recognition, natural l

In [54]:
quiz = response.get("quiz")

In [55]:
print(quiz)

```json
{
    "1": {
        "mcq": "What does the word 'deep' refer to in deep learning?",
        "options": [
            "The deep understanding required by users",
            "The use of multiple layers in the network",
            "The complex problems it solves",
            "The deep research involved in its creation"
        ],
        "answer": "The use of multiple layers in the network"
    },
    "2": {
        "mcq": "Which of these is a common task that deep learning can perform?",
        "options": [
            "Building a physical robot",
            "Cooking a gourmet meal",
            "Classification",
            "Writing a novel from scratch"
        ],
        "answer": "Classification"
    },
    "3": {
        "mcq": "Deep learning methods can be categorized as which of the following?",
        "options": [
            "Only supervised",
            "Only unsupervised",
            "Supervised, semi-supervised, or unsupervised",
            "Only semi-supervi

In [62]:
print("Type:", type(quiz))
print("Length:", len(quiz))
print("Content Preview:", repr(quiz[:200]))


Type: <class 'str'>
Length: 1879
Content Preview: '```json\n{\n    "1": {\n        "mcq": "What does the word \'deep\' refer to in deep learning?",\n        "options": [\n            "The deep understanding required by users",\n            "The use of multipl'


In [64]:
import json

cleaned_quiz = quiz.strip().removeprefix("```json").removesuffix("```").strip()

try:
    quiz_dict = json.loads(cleaned_quiz)
    print(" JSON parsed successfully!")
except json.JSONDecodeError as e:
    print(" JSON parsing failed:", e)


 JSON parsed successfully!


In [67]:
quiz_table_data = []

for key, value in quiz_dict.items():
    mcq = value["mcq"]
    options = " | ".join([f"{i+1}: {opt}" for i, opt in enumerate(value["options"])])
    correct = value.get("answer", "N/A")

    quiz_table_data.append({
        "MCQ": mcq,
        "Choices": options,
        "Correct": correct
    })


In [68]:
quiz_table_data

[{'MCQ': "What does the word 'deep' refer to in deep learning?",
  'Choices': '1: The deep understanding required by users | 2: The use of multiple layers in the network | 3: The complex problems it solves | 4: The deep research involved in its creation',
  'Correct': 'The use of multiple layers in the network'},
 {'MCQ': 'Which of these is a common task that deep learning can perform?',
  'Choices': '1: Building a physical robot | 2: Cooking a gourmet meal | 3: Classification | 4: Writing a novel from scratch',
  'Correct': 'Classification'},
 {'MCQ': 'Deep learning methods can be categorized as which of the following?',
  'Choices': '1: Only supervised | 2: Only unsupervised | 3: Supervised, semi-supervised, or unsupervised | 4: Only semi-supervised',
  'Correct': 'Supervised, semi-supervised, or unsupervised'},
 {'MCQ': 'In which field has deep learning been applied, as mentioned in the text?',
  'Choices': '1: Archaeology | 2: Computer vision | 3: Astronomy | 4: Geology',
  'Correc

In [80]:
from datetime import datetime

datetime.now().strftime("%d-%m-%Y__%H:%M:%S")

'30-07-2025__16:53:49'