In [1]:
import os
from dotenv import load_dotenv
import json
import pandas as pd
import traceback

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

In [4]:
llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GEMINI_API_KEY)

In [5]:
from langchain_core.prompts import PromptTemplate

In [6]:
TEMPLATE = """
Text : {text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide. \
Ensure to make {number} MCQs
###RESPONSE JSON
{response_json}"

"""

In [7]:
RESPONSE_JSON = {
    "1":  {
        "MCQ": "Multiple Choice Question",
        "Options": {"a": "Choice here", "b": "Choice here", "c": "Choice here", "d": "Choice here"},
        "Correct": "Correct Answer"
    },
    "2":  {
        "MCQ": "Multiple Choice Question",
        "Options": {"a": "Choice here", "b": "Choice here", "c": "Choice here", "d": "Choice here"},
        "Correct": "Correct Answer"
    },
    "3":  {
        "MCQ": "Multiple Choice Question",
        "Options": {"a": "Choice here", "b": "Choice here", "c": "Choice here", "d": "Choice here"},
        "Correct": "Correct Answer"
    }
}

In [8]:
quiz_prompt = PromptTemplate(template=TEMPLATE, input_variables=[
                             "text", "number", "subject", "tone", "response_json"])

In [9]:
from langchain.chains import LLMChain

quiz_chain = LLMChain(llm=llm, prompt=quiz_prompt,
                      output_key="quiz", verbose=True)

In [10]:
TEMPLATE2 = """
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [11]:
evaluation_prompt = PromptTemplate(
    template=TEMPLATE2, input_variables=["subject", "quiz"])

In [12]:
review_chain = LLMChain(llm=llm, prompt=evaluation_prompt,
                        output_key="review", verbose=True)

In [13]:
from langchain.chains import SequentialChain

generate_evaluate_chain = SequentialChain(chains=[quiz_chain, review_chain], input_variables=[
    "text", "number", "subject", "tone", "response_json"], output_variables=["quiz", "review"], verbose=True)

In [14]:
with open("C:\Development\mcq-generator\data.txt", "r") as f:
    data = f.read()

In [15]:
print(data)

Natural language processing, or NLP, combines computational linguisticsâ€”rule-based modeling of human languageâ€”with statistical and machine learning models to enable computers and digital devices to recognize, understand and generate text and speech. Human language is filled with ambiguities that make it incredibly difficult to write software that accurately determines the intended meaning of text or voice data. Homonyms, homophones, sarcasm, idioms, metaphors, grammar and usage exceptions, variations in sentence structureâ€”these just a few of the irregularities of human language that take humans years to learn, but that programmers must teach natural language-driven applications to recognize and understand accurately from the start, if those applications are going to be useful.




In [16]:
json.dumps(RESPONSE_JSON)

'{"1": {"MCQ": "Multiple Choice Question", "Options": {"a": "Choice here", "b": "Choice here", "c": "Choice here", "d": "Choice here"}, "Correct": "Correct Answer"}, "2": {"MCQ": "Multiple Choice Question", "Options": {"a": "Choice here", "b": "Choice here", "c": "Choice here", "d": "Choice here"}, "Correct": "Correct Answer"}, "3": {"MCQ": "Multiple Choice Question", "Options": {"a": "Choice here", "b": "Choice here", "c": "Choice here", "d": "Choice here"}, "Correct": "Correct Answer"}}'

In [17]:
NUMBER = 5
SUBJECT = "Maths"
TONE = "Formal"

In [18]:
response = generate_evaluate_chain(
    {"text": data, "number": NUMBER, "subject": SUBJECT, "tone": TONE, "response_json": RESPONSE_JSON})

  warn_deprecated(




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text : Natural language processing, or NLP, combines computational linguisticsâ€”rule-based modeling of human languageâ€”with statistical and machine learning models to enable computers and digital devices to recognize, understand and generate text and speech. Human language is filled with ambiguities that make it incredibly difficult to write software that accurately determines the intended meaning of text or voice data. Homonyms, homophones, sarcasm, idioms, metaphors, grammar and usage exceptions, variations in sentence structureâ€”these just a few of the irregularities of human language that take humans years to learn, but that programmers must teach natural language-driven applications to recognize and understand accurately from the start, if those applications are going to be useful.


You are an expert MCQ maker. Given the above text, it is your job

In [19]:
response

{'text': 'Natural language processing, or NLP, combines computational linguisticsâ€”rule-based modeling of human languageâ€”with statistical and machine learning models to enable computers and digital devices to recognize, understand and generate text and speech. Human language is filled with ambiguities that make it incredibly difficult to write software that accurately determines the intended meaning of text or voice data. Homonyms, homophones, sarcasm, idioms, metaphors, grammar and usage exceptions, variations in sentence structureâ€”these just a few of the irregularities of human language that take humans years to learn, but that programmers must teach natural language-driven applications to recognize and understand accurately from the start, if those applications are going to be useful.\n\n',
 'number': 5,
 'subject': 'Maths',
 'tone': 'Formal',
 'response_json': {'1': {'MCQ': 'Multiple Choice Question',
   'Options': {'a': 'Choice here',
    'b': 'Choice here',
    'c': 'Choice 

In [20]:
quiz = response.get("quiz")
quiz = quiz.split('\n', 1)[1]
quiz = quiz.replace("'", "\"")
quiz = json.loads(quiz)

In [21]:
quiz_table_data = []
for key, value in quiz.items():
    mcq = value["MCQ"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["Options"].items()
        ]
    )
    correct = value["Correct"]
    quiz_table_data.append(
        {"MCQ": mcq, "Choices": options, "Correct": correct})

In [22]:
quiz_table_data

[{'MCQ': 'What is the main component of NLP that enables computers to recognize and understand text and speech?',
  'Choices': 'a: Computational linguistics | b: Statistical models | c: Machine learning models | d: Combinations of a, b, and c',
  'Correct': 'd'},
 {'MCQ': 'Which of the following challenges makes it difficult to develop accurate NLP applications?',
  'Choices': 'a: Ambiguities in human language | b: Lack of training data | c: Computational limitations | d: All of the above',
  'Correct': 'a'},
 {'MCQ': 'Which of the following is NOT an example of an irregularity found in human language?',
  'Choices': 'a: Homonyms | b: Sarcasm | c: Boolean algebra | d: Grammar exceptions',
  'Correct': 'c'},
 {'MCQ': 'What is the significance of teaching NLP applications to recognize human language irregularities?',
  'Choices': 'a: To enhance their accuracy and usefulness | b: To reduce the need for human supervision | c: To make them more efficient | d: None of the above',
  'Correct'

In [23]:
mcq_data = pd.DataFrame(quiz_table_data)

In [25]:
mcq_data.to_csv("mcq_sample_data.csv", index=False)