# Introduction

Title: Exam MCQ Generator

# Installs

In [None]:
!pip install -q python-dotenv

!pip install -q langchain_experimental
!pip install -q langchain

!pip install -q openai

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.0/163.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.3/803.3 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.5/215.5 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.4/225.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.9/75.9 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━

In [None]:
!pip install -q pydantic

# Imports

In [None]:
import os

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter

from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain

# Data Source

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive. The text files and the .env file used
# by later cells are addressed through paths under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def read_text_file(path):
  """Return the complete contents of the UTF-8 encoded text file at ``path``."""
  with open(path, mode='r', encoding='utf-8') as handle:
    return handle.read()

In [None]:
# Folder on the mounted Drive that holds the numbered sample texts.
texts_path = '/content/drive/MyDrive/datasets/exam_mcq_generator/texts'


def _numbered_text_path(number):
  """Return the path of the zero-padded ``NN.txt`` file inside ``texts_path``."""
  return os.path.join(texts_path, f'{number:02d}.txt')


# Each sample keeps its own path/text pair so later cells can refer to it by name.
text_path_01 = _numbered_text_path(1)
text_01 = read_text_file(text_path_01)

text_path_02 = _numbered_text_path(2)
text_02 = read_text_file(text_path_02)

text_path_03 = _numbered_text_path(3)
text_03 = read_text_file(text_path_03)

text_path_04 = _numbered_text_path(4)
text_04 = read_text_file(text_path_04)

text_path_05 = _numbered_text_path(5)
text_05 = read_text_file(text_path_05)

# Env

In [None]:
# Location of the .env file on the mounted Drive; handed to load_dotenv in a later cell.
env_path = '/content/drive/MyDrive/credentials/data-analytics-demo/.env'

In [None]:
from dotenv import load_dotenv, find_dotenv

# Load the variables from the .env file at env_path so the API keys can be
# looked up through os.environ in the next cell.
# NOTE(review): find_dotenv is imported but not used in the visible cells.
load_dotenv(env_path)

True

In [None]:
# Pull the API keys out of the process environment. Indexing os.environ raises
# KeyError when a variable is absent, so a missing key fails in this cell.
# Only OPEN_AI_API_KEY is referenced by the visible cells below.
GOOGLE_PALM_API_KEY = os.environ['GOOGLE_PALM_API_KEY']
HUGGINGFACE_API_KEY = os.environ['HUGGINGFACE_API_KEY']
OPEN_AI_API_KEY = os.environ['OPEN_AI_API_KEY']

# NOTE(review): the expression below is left disabled; evaluating it would echo
# the secret values into the saved cell output.
# GOOGLE_PALM_API_KEY, HUGGINGFACE_API_KEY, OPEN_AI_API_KEY

# Model

In [None]:
from pydantic import BaseModel, Field

# Schema for a single generated multiple choice question.
# The Field descriptions are more than documentation: the PydanticOutputParser
# built in a later cell derives the format instructions sent to the LLM from
# this schema, so editing a description edits the prompt.
# NOTE(review): documented with # comments rather than a class docstring because
# pydantic appears to copy a model's docstring into its JSON schema
# "description", which would alter the prompt text -- confirm before converting.
class MCQModel(BaseModel):
  # The question shown to the examinee.
  question: str = Field(description="This is the question text")
  # The candidate answers, in display order.
  options: list[str] = Field(description="This is a list of multiple choices or options avalible")
  # The right answer as text; nothing here enforces that it is one of `options`.
  correct_option: str = Field(description="This is the correct choice or option")
  # Free-form string; the allowed values are only requested via the description.
  difficulty_level: str = Field(description="This is the difficulty level of the question from one of the three modes: easy, medium, and hard")

In [None]:
# Top-level object the LLM is asked to return: a wrapper around the list of
# questions. It is the model handed to PydanticOutputParser and the one get_exam
# parses the reply into.
# NOTE(review): kept as # comments; a class docstring would presumably be copied
# into the JSON schema used in the prompt -- confirm before converting.
class MCQListModel(BaseModel):
  # One MCQModel entry per generated question.
  mcq_list: list[MCQModel] = Field(description="A list of multiple choice questions")

In [None]:
from langchain.output_parsers import PydanticOutputParser

# Output parser bound to the top-level MCQListModel schema.
pydantic_parser = PydanticOutputParser(pydantic_object=MCQListModel)

# Text describing the expected JSON layout; it fills the {format_instructions}
# slot of the prompt when the chain is invoked.
format_instructions = pydantic_parser.get_format_instructions()

# Printed for inspection only.
print(format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"mcq_list": {"title": "Mcq List", "description": "A list of multiple choice questions", "type": "array", "items": {"$ref": "#/definitions/MCQModel"}}}, "required": ["mcq_list"], "definitions": {"MCQModel": {"title": "MCQModel", "type": "object", "properties": {"question": {"title": "Question", "description": "This is the question text", "type": "string"}, "options": {"title": "Options", "description": "This is a list of multiple choices or options avalible", "type": "array", "items": {"type": "string"}}, "correct_option": {"tit

In [None]:
# Chat model used by llm_chain for every exam request, authenticated with the
# OpenAI key read from the environment and pinned to a specific model snapshot.
langchain_llm = ChatOpenAI(openai_api_key=OPEN_AI_API_KEY, model_name="gpt-3.5-turbo-0613", verbose=False,)

In [None]:
# Prompt template text, one instruction per line. The placeholders {texts}, {n},
# {difficulty_level} and {format_instructions} are filled in when the chain is
# invoked.
prompt = "\n".join([
    "Following are blocks of text picked at random from a document:",
    "{texts}",
    "Generate {n} non-repeating multiple choice questions along with their respective correct options from this text.",
    "By default the difficulty level is set to {difficulty_level}, if it is mix then randomly select difficulty",
    "Also, return the difficulty level of the question from one of the three modes: easy, medium, and hard.",
    "Do not use any outside information.",
    "{format_instructions}",
])

In [None]:
# Wrap the raw prompt string in a chat prompt template.
prompt_template = ChatPromptTemplate.from_template(prompt)

# Bare expression so the notebook displays the template and its input variables.
prompt_template

ChatPromptTemplate(input_variables=['difficulty_level', 'format_instructions', 'n', 'texts'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['difficulty_level', 'format_instructions', 'n', 'texts'], template='Following are blocks of text picked at random from a document:\n{texts}\nGenerate {n} non-repeating multiple choice questions along with their respective correct options from this text.\nBy default the difficulty level is set to {difficulty_level}, if it is mix then randomly select difficulty\nAlso, return the difficulty level of the question from one of the three modes: easy, medium, and hard.\nDo not use any outside information.\n{format_instructions}'))])

In [None]:
# Chain combining prompt_template with langchain_llm. The model's reply is
# stored under the "result" key, which get_exam reads back.
llm_chain = LLMChain(llm=langchain_llm, prompt=prompt_template, output_key="result")

In [None]:
def get_docs(text, chunk_size=500, chunk_overlap=0):
  """
  Split ``text`` into langchain documents, using the newline character as the
  separator.

  args:
    * text: the raw text to split
    * chunk_size: chunk size handed to the splitter, measured with ``len``
    * chunk_overlap: overlap handed to the splitter

  returns:
    whatever ``CharacterTextSplitter.create_documents`` returns for ``[text]``

  note (from the original author, not verified here):
    * When chunk_overlap=0, it will try not to cut sentences, which is good for our case
  """
  splitter_options = dict(
      separator="\n",
      chunk_size=chunk_size,
      chunk_overlap=chunk_overlap,
      length_function=len,
      is_separator_regex=False,
  )

  return CharacterTextSplitter(**splitter_options).create_documents([text])

In [None]:
from langchain.callbacks import get_openai_callback as get_langchain_openai_callback

import random

def get_exam(text, n=3, difficulty_level="mix"):
  """
  Generate ``n`` multiple choice questions from randomly chosen chunks of ``text``.

  args:
    * text: source text; it is split into chunks with get_docs
    * n: number of questions to request, which is also the number of chunks sampled
    * difficulty_level: value substituted into the prompt; the prompt tells the
      model to pick difficulties itself when this is "mix"

  returns:
    dict with
      * "mcqs_parsed": the ``mcq_list`` parsed from the model's reply
      * "cb_langchain": the OpenAI callback object collected around the call
        (printing it shows the token usage and cost summary)

  raises:
    ValueError: when n is smaller than 1 or larger than the number of chunks

  validation: 1 <= n <= total_docs
  """
  # Reject impossible counts before doing any work: a negative n would otherwise
  # only fail inside random.sample, and n == 0 would spend an API call asking
  # for zero questions.
  if n < 1:
    raise ValueError(f"Too few questions specified, must be at least 1 (got {n})")

  docs = get_docs(text)

  if n > len(docs):
    raise ValueError(f"Too many questions specified, must be less than {len(docs)+1}")

  # One distinct chunk per requested question.
  rand_docs = random.sample(docs, n)

  texts = "".join(
      f"Text Block {block_number}:\n{doc.page_content}\n"
      for block_number, doc in enumerate(rand_docs, start=1)
  )

  with get_langchain_openai_callback() as cb_langchain:
    response = llm_chain.invoke({"texts": texts, "n": n, "difficulty_level": difficulty_level, "format_instructions": format_instructions})

  # Parse with the same parser that produced format_instructions, so the reply
  # is read by the component that defined its format instead of assuming the
  # whole reply is bare JSON.
  mcqs_parsed = pydantic_parser.parse(response['result']).mcq_list

  return {"mcqs_parsed": mcqs_parsed, 'cb_langchain': cb_langchain}

# get_exam(text_01)

# Testing

In [None]:
def display_exam(exam):
  """
  Print an exam in a readable form.

  For every entry of ``exam['mcqs_parsed']`` this prints the question, its
  options numbered from 1, the correct option and the difficulty level; the
  ``exam['cb_langchain']`` object is printed once at the end.
  """
  for mcq in exam['mcqs_parsed']:
    print(f"question: {mcq.question}\n")

    for number, option in enumerate(mcq.options, start=1):
      print(f"{number}. {option}")

    print(f"\ncorrect: {mcq.correct_option}\n")
    print(f"difficulty: {mcq.difficulty_level}\n\n")

  print(exam['cb_langchain'])

In [None]:
# exam_01 = get_exam(text_01, n=10)

In [None]:
exam_01 = get_exam(text_01, n=5, difficulty_level='hard')



In [None]:
display_exam(exam_01)

question: What did Seraphina take with her on her journey?

1. A weathered satchel
2. A compass
3. The shimmering artifact
4. All of the above

correct: All of the above

difficulty: hard


question: Where did Seraphina live?

1. Eldridge Haven
2. A small coastal village
3. A close-knit community
4. All of the above

correct: All of the above

difficulty: hard


question: What did Seraphina become after her journey?

1. A curious villager
2. A guardian of realms
3. A young woman
4. None of the above

correct: A guardian of realms

difficulty: hard


question: What did Seraphina discover in the archipelago?

1. Lost civilizations
2. Forgotten magic
3. A realm suspended between reality and dreams
4. All of the above

correct: All of the above

difficulty: hard


question: What triggered Seraphina's journey?

1. A stroll along the shore
2. The beating of her heart
3. The desire for adventure
4. All of the above

correct: All of the above

difficulty: hard


Tokens Used: 1313
	Prompt Token

In [None]:
exam_02 = get_exam(text_02, n=5)



In [None]:
display_exam(exam_02)

question: What fields were revolutionized by SynthEra?

1. a) Engineering, finance, and technology
2. b) Medicine, education, and entertainment
3. c) Science, art, and politics
4. d) Communication, transportation, and agriculture

correct: b) Medicine, education, and entertainment

difficulty: medium


question: What could users do with SynthEra?

1. a) Teleport to different locations
2. b) Control their dreams
3. c) Manipulate the virtual environment around them
4. d) Predict the future

correct: c) Manipulate the virtual environment around them

difficulty: easy


question: What were the ethical considerations associated with SynthEra?

1. a) Environmental impact, economic inequality, and resource depletion
2. b) Privacy, security, and potential misuse of access to the human mind
3. c) Education reform, cultural preservation, and social justice
4. d) Political instability, technological unemployment, and global governance

correct: b) Privacy, security, and potential misuse of access

In [None]:
exam_03 = get_exam(text_03, n=5)

In [None]:
display_exam(exam_03)

question: What is the Collatz conjecture?

1. A problem in abstract mathematics
2. A puzzle in number theory
3. A theorem proven by Lothar Collatz
4. A cycle of numbers: 4, 2, 1

correct: A problem in abstract mathematics

difficulty: medium


question: What makes the Collatz conjecture challenging?

1. Its simplicity
2. Its complexity
3. The involvement of German mathematicians
4. The predictable patterns in the sequence

correct: Its complexity

difficulty: easy


question: What behavior does the Collatz sequence exhibit?

1. Erratic and unpredictable
2. Convergent and predictable
3. Cyclic and repetitive
4. Chaotic and random

correct: Erratic and unpredictable

difficulty: medium


question: Why do mathematicians find the Collatz conjecture intriguing?

1. Its inaccessibility to amateurs
2. Its predictable patterns
3. Its simplicity and depth
4. Its reliance on prime numbers

correct: Its simplicity and depth

difficulty: easy


question: What are the rules of the Collatz conjectur

In [None]:
exam_04 = get_exam(text_04, n=5)



In [None]:
display_exam(exam_04)

question: What is the potency of poetry?

1. Its articulated lexicon
2. Verbal modulations
3. The intervals of pregnant reticence
4. All of the above

correct: All of the above

difficulty: medium


question: What does poetry transcend?

1. Linguistic demarcations
2. Temporal epochs
3. Both A and B
4. None of the above

correct: Both A and B

difficulty: medium


question: What does the art of compaction in poetry entail?

1. Distilling intricate emotions
2. Endowing the abstract with palpability
3. Unveiling profound verities
4. All of the above

correct: All of the above

difficulty: medium


question: What does each poetic modality offer?

1. A distinctive perspective
2. A unique rhythm
3. A specific theme
4. None of the above

correct: A distinctive perspective

difficulty: medium


question: What does the choreography of syllabic configurations and rhythmic resonances do?

1. Imbues poets with expressive profundity
2. Metamorphoses prosaic verbiage
3. Creates an intricate mosaic
4

In [None]:
exam_05 = get_exam(text_05, n=5)

In [None]:
display_exam(exam_05)

question: What is the name of the large-scale language model developed by OpenAI?

1. GPT-2
2. GPT-3
3. BERT
4. ELMO

correct: GPT-3

difficulty: easy


question: Which architecture revolutionized natural language processing by introducing the self-attention mechanism?

1. RNN
2. LSTM
3. GRU
4. Transformer

correct: Transformer

difficulty: medium


question: What is the main concern associated with the evolution of Large Language Models?

1. Bias
2. Performance
3. Accuracy
4. Interpretability

correct: Bias

difficulty: easy


question: In which year was the transformer model introduced?

1. 2015
2. 2016
3. 2017
4. 2018

correct: 2017

difficulty: medium


question: Which approach in natural language processing outperformed rule-based systems in the late 20th century?

1. Statistical
2. Rule-based
3. Symbolic
4. Semantic

correct: Statistical

difficulty: easy


Tokens Used: 1117
	Prompt Tokens: 808
	Completion Tokens: 309
Successful Requests: 1
Total Cost (USD): $0.001830000000000000