In [6]:
import os
import json
import pandas as pd
import traceback

In [7]:
from langchain_openai import ChatOpenAI

In [8]:
from dotenv import load_dotenv
load_dotenv()

True

In [9]:
# Read the OpenAI API key from the process environment; None when the variable is unset.
KEY = os.environ.get("OPENAI_API_KEY")

In [11]:
# Chat model client used by the chains built later in this notebook.
llm = ChatOpenAI(
    api_key=KEY,
    model_name="gpt-4o",
    temperature=0.3,
)

In [12]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

In [13]:
# Example of the answer shape shown to the model: question numbers (as strings)
# mapped to the question text, four lettered options, and the correct answer.
# Every entry is a fresh dict, so the three examples do not share state.
RESPONSE_JSON = {
    str(question_number): {
        "mcq": "multiple choice question",
        "options": {letter: f"option {letter}" for letter in "abcd"},
        "correct": "correct answer",
    }
    for question_number in range(1, 4)
}

In [41]:
# Prompt for the quiz-generation step. Placeholders: {text}, {number},
# {subject}, {tone} and {response_json}.
# The lines are wrapped with plain newlines only: a literal "\n" escape at the
# end of a line inside a triple-quoted string would add a second newline and
# leave a blank line in the middle of a sentence in the rendered prompt.
TEMPLATE = """
Text: {text}
You are an expert MCQ test maker. Given the above text, it is your job to
create a quiz of {number} multiple choice questions for {subject} students in a {tone} tone.
Make sure the questions are not repeated and check to make sure that all the questions can be solved with information
from the text. Make sure to format your questions like the RESPONSE_JSON below.
Ensure that you make {number} questions.

RESPONSE_JSON: {response_json}
"""

In [42]:
# Prompt object for the generation step; the listed names are the
# {placeholders} that appear in TEMPLATE.
quiz_generation_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["text", "number", "subject", "tone", "response_json"],
)

In [43]:
# First chain: fills the generation prompt and stores the model's answer
# under the "quiz" output key.
quiz_chain = LLMChain(
    llm=llm,
    prompt=quiz_generation_prompt,
    output_key="quiz",
    verbose=True,
)

In [44]:
# Prompt for the review step: asks for a short complexity analysis of the
# generated quiz and, where needed, rewritten questions.
# Placeholders: {subject} and {quiz}.
# Lines are wrapped with plain newlines only (no trailing "\n" escapes), so the
# rendered prompt has no blank lines in the middle of sentences.
TEMPLATE2 = """
You are an English writer who is very proficient in the English language, especially grammar. Given a
multiple choice quiz for {subject} students, you need to evaluate the complexity of the questions and provide a
complexity analysis of the quiz. Only use at max 50 words for the complexity analysis. If the quiz is not on par with
the cognitive and analytical abilities of the students, update the quiz questions which need to be changed so that the tone
is appropriate for the students' cognitive level.
Quiz MCQs:
{quiz}

Check from an expert English writer of the above quiz:
"""

In [45]:
# Prompt object for the review step; the listed names are the
# {placeholders} that appear in TEMPLATE2.
quiz_evaluation_prompt = PromptTemplate(
    template=TEMPLATE2,
    input_variables=["subject", "quiz"],
)

In [46]:
# Second chain: fills the review prompt and stores the model's answer
# under the "review" output key.
review_chain = LLMChain(
    llm=llm,
    prompt=quiz_evaluation_prompt,
    output_key="review",
    verbose=True,
)

In [69]:
# Pipeline that runs quiz_chain and then review_chain, exposing both the
# "quiz" and "review" outputs in the final result.
generate_evaluate_quiz = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
)

In [70]:
# Location of the source text. The default is the machine-specific absolute
# path this notebook was written with; set the MCQ_DATA_PATH environment
# variable to point at a different file without editing the notebook.
file_path = os.environ.get(
    "MCQ_DATA_PATH",
    r"/Users/suhaspalawala/Documents/GitHub/mcq-generator/data.txt",
)

In [71]:
# Read the whole source document into memory. The encoding is given explicitly
# so the result does not depend on the platform's default locale encoding.
with open(file_path, "r", encoding="utf-8") as file:
    TEXT = file.read()

In [72]:
# Print the raw source text that is later passed to the chain as "text".
print(TEXT)

The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[8][9] The synonym self-teaching computers was also used in this time period.[10][11]

Although the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[12] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[13] Hebb's model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data.[12] Other researchers who have studied human cognitive systems contributed 

In [73]:
# Preview the JSON string form of RESPONSE_JSON; the same call supplies the
# "response_json" input when the chain is run.
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "option a", "b": "option b", "c": "option c", "d": "option d"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "option a", "b": "option b", "c": "option c", "d": "option d"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "option a", "b": "option b", "c": "option c", "d": "option d"}, "correct": "correct answer"}}'

In [74]:
# Quiz parameters passed to the chain as "number", "subject" and "tone".
NUMBER: int = 5  # how many questions to request
SUBJECT: str = "Machine Learning"  # audience named in the prompts
TONE: str = "simple"  # tone requested for the questions

In [75]:
# Run the generation and review chains inside the OpenAI callback context so
# that `cb` collects the token and cost figures reported further down.
# `invoke` replaces calling the chain object directly: `Chain.__call__` is
# deprecated in LangChain releases that provide the `langchain_openai`
# package, and `invoke` returns the same dict of inputs plus outputs.
with get_openai_callback() as cb:
    response = generate_evaluate_quiz.invoke(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject": SUBJECT,
            "tone": TONE,
            "response_json": json.dumps(RESPONSE_JSON),
        }
    )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text: The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[8][9] The synonym self-teaching computers was also used in this time period.[10][11]

Although the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[12] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[13] Hebb's model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes

In [76]:
# Report the usage figures collected by the OpenAI callback, one per line.
print(
    f"Total Tokens: {cb.total_tokens}",
    f"Prompt Tokens: {cb.prompt_tokens}",
    f"Completion Tokens: {cb.completion_tokens}",
    f"Total Cost: {cb.total_cost}",
    sep="\n",
)

Total Tokens: 2353
Prompt Tokens: 1453
Completion Tokens: 900
Total Cost: 0.020765


In [77]:
# Display the full chain result: the inputs that were passed in, plus the
# "quiz" and "review" outputs.
response

{'text': 'The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[8][9] The synonym self-teaching computers was also used in this time period.[10][11]\n\nAlthough the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[12] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[13] Hebb\'s model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data.[12] Other researchers who have studied human cognitive systems

In [78]:
# Pull the generated quiz text out of the chain result; .get yields None if
# the "quiz" key is absent.
quiz = response.get("quiz")

In [79]:
# Parse the model's answer into a dict keyed by question number.
# Chat models sometimes wrap JSON in a Markdown code fence (```json ... ```),
# which json.loads rejects, so any such fence is removed first. Non-string
# values are passed to json.loads unchanged, so they fail as they did before.
quiz_text = quiz
if isinstance(quiz_text, str):
    quiz_text = quiz_text.strip()
    if quiz_text.startswith("```"):
        # Drop the opening fence line (``` or ```json) ...
        quiz_text = quiz_text.split("\n", 1)[1] if "\n" in quiz_text else ""
        # ... and everything from the closing fence onwards.
        quiz_text = quiz_text.rsplit("```", 1)[0]
quiz = json.loads(quiz_text)

In [80]:
# Flatten each parsed question into one table row: the question text, its
# options joined as "letter: text | letter: text | ...", and the correct answer.
# Only the values of `quiz` are needed (the question numbers are not used), and
# the comprehension avoids leaving loop temporaries in the notebook namespace.
quiz_table_data = [
    {
        "MCQ": question["mcq"],
        "Options": " | ".join(
            f"{letter}: {text}" for letter, text in question["options"].items()
        ),
        "Correct": question["correct"],
    }
    for question in quiz.values()
]

In [81]:
# Show the flattened rows (MCQ / Options / Correct) built from the parsed quiz.
quiz_table_data

[{'MCQ': "Who coined the term 'machine learning' in 1959?",
  'Options': 'a: Donald Hebb | b: Arthur Samuel | c: Walter Pitts | d: Tom M. Mitchell',
  'Correct': 'Arthur Samuel'},
 {'MCQ': "What was the name of the experimental 'learning machine' developed by Raytheon Company in the early 1960s?",
  'Options': 'a: Cybertron | b: Deep Blue | c: AlphaGo | d: Perceptron',
  'Correct': 'Cybertron'},
 {'MCQ': 'Which book by Donald Hebb introduced a theoretical neural structure formed by interactions among nerve cells?',
  'Options': 'a: Learning Machines | b: The Organization of Behavior | c: Computing Machinery and Intelligence | d: Pattern Recognition',
  'Correct': 'The Organization of Behavior'},
 {'MCQ': 'What are the two main objectives of modern-day machine learning?',
  'Options': 'a: Data classification and future outcome prediction | b: Data storage and retrieval | c: Algorithm optimization and data mining | d: Pattern recognition and speech synthesis',
  'Correct': 'Data classifi

In [82]:
# Build a table with one row per question (columns: MCQ, Options, Correct).
# NOTE(review): this rebinds `quiz`, which until now held the parsed JSON dict;
# re-running earlier cells that expect the dict will fail after this point.
quiz = pd.DataFrame(quiz_table_data)

In [86]:
# Write the quiz table to a CSV file (relative path, so it lands in the current
# working directory), leaving out the DataFrame index column.
quiz.to_csv("machine_learning_quiz.csv", index = False)