In [59]:
from dotenv import load_dotenv
from langchain import PromptTemplate
from langchain.chains import SequentialChain, LLMChain
from langchain.callbacks import get_openai_callback
from langchain.chat_models import ChatOpenAI
import os
import json
import pandas as pd

In [60]:
load_dotenv()

True

In [61]:
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY2")

In [62]:
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
    temperature=0.8,
)

In [63]:
RESPONSE_JSON={
    "1":{
        "mcq":"multiple question",
        "options":{
            "a":"choice answer",
            "b":"choice answer",
            "c":"choice answer",
            "d":"choice answer",
        },
        "correct_answer":"correct answer"
    }
}

In [64]:
TEMPLATE=""" 
subject={subject}
tone={tone}
number={number}

Instructios:
You are to generate a quiz in a {tone} tone based on the subject "{subject}".
the quiz should contain {number} multiple-choice question.
Each question should have:
- a clear and concise question text
- four option lebeled A, B, C, D
- a correct answer key

output must be in the following JSON format:
{response_json}

"""

In [65]:
quiz_prompt = PromptTemplate(
    input_variables=["text", "subject", "number", "tone", "response_json"],
    template=TEMPLATE
)

In [66]:
quiz_chain = LLMChain(
    llm=llm,
    prompt=quiz_prompt,
    output_key="quiz",
    verbose=True
)

In [67]:
TEMPLATE = """
You are an expert English grammarian and writer. Given a multiple-choice quiz for {subject} students,
you need to evaluate the complexity of the questions and provide a complete analysis of the quiz.

- Use **no more than 50 words** for the complexity analysis.
- If any question does not align with the cognitive and analytical abilities of the students,
  revise only those questions and adjust the tone to better suit the appropriate difficulty level.

Quiz MCQs:
{quiz}

Please provide your evaluation and the updated quiz (if needed) from the perspective of an expert English writer:
"""


In [68]:
quiz_evalution_prompt = PromptTemplate(
    input_variables=["subject", "quiz"],
    template=TEMPLATE,
)

In [69]:
review_chain = LLMChain(
    llm=llm,
    prompt=quiz_evalution_prompt,
    output_key="review",
    verbose=True
)

In [70]:
gen_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "subject", "number", "tone", "response_json"],
    output_variables=["quiz", 'review'],
    verbose=True
)

In [71]:
from pathlib import Path
file_path = r'E:\learning\Gen-AI\mcqgen\data.txt'

In [72]:
with open(file_path, 'r') as file:
    TEXT = file.read()

In [73]:
NUMBER=5
SUBJECT="Data science"
TONE="Hard"

In [74]:
with get_openai_callback() as cb:
    response = gen_evaluate_chain(
        {
            "text":TEXT,
            "number":NUMBER,
            "subject":SUBJECT,
            "tone":TONE,
            "response_json":json.dumps(RESPONSE_JSON)
        }
    )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m 
subject=Data science
tone=Hard
number=5

Instructios:
You are to generate a quiz in a Hard tone based on the subject "Data science".
the quiz should contain 5 multiple-choice question.
Each question should have:
- a clear and concise question text
- four option lebeled A, B, C, D
- a correct answer key

output must be in the following JSON format:
{"1": {"mcq": "multiple question", "options": {"a": "choice answer", "b": "choice answer", "c": "choice answer", "d": "choice answer"}, "correct_answer": "correct answer"}}

[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are an expert English grammarian and writer. Given a multiple-choice quiz for Data science students,
you need to evaluate the complexity of the questions and provide a complete analysis of the quiz.

- Use **no more than 50 w

In [75]:
print(f"Tokens used: {cb.total_tokens}")
print(f"Prompt Tokens: {cb.prompt_tokens}")
print(f"Completion Tokens: {cb.completion_tokens}")
print(f"Total cose: ${cb.total_cost}")

Tokens used: 1755
Prompt Tokens: 730
Completion Tokens: 1025
Total cose: $0.0007245


In [78]:
quiz_str = response["quiz"].strip("`").replace("json", "").strip()

In [85]:
quiz_dict= json.loads(quiz_str)
quiz_dict

{'1': {'mcq': 'What is the primary purpose of feature scaling in data preprocessing?',
  'options': {'a': 'To increase the number of features',
   'b': 'To ensure all features contribute equally to model training',
   'c': 'To reduce the dimensionality of the dataset',
   'd': 'To eliminate outliers from the dataset'},
  'correct_answer': 'b'},
 '2': {'mcq': 'Which of the following techniques is NOT typically used for regression analysis?',
  'options': {'a': 'Linear Regression',
   'b': 'Logistic Regression',
   'c': 'Ridge Regression',
   'd': 'K-Means Clustering'},
  'correct_answer': 'd'},
 '3': {'mcq': 'In a decision tree, what metric is commonly used to determine the best split at each node?',
  'options': {'a': 'Mean Squared Error',
   'b': 'Entropy',
   'c': 'Cosine Similarity',
   'd': 'F1 Score'},
  'correct_answer': 'b'},
 '4': {'mcq': 'Which of the following is a key characteristic of overfitting in machine learning models?',
  'options': {'a': 'High accuracy on training da

In [91]:
quiz_table = []
for key, value in quiz_dict.items():
    mcq = value['mcq']
    options = " || ".join(
        [
            f"{option} : {option_value} "
            for option, option_value in value["options"].items()
        ]
    )
    correct_answer = value["correct_answer"]
    quiz_table.append({"MCQ":mcq, "Choices":options, "Correct_Answer":correct_answer})

In [94]:
df = pd.DataFrame(quiz_table)
df

Unnamed: 0,MCQ,Choices,Correct_Answer
0,What is the primary purpose of feature scaling...,a : To increase the number of features || b :...,b
1,Which of the following techniques is NOT typic...,a : Linear Regression || b : Logistic Regress...,d
2,"In a decision tree, what metric is commonly us...",a : Mean Squared Error || b : Entropy || c :...,b
3,Which of the following is a key characteristic...,a : High accuracy on training data and low acc...,a
4,What does the term 'curse of dimensionality' r...,a : The difficulty of visualizing high-dimensi...,b


In [95]:
df.to_csv("data_science.csv", index=False)