In [77]:
from dotenv import load_dotenv
from langchain import PromptTemplate
from langchain.chains import SequentialChain, LLMChain
from langchain.callbacks import get_openai_callback
from langchain.chat_models import ChatOpenAI
import os
import json
import pandas as pd
from langchain_core.runnables import RunnableLambda
import re

In [39]:
load_dotenv()

True

In [40]:
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY2")

In [41]:
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
    temperature=0.8,
)

In [42]:
RESPONSE_JSON = {
    "1": {
        "mcq": "What is the capital of France?",
        "options": {
            "a": "Berlin",
            "b": "Madrid",
            "c": "Paris",
            "d": "Rome"
        },
        "correct_option": "c"
    }
}


In [43]:
TEMPLATE=""" 
text={text}
subject={subject}
tone={tone}
number={number}

Instructios:
You are to generate a quiz in a {tone} tone based on the subject "{subject}".
the quiz should contain {number} multiple-choice question.
Each question should have:
- a clear and concise question text
- four option lebeled A, B, C, D
- a correct answer key

output must be in the following JSON format:
{response_json}

"""

In [44]:
quiz_prompt = PromptTemplate(
    input_variables=["text", "subject", "number", "tone", "response_json"],
    template=TEMPLATE
)

In [45]:
# quiz_chain = LLMChain(
#     llm=llm,
#     prompt=quiz_prompt,
#     output_key="quiz",
#     verbose=True
# )

quiz_chain= quiz_prompt | llm

In [46]:
TEMPLATE = """
You are an expert English grammarian and writer. Given a multiple-choice quiz for {subject} students,
you need to evaluate the complexity of the questions and provide a complete analysis of the quiz.

- Use **no more than 50 words** for the complexity analysis.
- If any question does not align with the cognitive and analytical abilities of the students,
  revise only those questions and adjust the tone to better suit the appropriate difficulty level.

Quiz MCQs:
{quiz}

Please provide your evaluation and the updated quiz (if needed) from the perspective of an expert English writer:
"""


In [47]:
review_prompt = PromptTemplate(
    input_variables=["subject", "quiz"],
    template=TEMPLATE,
)

In [48]:
# review_chain = LLMChain(
#     llm=llm,
#     prompt=quiz_evalution_prompt,
#     output_key="review",
#     verbose=True
# )

review_chain= review_prompt

In [56]:
# gen_evaluate_chain = SequentialChain(
#     chains=[quiz_chain, review_chain],
#     input_variables=["text", "subject", "number", "tone", "response_json"],
#     output_variables=["quiz", 'review'],
#     verbose=True
# )

quiz_evaluate_chain=(
    RunnableLambda(lambda inputs:{
        **inputs,
        "quiz": quiz_chain.invoke(inputs)
    }) |
    RunnableLambda(lambda inputs:{
        "quiz": inputs["quiz"],
        "review": review_chain.invoke({
            "quiz": inputs["quiz"],
            "subject": inputs["subject"]
        })
    })
)

In [57]:
from pathlib import Path
file_path = r'../data.txt'

In [58]:
with open(file_path, 'r') as file:
    TEXT = file.read()

In [59]:
NUMBER=5
SUBJECT="Data science"
TONE="Hard"

In [62]:
# with get_openai_callback() as cb:
#     response = gen_evaluate_chain(
#         {
#             "text":TEXT,
#             "number":NUMBER,
#             "subject":SUBJECT,
#             "tone":TONE,
#             "response_json":json.dumps(RESPONSE_JSON)
#         }
#     )

with get_openai_callback() as cb:
  response = quiz_evaluate_chain.invoke(
       {
        "text":TEXT,
        "subject":SUBJECT,
        "number":NUMBER,
        "tone":TONE,
        "response_json":json.dumps(RESPONSE_JSON)
    }
  )

In [64]:
print(f"Tokens used: {cb.total_tokens}")
print(f"Prompt Tokens: {cb.prompt_tokens}")
print(f"Completion Tokens: {cb.completion_tokens}")
print(f"Total cose: ${cb.total_cost}")

Tokens used: 988
Prompt Tokens: 603
Completion Tokens: 385
Total cose: $0.00032145


In [29]:
quiz_dict= json.loads(quiz_str)
quiz_dict

{'1': {'mcq': 'What is the primary purpose of data science?',
  'options': {'a': 'To create programming languages',
   'b': 'To extract knowledge and insights from data',
   'c': 'To develop hardware solutions',
   'd': 'To manage databases'},
  'correct_answer': 'b'},
 '2': {'mcq': 'Which of the following fields does NOT directly contribute to data science?',
  'options': {'a': 'Statistics',
   'b': 'Philosophy',
   'c': 'Computer Science',
   'd': 'Information Science'},
  'correct_answer': 'b'},
 '3': {'mcq': 'According to Jim Gray, how is data science characterized in relation to traditional paradigms of science?',
  'options': {'a': 'As a purely theoretical field',
   'b': 'As the first paradigm of science',
   'c': 'As a fourth paradigm that is data-driven',
   'd': 'As a subset of empirical science'},
  'correct_answer': 'c'},
 '4': {'mcq': 'Which of the following skills is essential for a data scientist?',
  'options': {'a': 'Creative writing',
   'b': 'Statistical knowledge',


In [30]:
quiz_table = []
for key, value in quiz_dict.items():
    mcq = value['mcq']
    options = " || ".join(
        [
            f"{option} : {option_value} "
            for option, option_value in value["options"].items()
        ]
    )
    correct_answer = value["correct_answer"]
    quiz_table.append({"MCQ":mcq, "Choices":options, "Correct_Answer":correct_answer})

In [31]:
df = pd.DataFrame(quiz_table)
df

Unnamed: 0,MCQ,Choices,Correct_Answer
0,What is the primary purpose of data science?,a : To create programming languages || b : To...,b
1,Which of the following fields does NOT directl...,a : Statistics || b : Philosophy || c : Comp...,b
2,"According to Jim Gray, how is data science cha...",a : As a purely theoretical field || b : As t...,c
3,Which of the following skills is essential for...,a : Creative writing || b : Statistical knowl...,b
4,What role does human-computer interaction play...,a : It is irrelevant to data analysis || b : ...,b


In [32]:
df.to_csv("data_science.csv", index=False)