In [1]:
import os
import json
import pandas as pd
import traceback

In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
from langchain_groq import ChatGroq

In [4]:
GROQ_API_KEY = os.getenv('GROQ_API_KEY')

In [6]:
llm = ChatGroq(
    temperature=0,
    groq_api_key=GROQ_API_KEY,
    model_name="llama-3.3-70b-versatile",
)

In [7]:
llm.invoke("Hi")

AIMessage(content="It's nice to meet you. Is there something I can help you with or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 36, 'total_tokens': 59, 'completion_time': 0.083636364, 'prompt_time': 0.007315562, 'queue_time': 0.018162004, 'total_time': 0.090951926}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_4e32347616', 'finish_reason': 'stop', 'logprobs': None}, id='run-34d35480-40e0-474b-8cdd-d4f65d1bda5e-0', usage_metadata={'input_tokens': 36, 'output_tokens': 23, 'total_tokens': 59})

In [8]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

In [9]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}

In [10]:
TEMPLATE = """
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below that can be convertable into python dictionary and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}

"""

In [11]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template=TEMPLATE,
)

In [12]:
quiz_chain = LLMChain(
    llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True
)

  quiz_chain = LLMChain(


In [13]:
TEMPLATE2 = """
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [14]:
quiz_evaluation_prompt = PromptTemplate(
    input_variables=["subject", "quiz"], template=TEMPLATE
)

In [15]:
review_chain = LLMChain(
    llm=llm, prompt=quiz_evaluation_prompt, output_key="review", verbose=True
)

In [16]:
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
)

In [17]:
file_path = r"D:\AppliedGenAI\Python\mcqgen\data.txt"

In [20]:
file_path

'D:\\AppliedGenAI\\Python\\mcqgen\\data.txt'

In [18]:
with open(file_path, "r") as file:
    TEXT = file.read()

In [18]:
print(TEXT
      )

Early usage
In 1962, John Tukey described a field he called "data analysis", which resembles modern data science.[20] In 1985, in a lecture given to the Chinese Academy of Sciences in Beijing, C. F. Jeff Wu used the term "data science" for the first time as an alternative name for statistics.[21] Later, attendees at a 1992 statistics symposium at the University of Montpellier  II acknowledged the emergence of a new discipline focused on data of various origins and forms, combining established concepts and principles of statistics and data analysis with computing.[22][23]

The term "data science" has been traced back to 1974, when Peter Naur proposed it as an alternative name to computer science.[6] In 1996, the International Federation of Classification Societies became the first conference to specifically feature data science as a topic.[6] However, the definition was still in flux. After the 1985 lecture at the Chinese Academy of Sciences in Beijing, in 1997 C. F. Jeff Wu again sugge

In [19]:
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [20]:
NUMBER = 10
SUBJECT = "Data Science"
TONE = "medium"

In [21]:
# https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking

# How to setup Token Usage Tracking in LangChain
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject": SUBJECT,
            "tone": TONE,
            "response_json": json.dumps(RESPONSE_JSON),
        }
    )

  response = generate_evaluate_chain(




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Early usage
In 1962, John Tukey described a field he called "data analysis", which resembles modern data science.[20] In 1985, in a lecture given to the Chinese Academy of Sciences in Beijing, C. F. Jeff Wu used the term "data science" for the first time as an alternative name for statistics.[21] Later, attendees at a 1992 statistics symposium at the University of Montpellier  II acknowledged the emergence of a new discipline focused on data of various origins and forms, combining established concepts and principles of statistics and data analysis with computing.[22][23]

The term "data science" has been traced back to 1974, when Peter Naur proposed it as an alternative name to computer science.[6] In 1996, the International Federation of Classification Societies became the first conference to specifically feature data science as a topic.[6] However, 

In [22]:
print(f"Total Tokens:{cb.total_tokens}")
print(f"Prompt Tokens:{cb.prompt_tokens}")
print(f"Completion Tokens:{cb.completion_tokens}")
print(f"Total Cost:{cb.total_cost}")

Total Tokens:4065
Prompt Tokens:2028
Completion Tokens:2037
Total Cost:0.0


In [22]:
response

{'text': 'Early usage\nIn 1962, John Tukey described a field he called "data analysis", which resembles modern data science.[20] In 1985, in a lecture given to the Chinese Academy of Sciences in Beijing, C. F. Jeff Wu used the term "data science" for the first time as an alternative name for statistics.[21] Later, attendees at a 1992 statistics symposium at the University of Montpellier  II acknowledged the emergence of a new discipline focused on data of various origins and forms, combining established concepts and principles of statistics and data analysis with computing.[22][23]\n\nThe term "data science" has been traced back to 1974, when Peter Naur proposed it as an alternative name to computer science.[6] In 1996, the International Federation of Classification Societies became the first conference to specifically feature data science as a topic.[6] However, the definition was still in flux. After the 1985 lecture at the Chinese Academy of Sciences in Beijing, in 1997 C. F. Jeff W

In [23]:
quiz = response.get("quiz")

In [47]:
result =  llm.invoke(f"""1. remove backticks 
                     2. rewrite it in a pure json format {quiz}""")

In [45]:
result.content

'```json\n{\n  "1": {\n    "mcq": "Who first used the term \\"data science\\" as an alternative name for statistics?",\n    "options": {\n      "a": "John Tukey",\n      "b": "Peter Naur",\n      "c": "C. F. Jeff Wu",\n      "d": "William S. Cleveland"\n    },\n    "correct": "c"\n  },\n  "2": {\n    "mcq": "In what year did C. F. Jeff Wu first suggest that statistics be renamed data science?",\n    "options": {\n      "a": "1974",\n      "b": "1985",\n      "c": "1997",\n      "d": "2001"\n    },\n    "correct": "c"\n  },\n  "3": {\n    "mcq": "Which popular terms were used in the 1990s to describe the process of finding patterns in large datasets?",\n    "options": {\n      "a": "Data analysis and data mining",\n      "b": "Knowledge discovery and data mining",\n      "c": "Big data and data science",\n      "d": "Statistical learning and data science"\n    },\n    "correct": "b"\n  },\n  "4": {\n    "mcq": "Who declared \\"Data Scientist: The Sexiest Job of the 21st Century\\" in 20

In [24]:
quiz = json.loads(quiz)

In [25]:
quiz

{'1': {'mcq': "Who is credited with describing a field called 'data analysis' in 1962, which resembles modern data science?",
  'options': {'a': 'C. F. Jeff Wu',
   'b': 'John Tukey',
   'c': 'Peter Naur',
   'd': 'William S. Cleveland'},
  'correct': 'b'},
 '2': {'mcq': "In what year did C. F. Jeff Wu use the term 'data science' for the first time as an alternative name for statistics?",
  'options': {'a': '1974', 'b': '1985', 'c': '1992', 'd': '1997'},
  'correct': 'b'},
 '3': {'mcq': 'What was the name of the journal launched by the Committee on Data for Science and Technology in 2002?',
  'options': {'a': 'The Journal of Data Science',
   'b': 'Data Science Journal',
   'c': 'Statistics Journal',
   'd': 'Data Analysis Journal'},
  'correct': 'b'},
 '4': {'mcq': 'Who is sometimes attributed with the modern conception of data science as an independent discipline?',
  'options': {'a': 'DJ Patil',
   'b': 'William S. Cleveland',
   'c': 'John Tukey',
   'd': 'C. F. Jeff Wu'},
  'corre

In [26]:
quiz_table_data = []
for key, value in quiz.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
        ]
    )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [27]:
quiz_table_data

[{'MCQ': "Who is credited with describing a field called 'data analysis' in 1962, which resembles modern data science?",
  'Choices': 'a: C. F. Jeff Wu | b: John Tukey | c: Peter Naur | d: William S. Cleveland',
  'Correct': 'b'},
 {'MCQ': "In what year did C. F. Jeff Wu use the term 'data science' for the first time as an alternative name for statistics?",
  'Choices': 'a: 1974 | b: 1985 | c: 1992 | d: 1997',
  'Correct': 'b'},
 {'MCQ': 'What was the name of the journal launched by the Committee on Data for Science and Technology in 2002?',
  'Choices': 'a: The Journal of Data Science | b: Data Science Journal | c: Statistics Journal | d: Data Analysis Journal',
  'Correct': 'b'},
 {'MCQ': 'Who is sometimes attributed with the modern conception of data science as an independent discipline?',
  'Choices': 'a: DJ Patil | b: William S. Cleveland | c: John Tukey | d: C. F. Jeff Wu',
  'Correct': 'b'},
 {'MCQ': 'What was the title of the paper written by Thomas H. Davenport and DJ Patil in

In [28]:
quiz = pd.DataFrame(quiz_table_data)

In [29]:
quiz

Unnamed: 0,MCQ,Choices,Correct
0,Who is credited with describing a field called...,a: C. F. Jeff Wu | b: John Tukey | c: Peter Na...,b
1,In what year did C. F. Jeff Wu use the term 'd...,a: 1974 | b: 1985 | c: 1992 | d: 1997,b
2,What was the name of the journal launched by t...,a: The Journal of Data Science | b: Data Scien...,b
3,Who is sometimes attributed with the modern co...,a: DJ Patil | b: William S. Cleveland | c: Joh...,b
4,What was the title of the paper written by Tho...,a: Data Science: The Future of Statistics | b:...,b
5,In what year did the American Statistical Asso...,a: 2005 | b: 2008 | c: 2010 | d: 2014,d
6,Who is credited with proposing the term 'data ...,a: Peter Naur | b: John Tukey | c: C. F. Jeff ...,a
7,What are the three aspects of data science acc...,"a: Data design, collection, and analysis | b: ...",a
8,Who are credited with the professional title o...,a: DJ Patil and Jeff Hammerbacher | b: John Tu...,a
9,What is the role of data scientists according ...,a: To manage digital data collections | b: To ...,b


In [30]:
quiz.to_csv("Data Science.csv", index=False)