In [2]:
import os
import json
import pandas as pd
import traceback

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
google_api_key = os.getenv('GOOGLE_API_KEY')

In [7]:
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",  # Specify the model to use
    api_key=google_api_key,  # Provide the Google API key for authentication
    temperature=0.2,  # Set the randomness of the model's responses (0 = deterministic, 1 = very random)
)

In [8]:
llm.invoke("Hi")

AIMessage(content='Hi there! How can I help you today?\n', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-2fb85ca9-2533-4dc8-8135-1c1dfde46a07-0', usage_metadata={'input_tokens': 2, 'output_tokens': 11, 'total_tokens': 13, 'input_token_details': {'cache_read': 0}})

In [9]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

In [10]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}

In [11]:
TEMPLATE = """
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below that can be convertable into python dictionary and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}

"""

In [12]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template=TEMPLATE,
)

In [13]:
quiz_chain = LLMChain(
    llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=False
)

  quiz_chain = LLMChain(


In [14]:
TEMPLATE2 = """
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [15]:
quiz_evaluation_prompt = PromptTemplate(
    input_variables=["subject", "quiz"], template=TEMPLATE
)

In [17]:
review_chain = LLMChain(
    llm=llm, prompt=quiz_evaluation_prompt, output_key="review", verbose=True
)

In [18]:
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
)

In [73]:
file_path = r"D:\AppliedGenAI\Python\mcqgen\data.txt"

In [20]:
file_path

'D:\\AppliedGenAI\\Python\\mcqgen\\data.txt'

In [74]:
with open(file_path, "r") as file:
    TEXT = file.read()

In [75]:
print(TEXT
      )

Early usage
In 1962, John Tukey described a field he called "data analysis", which resembles modern data science.[20] In 1985, in a lecture given to the Chinese Academy of Sciences in Beijing, C. F. Jeff Wu used the term "data science" for the first time as an alternative name for statistics.[21] Later, attendees at a 1992 statistics symposium at the University of Montpellier  II acknowledged the emergence of a new discipline focused on data of various origins and forms, combining established concepts and principles of statistics and data analysis with computing.[22][23]

The term "data science" has been traced back to 1974, when Peter Naur proposed it as an alternative name to computer science.[6] In 1996, the International Federation of Classification Societies became the first conference to specifically feature data science as a topic.[6] However, the definition was still in flux. After the 1985 lecture at the Chinese Academy of Sciences in Beijing, in 1997 C. F. Jeff Wu again sugge

In [23]:
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [76]:
NUMBER = 10
SUBJECT = "Data Science"
TONE = "medium"

In [77]:
# https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking

# How to setup Token Usage Tracking in LangChain
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject": SUBJECT,
            "tone": TONE,
            "response_json": json.dumps(RESPONSE_JSON),
        }
    )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Early usage
In 1962, John Tukey described a field he called "data analysis", which resembles modern data science.[20] In 1985, in a lecture given to the Chinese Academy of Sciences in Beijing, C. F. Jeff Wu used the term "data science" for the first time as an alternative name for statistics.[21] Later, attendees at a 1992 statistics symposium at the University of Montpellier  II acknowledged the emergence of a new discipline focused on data of various origins and forms, combining established concepts and principles of statistics and data analysis with computing.[22][23]

The term "data science" has been traced back to 1974, when Peter Naur proposed it as an alternative name to computer science.[6] In 1996, the International Federation of Classification Societies became the first conference to specifically feature data science as a topic.[6] However, 

In [55]:
print(f"Total Tokens:{cb.total_tokens}")
print(f"Prompt Tokens:{cb.prompt_tokens}")
print(f"Completion Tokens:{cb.completion_tokens}")
print(f"Total Cost:{cb.total_cost}")

Total Tokens:2651
Prompt Tokens:1468
Completion Tokens:1183
Total Cost:0.0


In [78]:
response

{'text': 'Early usage\nIn 1962, John Tukey described a field he called "data analysis", which resembles modern data science.[20] In 1985, in a lecture given to the Chinese Academy of Sciences in Beijing, C. F. Jeff Wu used the term "data science" for the first time as an alternative name for statistics.[21] Later, attendees at a 1992 statistics symposium at the University of Montpellier  II acknowledged the emergence of a new discipline focused on data of various origins and forms, combining established concepts and principles of statistics and data analysis with computing.[22][23]\n\nThe term "data science" has been traced back to 1974, when Peter Naur proposed it as an alternative name to computer science.[6] In 1996, the International Federation of Classification Societies became the first conference to specifically feature data science as a topic.[6] However, the definition was still in flux. After the 1985 lecture at the Chinese Academy of Sciences in Beijing, in 1997 C. F. Jeff W

In [79]:
quiz = response.get("quiz")

In [80]:
result =  llm.invoke(f"{quiz} remove all backticks from it")

In [81]:
result.content

'{\n  "1": {\n    "mcq": "Who first used the term \\"data science\\" as an alternative to statistics?",\n    "options": {\n      "a": "John Tukey",\n      "b": "Peter Naur",\n      "c": "C. F. Jeff Wu",\n      "d": "William S. Cleveland"\n    },\n    "correct": "c"\n  },\n  "2": {\n    "mcq": "In what year did C. F. Jeff Wu first suggest using \\"data science\\" as a replacement for \\"statistics\\"?",\n    "options": {\n      "a": "1974",\n      "b": "1985",\n      "c": "1997",\n      "d": "2001"\n    },\n    "correct": "c"\n  },\n  "3": {\n    "mcq": "Which year saw the International Federation of Classification Societies first feature data science as a conference topic?",\n    "options": {\n      "a": "1992",\n      "b": "1996",\n      "c": "1998",\n      "d": "2002"\n    },\n    "correct": "b"\n  },\n  "4": {\n    "mcq": "What popular terms were used in the 1990s to describe the process of finding patterns in large datasets?",\n    "options": {\n      "a": "Data analysis and data m

In [82]:
quiz = json.loads(result.content)

In [68]:
quiz

{'1': {'mcq': 'What early technology is considered a precursor to cloud computing, popularized through remote job entry (RJE)?',
  'options': {'a': 'The Internet',
   'b': 'Time-sharing',
   'c': 'Mainframe computers',
   'd': 'Virtualization'},
  'correct': 'b'},
 '2': {'mcq': "Who is credited with popularizing the 'cloud' metaphor for virtualized services in 1994, drawing from its use in networking and telecom?",
  'options': {'a': 'Jeff Bezos',
   'b': 'Bill Gates',
   'c': 'David Hoffman',
   'd': 'Mark Zuckerberg'},
  'correct': 'c'},
 '3': {'mcq': "Which company's 1996 business plan significantly contributed to the wider recognition of the term 'cloud computing', anticipating the success of online consumer file storage?",
  'options': {'a': 'IBM',
   'b': 'Microsoft',
   'c': 'Compaq Computer Corporation',
   'd': 'Apple'},
  'correct': 'c'},
 '4': {'mcq': 'In what year did Amazon Web Services (AWS) launch, marking a significant step in the development of cloud computing as we kn

In [83]:
quiz_table_data = []
for key, value in quiz.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
        ]
    )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [84]:
quiz_table_data

[{'MCQ': 'Who first used the term "data science" as an alternative to statistics?',
  'Choices': 'a: John Tukey | b: Peter Naur | c: C. F. Jeff Wu | d: William S. Cleveland',
  'Correct': 'c'},
 {'MCQ': 'In what year did C. F. Jeff Wu first suggest using "data science" as a replacement for "statistics"?',
  'Choices': 'a: 1974 | b: 1985 | c: 1997 | d: 2001',
  'Correct': 'c'},
 {'MCQ': 'Which year saw the International Federation of Classification Societies first feature data science as a conference topic?',
  'Choices': 'a: 1992 | b: 1996 | c: 1998 | d: 2002',
  'Correct': 'b'},
 {'MCQ': 'What popular terms were used in the 1990s to describe the process of finding patterns in large datasets?',
  'Choices': 'a: Data analysis and data mining | b: Knowledge discovery and data mining | c: Big data and data science | d: Statistical learning and data science',
  'Correct': 'b'},
 {'MCQ': 'Who declared "Data Scientist: The Sexiest Job of the 21st Century" in 2012?',
  'Choices': 'a: Jeff Ham

In [85]:
quiz = pd.DataFrame(quiz_table_data)

In [86]:
quiz

Unnamed: 0,MCQ,Choices,Correct
0,"Who first used the term ""data science"" as an a...",a: John Tukey | b: Peter Naur | c: C. F. Jeff ...,c
1,In what year did C. F. Jeff Wu first suggest u...,a: 1974 | b: 1985 | c: 1997 | d: 2001,c
2,Which year saw the International Federation of...,a: 1992 | b: 1996 | c: 1998 | d: 2002,b
3,What popular terms were used in the 1990s to d...,a: Data analysis and data mining | b: Knowledg...,b
4,"Who declared ""Data Scientist: The Sexiest Job ...",a: Jeff Hammerbacher and William S. Cleveland ...,b
5,Which year did the American Statistical Associ...,a: 2002 | b: 2003 | c: 2014 | d: 2008,c
6,"To whom is the professional title of ""data sci...",a: Thomas H. Davenport and DJ Patil | b: DJ Pa...,b
7,In what year did William S. Cleveland advocate...,a: 1997 | b: 2001 | c: 2002 | d: 2008,b
8,"According to the text, what is a related marke...",a: Knowledge discovery | b: Data mining | c: B...,c
9,"In 1962, who described a field resembling mode...",a: C. F. Jeff Wu | b: John Tukey | c: William ...,b
