In [139]:
import os
import json
import pandas as pd
import traceback

In [194]:
from dotenv import load_dotenv
# Let python-dotenv populate the process environment, then read the API key
# from it. `key` is None when API_KEY is not set.
load_dotenv()

key = os.environ.get("API_KEY")

In [196]:

from langchain.chat_models import ChatOpenAI

#from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint




In [197]:
# Chat model used by the chains defined further down.
llm = ChatOpenAI(
    openai_api_key=key,
    model_name="gpt-4o-mini",
    temperature=0.5,
)
# Alternative backend kept for reference (needs the commented-out
# langchain_huggingface import to be enabled first):
# llm = HuggingFaceEndpoint(
#     repo_id="microsoft/Phi-3-mini-4k-instruct",
#     task="text-generation",
# )

In [198]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2


In [199]:
# Prompt for the quiz-generation step. Placeholders: {text}, {number},
# {subject}, {tone} and {response_json}. A trailing backslash joins a line
# with the following one inside the literal.
TEMPLATE = """
Text:{text}
You are an expert MCQ maker. Given the above text it is your job to \
create a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the questions to be conforming to the text as well.
Make sure to format your responses like RESPONSE_JSON below and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}

"""

# Example of the expected answer layout: three identically shaped entries
# keyed "1".."3", each with the question, four lettered options and the
# correct answer. Every entry gets its own fresh dict objects.
RESPONSE_JSON = {
    str(question_no): {
        "mcq": "multiple choice question",
        "options": {letter: "choice here" for letter in "abcd"},
        "correct": "correct answer",
    }
    for question_no in range(1, 4)
}

# Prompt object for the generation step; the declared variables are the
# placeholders that appear in TEMPLATE.
quiz_generation_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["text", "number", "subject", "tone", "response_json"],
)

In [200]:
# First stage: runs the generation prompt and exposes its output as "quiz".
quiz_chain = LLMChain(
    llm=llm,
    prompt=quiz_generation_prompt,
    output_key="quiz",
    verbose=True,
)

In [201]:
# Prompt for the review step. Placeholders: {subject} and {quiz}.
# A trailing backslash joins a line with the next one, but only when it is
# the very last character on the line: a space after it turns it into the
# invalid escape sequence "\ " and leaves a stray backslash and line break
# in the prompt.
TEMPLATE2 = """
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students, \
you need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity. \
If the quiz is not at par with the cognitive and analytical ability of the students, \
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student's ability. \
Quiz MCQ's:
{quiz}

Check from an expert english writer for the above quiz:
"""

# Second stage: the review prompt consumes {subject} and the {quiz} produced
# by the first stage; the chain exposes its output as "review".
quiz_evaluation_prompt = PromptTemplate(
    template=TEMPLATE2,
    input_variables=["subject", "quiz"],
)

review_chain = LLMChain(
    llm=llm,
    prompt=quiz_evaluation_prompt,
    output_key="review",
    verbose=True,
)

In [202]:
# Pipeline: generation followed by review, returning both "quiz" and "review".
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
)

In [203]:
# Source text the quiz is generated from.
# NOTE(review): absolute, machine-specific path; consider deriving it from a
# configurable data directory so the notebook runs on other machines.
file_path = r"/Users/pranavtandra/Documents/mcq_generator/experiments/data.txt"
# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, which differs between operating systems and locales.
with open(file_path, "r", encoding="utf-8") as file:
    TEXT = file.read()

In [204]:
# Inspect the text that was just read from disk.
TEXT

'n statistics, linear regression is a statistical model which estimates the linear relationship between a scalar response and one or more explanatory variables (also known as dependent and independent variables). The case of one explanatory variable is called simple linear regression; for more than one, the process is called multiple linear regression.[1] This term is distinct from multivariate linear regression, where multiple correlated dependent variables are predicted, rather than a single scalar variable.[2] If the explanatory variables are measured with error then errors-in-variables models are required, also known as measurement error models.\n\nIn linear regression, the relationships are modeled using linear predictor functions whose unknown model parameters are estimated from the data. Such models are called linear models.[3] Most commonly, the conditional mean of the response given the values of the explanatory variables (or predictors) is assumed to be an affine function of 

In [205]:
# Preview the serialized example in the same form it is passed to the chain
# as the `response_json` input.
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [206]:
# Quiz parameters; each one fills the prompt placeholder named in its comment.
NUMBER = 5                    # {number}
SUBJECT = "Machine Learning"  # {subject}
TONE = "simple"               # {tone}

In [207]:
# Run the two-stage pipeline inside the OpenAI callback context; `cb` is
# read afterwards for token and cost figures.
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        dict(
            text=TEXT,
            number=NUMBER,
            subject=SUBJECT,
            tone=TONE,
            response_json=json.dumps(RESPONSE_JSON),
        )
    )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:n statistics, linear regression is a statistical model which estimates the linear relationship between a scalar response and one or more explanatory variables (also known as dependent and independent variables). The case of one explanatory variable is called simple linear regression; for more than one, the process is called multiple linear regression.[1] This term is distinct from multivariate linear regression, where multiple correlated dependent variables are predicted, rather than a single scalar variable.[2] If the explanatory variables are measured with error then errors-in-variables models are required, also known as measurement error models.

In linear regression, the relationships are modeled using linear predictor functions whose unknown model parameters are estimated from the data. Such models are called linear models.[3] Most commonly, the 

In [208]:
# Inspect the raw result returned by the chain call.
response

{'text': 'n statistics, linear regression is a statistical model which estimates the linear relationship between a scalar response and one or more explanatory variables (also known as dependent and independent variables). The case of one explanatory variable is called simple linear regression; for more than one, the process is called multiple linear regression.[1] This term is distinct from multivariate linear regression, where multiple correlated dependent variables are predicted, rather than a single scalar variable.[2] If the explanatory variables are measured with error then errors-in-variables models are required, also known as measurement error models.\n\nIn linear regression, the relationships are modeled using linear predictor functions whose unknown model parameters are estimated from the data. Such models are called linear models.[3] Most commonly, the conditional mean of the response given the values of the explanatory variables (or predictors) is assumed to be an affine fun

In [209]:
# Pull the generated quiz out of the chain result (None if the key is absent).
quiz = response.get("quiz")

In [210]:
# Inspect the quiz as returned by the chain, before it is parsed as JSON.
quiz

'{"1": {"mcq": "What is the primary goal of linear regression?", "options": {"a": "To find non-linear relationships", "b": "To explain the variation in a response variable", "c": "To increase the number of variables", "d": "To predict the joint distribution of variables"}, "correct": "b"}, "2": {"mcq": "What type of regression uses one explanatory variable?", "options": {"a": "Multiple linear regression", "b": "Simple linear regression", "c": "Multivariate regression", "d": "Non-linear regression"}, "correct": "b"}, "3": {"mcq": "Which method is commonly used to fit linear regression models?", "options": {"a": "Maximum likelihood estimation", "b": "Least squares approach", "c": "Bayesian inference", "d": "Gradient descent"}, "correct": "b"}, "4": {"mcq": "What is the term for a model that predicts multiple correlated dependent variables?", "options": {"a": "Simple linear regression", "b": "Multiple linear regression", "c": "Multivariate linear regression", "d": "Univariate regression"}

In [211]:
# Parse the quiz JSON text. The type guard keeps this cell safe to re-run:
# json.loads raises TypeError when handed a value that has already been
# parsed, and this cell rebinds `quiz` to the parsed result.
if isinstance(quiz, (str, bytes, bytearray)):
    quiz = json.loads(quiz)

In [212]:
# Flatten the parsed quiz into one record per question for tabular display.
# Only the values are iterated: binding the question number to a loop
# variable called `key` would overwrite the module-level `key` that holds
# the API key, breaking any later re-run of the cell that builds `llm`.
quiz_table_data = []
for value in quiz.values():
    mcq = value["mcq"]
    # Render the choices as "a: ... | b: ... | ..." in their stored order.
    options = " | ".join(
        f"{option}: {option_value}"
        for option, option_value in value["options"].items()
    )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [213]:
# Inspect the flattened records (one dict per question).
quiz_table_data

[{'MCQ': 'What is the primary goal of linear regression?',
  'Choices': 'a: To find non-linear relationships | b: To explain the variation in a response variable | c: To increase the number of variables | d: To predict the joint distribution of variables',
  'Correct': 'b'},
 {'MCQ': 'What type of regression uses one explanatory variable?',
  'Choices': 'a: Multiple linear regression | b: Simple linear regression | c: Multivariate regression | d: Non-linear regression',
  'Correct': 'b'},
 {'MCQ': 'Which method is commonly used to fit linear regression models?',
  'Choices': 'a: Maximum likelihood estimation | b: Least squares approach | c: Bayesian inference | d: Gradient descent',
  'Correct': 'b'},
 {'MCQ': 'What is the term for a model that predicts multiple correlated dependent variables?',
  'Choices': 'a: Simple linear regression | b: Multiple linear regression | c: Multivariate linear regression | d: Univariate regression',
  'Correct': 'c'},
 {'MCQ': 'What should be used if a 

In [214]:
# Tabulate the records. Note that this rebinds `quiz` to a DataFrame.
quiz = pd.DataFrame(quiz_table_data)

In [215]:
# Display the quiz table.
quiz

Unnamed: 0,MCQ,Choices,Correct
0,What is the primary goal of linear regression?,a: To find non-linear relationships | b: To ex...,b
1,What type of regression uses one explanatory v...,a: Multiple linear regression | b: Simple line...,b
2,Which method is commonly used to fit linear re...,a: Maximum likelihood estimation | b: Least sq...,b
3,What is the term for a model that predicts mul...,a: Simple linear regression | b: Multiple line...,c
4,What should be used if a dataset has many larg...,a: Mean Squared Error | b: Least squares appro...,c


In [216]:
# Report the usage figures held by the callback object from the chain run.
print(
    f"Total Tokens:{cb.total_tokens}",
    f"Prompt Tokens:{cb.prompt_tokens}",
    f"Completion Tokens:{cb.completion_tokens}",
    f"Total Cost:{cb.total_cost}",
    sep="\n",
)

Total Tokens:2029
Prompt Tokens:1342
Completion Tokens:687
Total Cost:0.0006134999999999999
