In [8]:
import os
import json
import pandas as pd
import traceback

from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain.callbacks import get_openai_callback

import PyPDF2

In [9]:
load_dotenv() #take environment variables from .env

True

In [10]:
KEY = os.getenv("OPENAI_API_KEY")

In [11]:
llm = ChatOpenAI(openai_api_key = KEY, model_name = 'gpt-3.5-turbo', temperature = 0.3)


In [12]:
#Defining Format for response/output of quiz generation

RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    }

}

In [13]:
#Defining template for quiz generation prompt

TEMPLATE = """
Text: {text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the question to be conforming the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON 
{response_json} 

"""

In [14]:
quiz_gen_prompt = PromptTemplate(
    input_variables= ["text","number","subject","tone","response_json"],
    template=TEMPLATE
)

In [15]:
# 1st Chain:

quiz_chain = LLMChain(llm=llm,prompt=quiz_gen_prompt,output_key="quiz",verbose=True)

  warn_deprecated(


In [16]:
TEMPLATE2 = """
You are an expert english grammarian and writer given a multiple choice quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of quiz. Only use at max 50 words for complexity.
If the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student's ability.
Quiz_MCQs:
{quiz}

Check from an expert english writer of the above quiz:
"""

In [17]:
quiz_eval_prompt = PromptTemplate(
    input_variables= ["subject","quiz"],
    template=TEMPLATE2
)

In [18]:
# 2nd Chain

review_chain = LLMChain(llm=llm,prompt=quiz_eval_prompt,output_key="review",verbose=True)

In [19]:
# Combining the Chains:

generate_evaluate_chain = SequentialChain(chains=[quiz_chain,review_chain],input_variables=["text","number","subject","tone","response_json"],
                                          output_variables=["quiz","review"],verbose=True)


In [21]:
file_path = r"D:\Users\Dell\mcqgenproj\data.txt"


In [22]:
with open(file_path,'r') as file:
    TEXT = file.read()

In [23]:
print(TEXT)

Sure, I can provide a comprehensive guide to supervised learning, covering everything from the basics to more advanced concepts. Here's an in-depth look at the topic:

## What is Supervised Learning?

Supervised learning is a type of machine learning where the model is trained on labeled data. This means that the algorithm learns from input-output pairs, where the input is a set of features and the output is the label or target. The goal of supervised learning is to learn a mapping from inputs to outputs so that the model can make accurate predictions on new, unseen data.

### Types of Supervised Learning

There are two main types of supervised learning problems:
1. **Regression**: Predicting a continuous output. For example, predicting house prices based on features like size, location, etc.
2. **Classification**: Predicting a discrete label. For example, classifying emails as spam or not spam.

## Key Concepts in Supervised Learning

### 1. **Training and Testing Data**

- **Training

In [24]:
# Serialize the python dictionary into the JSON-formatted string

json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [25]:
NUMBER=5
SUBJECT="supervised learning"
TONE="simple"

In [26]:
# How to setup token usage tracking in langchain

with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject": SUBJECT,
            "tone": TONE,
            "response_json": json.dumps(RESPONSE_JSON)
        }
    )

  warn_deprecated(




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text: Sure, I can provide a comprehensive guide to supervised learning, covering everything from the basics to more advanced concepts. Here's an in-depth look at the topic:

## What is Supervised Learning?

Supervised learning is a type of machine learning where the model is trained on labeled data. This means that the algorithm learns from input-output pairs, where the input is a set of features and the output is the label or target. The goal of supervised learning is to learn a mapping from inputs to outputs so that the model can make accurate predictions on new, unseen data.

### Types of Supervised Learning

There are two main types of supervised learning problems:
1. **Regression**: Predicting a continuous output. For example, predicting house prices based on features like size, location, etc.
2. **Classification**: Predicting a discrete label. For ex

In [27]:
print(f"Total Tokens: {cb.total_tokens}")
print(f"Prompt Tokens: {cb.prompt_tokens}")
print(f"Completion Tokens: {cb.completion_tokens}")
print(f"Total Cost: {cb.total_cost}")


Total Tokens: 2512
Prompt Tokens: 2066
Completion Tokens: 446
Total Cost: 0.0017020000000000002


In [28]:
response

{'text': "Sure, I can provide a comprehensive guide to supervised learning, covering everything from the basics to more advanced concepts. Here's an in-depth look at the topic:\n\n## What is Supervised Learning?\n\nSupervised learning is a type of machine learning where the model is trained on labeled data. This means that the algorithm learns from input-output pairs, where the input is a set of features and the output is the label or target. The goal of supervised learning is to learn a mapping from inputs to outputs so that the model can make accurate predictions on new, unseen data.\n\n### Types of Supervised Learning\n\nThere are two main types of supervised learning problems:\n1. **Regression**: Predicting a continuous output. For example, predicting house prices based on features like size, location, etc.\n2. **Classification**: Predicting a discrete label. For example, classifying emails as spam or not spam.\n\n## Key Concepts in Supervised Learning\n\n### 1. **Training and Test

In [29]:
quiz = response.get("quiz")

In [30]:
json.loads(quiz)

{'1': {'mcq': 'What is the goal of supervised learning?',
  'options': {'a': 'To learn from unlabeled data',
   'b': 'To learn from labeled data',
   'c': 'To predict future outcomes without any data',
   'd': 'To ignore the input features'},
  'correct': 'b'},
 '2': {'mcq': 'Which type of supervised learning predicts a continuous output?',
  'options': {'a': 'Regression',
   'b': 'Classification',
   'c': 'Clustering',
   'd': 'Association'},
  'correct': 'a'},
 '3': {'mcq': 'What is the function of a loss function in supervised learning?',
  'options': {'a': 'To maximize the error',
   'b': 'To measure the difference between predicted and actual output',
   'c': 'To ignore the model parameters',
   'd': 'To minimize the number of features'},
  'correct': 'b'},
 '4': {'mcq': 'Which algorithm is used for binary classification?',
  'options': {'a': 'Linear Regression',
   'b': 'Decision Trees',
   'c': 'Support Vector Machines',
   'd': 'k-Nearest Neighbors'},
  'correct': 'c'},
 '5': {

In [31]:
var1 = json.loads(quiz)

In [36]:
df = pd.DataFrame(var1)
df.head()

Unnamed: 0,1,2,3,4,5
mcq,What is the goal of supervised learning?,Which type of supervised learning predicts a c...,What is the function of a loss function in sup...,Which algorithm is used for binary classificat...,What is the purpose of feature engineering in ...
options,"{'a': 'To learn from unlabeled data', 'b': 'To...","{'a': 'Regression', 'b': 'Classification', 'c'...","{'a': 'To maximize the error', 'b': 'To measur...","{'a': 'Linear Regression', 'b': 'Decision Tree...",{'a': 'To remove all features from the dataset...
correct,b,a,b,c,b


In [33]:
df.to_csv("SupervisedLearningQuiz.csv",index = False)

In [34]:
from datetime import datetime

datetime.now().strftime('%m_%d_%Y,%H,%M,%S')

'06_30_2024,01,34,11'