In [None]:
!pip install -q transformers einops accelerate langchain bitsandbytes langchain_community

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m595.2 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m975.5/975.5 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m58.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m332.8/332.8 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.4/127.4 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.2/49.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━

In [None]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer
import transformers
import torch
import os
import json
import re
import pandas as pd
import traceback

import warnings
warnings.filterwarnings('ignore')

In [None]:
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
    },
    max_length=2000,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain

In [None]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}

In [None]:
TEMPLATE1="""
Topic:{Topic}
You are an expert MCQ maker. Given the above topic, it is your job to \
create a quiz  of {Number} multiple choice questions for students having {Difficulty} Difficulty.
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
Ensure to make {Number} MCQs
### RESPONSE_JSON
{response_json}

"""

In [None]:
def get_prompt(Instruction):
  messages= [
    {"role": "system", "content": "You are a helpful assistant! who generates very good quality output based on user input"},
    {"role": "user", "content": Instruction}
  ]

  prompt = pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
 )

  return prompt

In [None]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["Topic", "Number","Difficulty", "response_json"],
    template=get_prompt(TEMPLATE1)
    )

In [None]:
llm=HuggingFacePipeline(pipeline=pipeline, model_kwargs={'eos_token_id':terminators,'temperature':0.1})

In [None]:
quiz_chain = LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)

In [None]:
TEMPLATE2="""
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis.
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [None]:
quiz_evaluation_prompt = PromptTemplate(input_variables=["quiz"], template=get_prompt(TEMPLATE2))

In [None]:
review_chain=LLMChain(llm=llm, prompt=quiz_evaluation_prompt, output_key="review", verbose=True)

In [None]:
generate_evaluate_chain = SequentialChain(chains=[quiz_chain, review_chain], input_variables=["Topic", "Number","Difficulty", "response_json"],
                                        output_variables=["quiz", "review"], verbose=True,)

In [None]:
TOPIC = "Linear Regression"
NUMBER = 10
DIFFICULTY = 'Difficult'

In [None]:
response = generate_evaluate_chain(
        {
            "Topic": TOPIC,
            "Number": NUMBER,
            "Difficulty": DIFFICULTY,
            "response_json": json.dumps(RESPONSE_JSON)
        }
        )

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant! who generates very good quality output based on user input<|eot_id|><|start_header_id|>user<|end_header_id|>

Topic:Linear Regression
You are an expert MCQ maker. Given the above topic, it is your job to create a quiz  of 10 multiple choice questions for students having Difficult Difficulty. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. Ensure to make 10 MCQs
### RESPONSE_JSON
{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here"

In [None]:
quiz=response.get("quiz")
mcqs_text = quiz
start_idx = mcqs_text.find('{')
end_idx = mcqs_text.rfind('}') + 1

mcqs_json_str = mcqs_text[start_idx:end_idx]

match = re.search(r'difficulty:\s*(.*)', mcqs_json_str, re.DOTALL)
if match:
    mcqs_text = match.group(1)
    mcqs_json = json.loads(mcqs_text)

In [None]:
quiz=response.get("quiz")

quiz_table_data = []
for key, value in mcqs_json.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
            ]
        )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [None]:
quiz=pd.DataFrame(quiz_table_data)

In [None]:
quiz.to_csv('/content/Linear_regression.csv')

In [1]:
import pandas as pd

In [31]:
df = pd.read_csv("/content/Linear_regression.csv")

In [32]:
df['Choices'][0]

'a: Independence of the residuals | b:  Normality of the residuals | c: Equal variance in the residuals | d: Multicollinearity among the predictors'

In [None]:
!pip install gradio

In [33]:
def create_mapping(choices_str):
    mapping = {}
    choices = choices_str.split(' | ')
    for choice in choices:
        key, value = choice.split(': ', 1)
        mapping[key.strip()] = value.strip()
    return mapping

# Apply create_mapping function to 'choices' column
df['map'] = df['Choices'].apply(create_mapping)

# Replace 'correct' column with corresponding values from 'map'
df['Correct'] = df.apply(lambda row: row['map'][row['Correct']], axis=1)

# Display the updated DataFrame
df

Unnamed: 0.1,Unnamed: 0,MCQ,Choices,Correct,map
0,0,The goal of a linear regression model is to fi...,a: Independence of the residuals | b: Normali...,Multicollinearity among the predictors,"{'a': 'Independence of the residuals', 'b': 'N..."
1,1,What is the purpose of centering and scaling i...,a: To reduce the effects of outliers | b: To i...,To improve the model's fit,"{'a': 'To reduce the effects of outliers', 'b'..."
2,2,"In linear regression, what is the term for the...",a: Residual | b: Error | c: Deviation | d: Dis...,Residual,"{'a': 'Residual', 'b': 'Error', 'c': 'Deviatio..."
3,3,Which of the following is NOT a type of linear...,a: Simple linear regression | b: Multiple line...,Logistic regression,"{'a': 'Simple linear regression', 'b': 'Multip..."
4,4,What is the term for the coefficient of determ...,a: R-squared | b: Coefficients | c: Standardiz...,R-squared,"{'a': 'R-squared', 'b': 'Coefficients', 'c': '..."
5,5,"In linear regression, what is the assumption t...",a: Homoscedasticity | b: Linearity | c: Indepe...,Linearity,"{'a': 'Homoscedasticity', 'b': 'Linearity', 'c..."
6,6,What is the primary purpose of cross-validatio...,a: To evaluate the model's fit | b: To prevent...,To prevent overfitting,"{'a': 'To evaluate the model's fit', 'b': 'To ..."
7,7,Which of the following is a disadvantage of us...,a: It is computationally expensive | b: It ass...,It is limited to a single predictor,"{'a': 'It is computationally expensive', 'b': ..."
8,8,What is the term for the process of selecting ...,a: Feature selection | b: Dimensionality reduc...,Variable selection,"{'a': 'Feature selection', 'b': 'Dimensionalit..."
9,9,Which of the following is a common problem tha...,a: Multicollinearity | b: Heteroscedasticity |...,Multicollinearity,"{'a': 'Multicollinearity', 'b': 'Heteroscedast..."


In [None]:
!pip install --upgrade gradio


In [37]:
import gradio as gr
import pandas as pd

# Function to evaluate the quiz
def evaluate_quiz(*responses):
    score = 0
    for idx, response in enumerate(responses):
        if response == df.iloc[idx]["Correct"]:
            score += 1
    return f"Your score is {score}/{len(df)}"

# Create Gradio interface
def create_quiz_interface(df):
    question_elements = []
    for idx, row in df.iterrows():
        # Split the choices using '|' as the delimiter and map them to 'a', 'b', 'c', 'd'
        choices = row['Choices'].split(' | ')
        choice_dict = {choice.split(': ')[0]: choice.split(': ')[1] for choice in choices}
        question_elements.append(gr.Radio(label=row['MCQ'], choices=list(choice_dict.values())))

    quiz_interface = gr.Interface(
        fn=evaluate_quiz,
        inputs=question_elements,
        outputs="text",
        title="QuizCrafter",
        description="Select the correct answers and submit to see your score."
    )
    return quiz_interface

# Initialize and launch the Gradio app
quiz_interface = create_quiz_interface(df)
quiz_interface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://c40f98ef66aeff940e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


