In [74]:
import os 
import re
import json 
import pandas as pd
import traceback

In [32]:

import langchain
from langchain.chat_models import ChatOpenAI
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain, SequentialChain, ConversationChain
from langchain_community.callbacks import get_openai_callback
# from langchain_openai import OpenAI
from langchain_core.prompts import PromptTemplate
from langchain.agents import AgentType
from langchain.agents import load_tools 
from langchain.agents import initialize_agent
import PyPDF2

In [13]:
from dotenv import load_dotenv
load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY")
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
serpapi_key = os.getenv("serpapi_key")
os.environ['HUGGINGFACEHUB_API_TOKEN']=HUGGINGFACEHUB_API_TOKEN 

## Test HUGGINGFACE with google-search-results

In [14]:
question = "Who won the FIFA World Cup in the year 2022? "

template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate.from_template(template)

repo_id =  "mistralai/Mistral-7B-Instruct-v0.2"

llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
)
llm_chain = LLMChain(prompt=prompt, llm=llm)
print(llm_chain.run(question))
tool = load_tools(["serpapi"],serpapi_api_key=serpapi_key,llm=llm)
agent = initialize_agent(tool, llm,agent = AgentType.ZERO_SHOT_REACT_DESCRIPTION,verbose=True)
agent.run("world cup 2022 winner")

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.
  from .autonotebook import tqdm as notebook_tqdm


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Abdel\.cache\huggingface\token
Login successful


  warn_deprecated(
  warn_deprecated(


 The FIFA World Cup is held every four years, so the year 2022 would not be an actual year of the tournament. The most recent World Cup was held in 2018, and the next one is scheduled for Qatar in 2022. However, as of now, the 2022 World Cup is still in progress, so we cannot determine a winner yet. Therefore, no team has won the FIFA World Cup in the year 2022.


In [21]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}


In [48]:
TEMPLATE = """
Text:{text}
You are an expert MCQ Marker. Given the above text, it is your job to create a Quiz of {number} multiple choice questions
for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide.
Ensure to make {number} MCQs.

### RESPONSE_JSON 
{response_json}

"""

quiz_gen_prompt = PromptTemplate(
    input_variables = ["text", "number", "subject","tone","response_json"],
    template = TEMPLATE
)

In [49]:
quiz_chain = LLMChain(prompt=quiz_gen_prompt, llm=llm,output_key='quiz',verbose=True)


In [50]:
TEMPLATE2="""
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [60]:
quiz_eval_prompt=PromptTemplate(input_variables=["subject", "quiz"], template=TEMPLATE)

review_chain=LLMChain(llm=llm, prompt=quiz_eval_prompt, output_key="review", verbose=True)


In [61]:
gen_eval_chain=SequentialChain(chains=[quiz_chain, review_chain], input_variables=["text", "number", "subject", "tone", "response_json"],
                                        output_variables=["quiz", "review"], verbose=True,)

In [62]:
file_path = r"D:\GenAI\MCQgen\data.txt"

In [63]:
with open(file_path, 'r') as file:
    TEXT = file.read()

In [64]:
print(TEXT)

Biology is the scientific study of life.[1][2][3] It is a natural science with a broad scope but has several unifying themes that tie it together as a single, coherent field.[1][2][3] For instance, all organisms are made up of cells that process hereditary information encoded in genes, which can be transmitted to future generations. Another major theme is evolution, which explains the unity and diversity of life.[1][2][3] Energy processing is also important to life as it allows organisms to move, grow, and reproduce.[1][2][3] Finally, all organisms are able to regulate their own internal environments.[1][2][3][4][5]

Biologists are able to study life at multiple levels of organization,[1] from the molecular biology of a cell to the anatomy and physiology of plants and animals, and evolution of populations.[1][6] Hence, there are multiple subdisciplines within biology, each defined by the nature of their research questions and the tools that they use.[7][8][9] Like other scientists, bio

In [65]:
# Serialize the Python dictionary into a JSON-formatted string
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [102]:
NUMBER=5
SUBJECT="biology"
TONE="simple"
#https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking

#How to setup Token Usage Tracking in LangChain
with get_openai_callback() as cb:
    response=gen_eval_chain(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject":SUBJECT,
            "tone": TONE,
            "response_json": json.dumps(RESPONSE_JSON)
        }
        )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Biology is the scientific study of life.[1][2][3] It is a natural science with a broad scope but has several unifying themes that tie it together as a single, coherent field.[1][2][3] For instance, all organisms are made up of cells that process hereditary information encoded in genes, which can be transmitted to future generations. Another major theme is evolution, which explains the unity and diversity of life.[1][2][3] Energy processing is also important to life as it allows organisms to move, grow, and reproduce.[1][2][3] Finally, all organisms are able to regulate their own internal environments.[1][2][3][4][5]

Biologists are able to study life at multiple levels of organization,[1] from the molecular biology of a cell to the anatomy and physiology of plants and animals, and evolution of populations.[1][6] Hence, there are multiple subdiscipline


[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Biology is the scientific study of life.[1][2][3] It is a natural science with a broad scope but has several unifying themes that tie it together as a single, coherent field.[1][2][3] For instance, all organisms are made up of cells that process hereditary information encoded in genes, which can be transmitted to future generations. Another major theme is evolution, which explains the unity and diversity of life.[1][2][3] Energy processing is also important to life as it allows organisms to move, grow, and reproduce.[1][2][3] Finally, all organisms are able to regulate their own internal environments.[1][2][3][4][5]

Biologists are able to study life at multiple levels of organization,[1] from the molecular biology of a cell to the anatomy and physiology of plants and animals, and evolution of populations.[1][6] Hence, there are multiple subdisciplines within biology, each 

In [103]:
print(f"Total Tokens:{cb.total_tokens}")
print(f"Prompt Tokens:{cb.prompt_tokens}")
print(f"Completion Tokens:{cb.completion_tokens}")
print(f"Total Cost:{cb.total_cost}")

Total Tokens:0
Prompt Tokens:0
Completion Tokens:0
Total Cost:0.0


In [104]:
response

{'text': 'Biology is the scientific study of life.[1][2][3] It is a natural science with a broad scope but has several unifying themes that tie it together as a single, coherent field.[1][2][3] For instance, all organisms are made up of cells that process hereditary information encoded in genes, which can be transmitted to future generations. Another major theme is evolution, which explains the unity and diversity of life.[1][2][3] Energy processing is also important to life as it allows organisms to move, grow, and reproduce.[1][2][3] Finally, all organisms are able to regulate their own internal environments.[1][2][3][4][5]\n\nBiologists are able to study life at multiple levels of organization,[1] from the molecular biology of a cell to the anatomy and physiology of plants and animals, and evolution of populations.[1][6] Hence, there are multiple subdisciplines within biology, each defined by the nature of their research questions and the tools that they use.[7][8][9] Like other sci

In [108]:
quiz=response.get("quiz")

'}}"c" :"tcerroc" ,}"sllec fo noitcnuf dna erutcurts eht fo yduts ehT" :"d" ,"efil fo noitulove dna nigiro eht fo yduts ehT" :"c" ,"smsinagro fo noitaluger lanretni eht fo yduts ehT" :"b" ,"smsinagro ni gnissecorp ygrene fo yduts ehT" :"a"{ :"snoitpo" ,"?ygoloib fo emeht rojam a TON si gniwollof eht fo hcihW" :"qcm"{ :"5"\n,}"a" :"tcerroc" ,}"efil fo smrof tnereffid yfissalc oT" :"d" ,"smsinagro ni snoitcaer lacimehc eht ezylana oT" :"c" ,"smsinagro fo seitreporp lacisyhp eht yduts oT" :"b" ,"dlrow larutan eht tuoba snoisulcnoc mrof dna sesehtopyh tset oT" :"a"{ :"snoitpo" ,"?ygoloib ni rof desu dohtem cifitneics eht si tahW" :"qcm"{ :"4"\n,}"c" :"tcerroc" ,}"sciteneG" :"d" ,"ygoloib yranoitulovE" :"c" ,"ygoloib ralulleC" :"b" ,"ygoloib raluceloM" :"a"{ :"snoitpo" ,"?dellac efil fo noitulove dna nigiro eht fo yduts eht si tahW" :"qcm"{ :"3"\n,}"b" :"tcerroc" ,}"sllec fo noitcnuf dna erutcurts eht fo yduts ehT" :"d" ,"smsinagro ni snoitcaer lacimehc eht fo yduts ehT" :"c" ,"efil fo noit

In [109]:
quiz=json.loads( quiz[quiz.index("{"):len(quiz)-quiz[::-1].index("}") ])

In [110]:
quiz

{'1': {'mcq': 'What is biology the scientific study of?',
  'options': {'a': 'The study of the physical world',
   'b': 'The scientific study of life',
   'c': 'The study of the chemical elements',
   'd': "The study of the Earth's atmosphere"},
  'correct': 'b'},
 '2': {'mcq': 'What is one major theme of biology?',
  'options': {'a': 'The study of the physical properties of organisms',
   'b': 'The study of the origin and evolution of life',
   'c': 'The study of the chemical reactions in organisms',
   'd': 'The study of the structure and function of cells'},
  'correct': 'b'},
 '3': {'mcq': 'What is the study of the origin and evolution of life called?',
  'options': {'a': 'Molecular biology',
   'b': 'Cellular biology',
   'c': 'Evolutionary biology',
   'd': 'Genetics'},
  'correct': 'c'},
 '4': {'mcq': 'What is the scientific method used for in biology?',
  'options': {'a': 'To test hypotheses and form conclusions about the natural world',
   'b': 'To study the physical propertie

In [111]:
quiz_table_data = []
for key, value in quiz.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
            ]
        )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [112]:
quiz_table_data

[{'MCQ': 'What is biology the scientific study of?',
  'Choices': "a: The study of the physical world | b: The scientific study of life | c: The study of the chemical elements | d: The study of the Earth's atmosphere",
  'Correct': 'b'},
 {'MCQ': 'What is one major theme of biology?',
  'Choices': 'a: The study of the physical properties of organisms | b: The study of the origin and evolution of life | c: The study of the chemical reactions in organisms | d: The study of the structure and function of cells',
  'Correct': 'b'},
 {'MCQ': 'What is the study of the origin and evolution of life called?',
  'Choices': 'a: Molecular biology | b: Cellular biology | c: Evolutionary biology | d: Genetics',
  'Correct': 'c'},
 {'MCQ': 'What is the scientific method used for in biology?',
  'Choices': 'a: To test hypotheses and form conclusions about the natural world | b: To study the physical properties of organisms | c: To analyze the chemical reactions in organisms | d: To classify different f

In [113]:
quiz=pd.DataFrame(quiz_table_data)

In [114]:
quiz.to_csv("mcq_ml.csv",index=False)

In [115]:
from datetime import datetime
datetime.now().strftime('%m_%d_%Y_%H_%M_%S')

'05_04_2024_23_52_41'