In [1]:
from dotenv import load_dotenv

load_dotenv()

import os

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANG_CHAIN_PROJECT"] = "test-preparer"



In [21]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.document_loaders import PyPDFLoader,TextLoader
from langchain_community.document_loaders import Docx2txtLoader,UnstructuredPowerPointLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [26]:
def generate_mcq_question_based_on_documents(docs:list,number=10):
      #docs is a list of documents like [Document(page_content='...', metadata={'source': '...'}), Document(page_content='...', metadata={'source': '...'})]
      text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
      docs = text_splitter.split_documents(docs)
      print(docs)
      RESPONSE_JSON = """{
            "1": {
                "mcq": "multiple choice question",
                "options": {
                    "a": "choice here",
                    "b": "choice here",
                    "c": "choice here",
                    "d": "choice here",
                },
                "correct": "correct answer",
            },
            "2": {
                "mcq": "multiple choice question",
                "options": {
                    "a": "choice here",
                    "b": "choice here",
                    "c": "choice here",
                    "d": "choice here",
                },
                "correct": "correct answer",
            },
            "3": {
                "mcq": "multiple choice question",
                "options": {
                    "a": "choice here",
                    "b": "choice here",
                    "c": "choice here",
                    "d": "choice here",
                },
                "correct": "correct answer",
            },
      }"""
      
      prompt = PromptTemplate.from_template("""
            Text:{text}
            You are an expert MCQ maker. Given the above text, it is your job to \
            create a quiz  of {number} multiple choice questions for students. 
            Make sure the questions are not repeated and check all the questions to be conforming the text as well.
            Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
            Ensure to make {number} MCQs
            ### RESPONSE_JSON
            {response_json}""")
      chain = LLMChain(llm=llm, prompt=prompt,output_parser=JsonOutputParser(),)
      return chain.invoke({"text":docs,"number":number,"response_json":RESPONSE_JSON})



In [12]:
doc=PyPDFLoader('./UNIT -2 Sensors & Transducers Jan  2025.pdf')
doc=doc.load()

In [28]:
res=generate_mcq_question_based_on_documents(doc)


[Document(metadata={'producer': 'Microsoft® PowerPoint® 2019', 'creator': 'Microsoft® PowerPoint® 2019', 'creationdate': '2025-01-20T18:03:16+05:30', 'title': 'PowerPoint Presentation', 'author': 'Chowdhury, Sayantan', 'moddate': '2025-01-20T18:03:16+05:30', 'source': './UNIT -2 Sensors & Transducers Jan  2025.pdf', 'total_pages': 113, 'page': 0, 'page_label': '1'}, page_content='UNIT -2\nSensors and Transducers'), Document(metadata={'producer': 'Microsoft® PowerPoint® 2019', 'creator': 'Microsoft® PowerPoint® 2019', 'creationdate': '2025-01-20T18:03:16+05:30', 'title': 'PowerPoint Presentation', 'author': 'Chowdhury, Sayantan', 'moddate': '2025-01-20T18:03:16+05:30', 'source': './UNIT -2 Sensors & Transducers Jan  2025.pdf', 'total_pages': 113, 'page': 1, 'page_label': '2'}, page_content='• Sensors and transducers: Introduction,\nPerformance characteristics of transducers,\nTransducer for displacement (Potentiometer,\nstrain-gauge, Optical encoder, LVDT, Hall effect\nsensor); velocity

In [41]:
def get_documents(file_path:str):
      if file_path.endswith('.pdf'):
            return PyPDFLoader(file_path).load()
      elif file_path.endswith('.docx'):
            return Docx2txtLoader(file_path).load()
      elif file_path.endswith('.pptx'):
            return UnstructuredPowerPointLoader(file_path).load()
      elif file_path.endswith('.txt'):
            return TextLoader(file_path).load()
      else:
            raise ValueError(f"Unsupported file type: {file_path}")

In [14]:
def merge_documents(file_paths:list):
      return [item for sublist in [get_documents(file_path) for file_path in file_paths] for item in sublist]
