In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

Creating embeddings and storing them in a FAISS vector store

In [4]:
# Load the source PDF with PyPDFLoader.
# To run the notebook on a different document, change the path below.
loader = PyPDFLoader(file_path="Big Mac Index.pdf")
pages = loader.load()

In [5]:
# Split the loaded pages into chunks (target size 1000 characters, no overlap
# between consecutive chunks) so each chunk can be embedded on its own.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents=pages)

In [6]:
# Embed every chunk with OpenAI embeddings and index the vectors in a FAISS store.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(documents=docs, embedding=embeddings)


In [7]:
# Persist the FAISS store to the local "faiss_index" folder so it can be reloaded later.
db.save_local(folder_path="faiss_index")


Loading the saved FAISS index

In [8]:
# The same embedding model that built the index is needed to embed queries against it.
embeddings = OpenAIEmbeddings()

# Reload the store saved in the "faiss_index" folder.
# NOTE(review): allow_dangerous_deserialization=True is an explicit opt-in to
# pickle-based loading (see the LangChain FAISS docs); only point this at an
# index folder you created yourself, never at one from an untrusted source.
db = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)


In [9]:
from langchain_openai import ChatOpenAI
# Chat model that will write the questions; temperature=0 asks for the least
# random sampling the API offers.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

# Expose the FAISS store as a retriever, using its default search settings.
retriever = db.as_retriever()


# RAG chain

In [10]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_core.prompts import PromptTemplate


# Prompt adapted from https://github.com/langchain-ai/langchain/tree/master/libs/langchain/langchain/chains/qa_generation
# The response format is spelled out in words (no literal braces, which
# PromptTemplate would treat as input variables) because the cells that parse
# the reply and write questions.txt rely on exactly these keys.
template = """You are a smart assistant designed to help high school teachers come up with reading comprehension questions.
Given a piece of context, you must come up with a question and answer pair that can be used to test a student's reading comprehension abilities.
When coming up with this question/answer pair, you must respond with a single JSON object and nothing else (no Markdown code fences, no commentary). The object has one key, "questions", whose value is a list. Each item in the list has the keys "question", "answer" and "type"; Multiple Choice Questions additionally have an "options" key holding the list of choices.

Please come up with a question/answer pair, in JSON format, for the following context:
----------------
{context}

The User will specify how many and what type of questions it wants by {question}

The type of questions can be of  three categories: 
1.True or False 
2.Multiple Choice Questions (MCQs)
3.one-word answers.

Specify the type of each question as
1.True or False = True/False
2.Multiple Choice Questions (MCQs) = MCQs
3.one-word answers. = one-word answer

"""

# simple RAG
custom_rag_prompt = PromptTemplate.from_template(template)


def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot receives the string form of
    a list of Document objects (metadata and repr noise included) instead of
    the plain passage text.

    Args:
        retrieved_docs: iterable of objects exposing a ``page_content`` string,
            as returned by the retriever.

    Returns:
        The page contents separated by blank lines ("" for no documents).
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The value passed to rag_chain.invoke() is used twice: as the retrieval query
# (left branch) and, unchanged, as the {question} slot of the prompt.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# Invoking the chain with the number of questions of each type

In [11]:
# How many questions of each type to request from the model.
mcq = 2
OneWord = 2
T_F = 2

Total = mcq + OneWord + T_F

# Natural-language request that is passed to the chain as its input.
z = (
    f"Total {Total} questions, "
    f"{T_F} of which are True or False questions, "
    f"{mcq} are Multiple Choice Questions (MCQs), "
    f"and {OneWord} are one-word answer questions."
)

ans = rag_chain.invoke(z)

In [12]:
# Show the raw string returned by the chain.
ans

'{\n    "questions": [\n        {\n            "question": "True or False: The Big Mac Index is limited by geographical coverage due to the presence of the McDonald\'s franchise.",\n            "answer": "True",\n            "type": "True/False"\n        },\n        {\n            "question": "True or False: The price of a Big Mac in a country is solely determined by relative currency values.",\n            "answer": "False",\n            "type": "True/False"\n        },\n        {\n            "question": "Which country had the most expensive Big Mac in July 2023?",\n            "options": ["Switzerland", "Norway", "Uruguay", "Argentina", "EU", "Sweden"],\n            "answer": "Switzerland",\n            "type": "MCQs"\n        },\n        {\n            "question": "In which city did an average local worker have to work the longest to earn enough to buy a Big Mac in 2015?",\n            "options": ["Nairobi", "Manila", "Mexico City", "Jakarta", "Cairo", "Kyiv"],\n            "answer

# Writing the questions to questions.txt

In [13]:
import json

# Parse the model's JSON reply into a Python dict.
raw_reply = ans.strip()

# Chat models often wrap JSON in a Markdown code fence (```json ... ```), which
# json.loads rejects; drop the opening fence line and the closing fence first.
if raw_reply.startswith("```"):
    raw_reply = raw_reply.split("\n", 1)[1] if "\n" in raw_reply else ""
    raw_reply = raw_reply.rsplit("```", 1)[0]

# Still raises json.JSONDecodeError if the reply is not valid JSON.
data = json.loads(raw_reply)

In [14]:
# Show the parsed question data.
data

{'questions': [{'question': "True or False: The Big Mac Index is limited by geographical coverage due to the presence of the McDonald's franchise.",
   'answer': 'True',
   'type': 'True/False'},
  {'question': 'True or False: The price of a Big Mac in a country is solely determined by relative currency values.',
   'answer': 'False',
   'type': 'True/False'},
  {'question': 'Which country had the most expensive Big Mac in July 2023?',
   'options': ['Switzerland',
    'Norway',
    'Uruguay',
    'Argentina',
    'EU',
    'Sweden'],
   'answer': 'Switzerland',
   'type': 'MCQs'},
  {'question': 'In which city did an average local worker have to work the longest to earn enough to buy a Big Mac in 2015?',
   'options': ['Nairobi', 'Manila', 'Mexico City', 'Jakarta', 'Cairo', 'Kyiv'],
   'answer': 'Nairobi',
   'type': 'MCQs'},
  {'question': 'What is the implied exchange rate according to the Big Mac index for July 2023?',
   'answer': '1.20 SFr/USD',
   'type': 'one-word answer'},
  {

In [15]:


# Write every generated question to questions.txt: the question text, the
# options (multiple-choice questions only) and the answer.
# encoding="utf-8" is explicit because the platform default encoding may be
# unable to represent non-ASCII characters in the generated text.
with open("questions.txt", "w", encoding="utf-8") as file:
    for question_data in data["questions"]:
        file.write("Question: {}\n".format(question_data["question"]))

        # The items come from an LLM, so "type" or "options" may be missing;
        # .get() skips the options section instead of raising KeyError.
        if question_data.get("type") == "MCQs" and question_data.get("options"):
            file.write("Options:\n")
            for option in question_data["options"]:
                file.write("- {}\n".format(option))

        file.write("\nAnswer: {}\n\n".format(question_data["answer"]))
