In [3]:
pip install -qU pinecone-client==3.0.0  pinecone-datasets==0.7.0  langchain-pinecone==0.0.3 

Note: you may need to restart the kernel to use updated packages.


In [1]:
from pinecone import Pinecone


  from tqdm.autonotebook import tqdm


In [20]:
from langchain_community.document_loaders import PyPDFLoader

from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Source PDF the questions are generated from -- change the document here.
PDF_PATH = "Big Mac Index.pdf"
# Splitter settings: target chunk size of 1000 with no overlap between chunks.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 0

loader = PyPDFLoader(PDF_PATH)
pages = loader.load()

text_splitter = CharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(pages)

# OpenAI embeddings, used later when the chunks are written to and read from Pinecone.
embeddings = OpenAIEmbeddings()



In [21]:
from langchain_pinecone import PineconeVectorStore
# Name of the Pinecone index that holds the embedded chunks.
index_name = "question-maker-rag"

# Embed the chunks in `docs` and store them in the index named above.
docsearch = PineconeVectorStore.from_documents(
    documents=docs,
    embedding=embeddings,
    index_name=index_name,
)

In [22]:
# Open a handle on the already-populated Pinecone index and build the retriever
# from that handle. Using `vectorstore` here (rather than the `docsearch` object
# returned by the ingestion cell) means this cell only needs `index_name` and
# `embeddings`, so it still works when the upload step is skipped on a re-run.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
retriever = vectorstore.as_retriever()


# RAG chain

In [6]:
from langchain_openai import ChatOpenAI
# Chat model that rag_chain pipes the filled-in prompt into; temperature is pinned to 0.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_core.prompts import PromptTemplate


# Prompt adapted from https://github.com/langchain-ai/langchain/tree/master/libs/langchain/langchain/chains/qa_generation
# `{context}` and `{question}` are the two input variables filled in by the chain.
# The third line of the prompt previously stopped mid-sentence
# ("When coming up with this question/answer pair"); it now states the instruction in full.
template = """You are a smart assistant designed to help high school teachers come up with reading comprehension questions.
Given a piece of context, you must come up with a question and answer pair that can be used to test a student's reading comprehension abilities.
When coming up with this question/answer pair, you must respond in valid JSON.

Please come up with a question/answer pair, in JSON format, for the following context:
----------------
{context}

The User will specify how many and what type of questions it wants by {question}

The type of questions can be of  three categories: 
1.True or False 
2.Multiple Choice Questions (MCQs)
3.one-word answers.

Specify the type of each question as
1.True or False = True/False
2.Multiple Choice Questions (MCQs) = MCQs
3.one-word answers. = one-word answer

"""

# Simple RAG chain: retrieve -> fill prompt -> chat model -> plain string.
custom_rag_prompt = PromptTemplate.from_template(template)


def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's ``{context}`` slot receives the retriever's
    raw return value, so the model sees the repr of a list of Document objects
    (metadata included) instead of the document text.

    Args:
        retrieved_docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


rag_chain = (
    # The same input string is used both as the retrieval query and as {question}.
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# Invoking the chain with the number of questions

In [7]:
# How many questions of each type to request.
mcq, OneWord, T_F = 2, 2, 2

Total = sum((mcq, OneWord, T_F))

# Natural-language request that is passed to the chain as its single input.
z = (
    f"Total {Total} questions, "
    f"{T_F} of which are True or False questions, "
    f"{mcq} are Multiple Choice Questions (MCQs), "
    f"and {OneWord} are one-word answer questions."
)

ans = rag_chain.invoke(z)

In [8]:
# Show the raw string returned by rag_chain.invoke(...).
ans

'{\n    "questions": [\n        {\n            "question": "True or False: The Big Mac Index is limited by geographical coverage due to the presence of the McDonald\'s franchise in Africa.",\n            "answer": "True",\n            "type": "True/False"\n        },\n        {\n            "question": "True or False: The Big Mac Index is a reliable measurement of purchasing power parity according to all economists.",\n            "answer": "False",\n            "type": "True/False"\n        },\n        {\n            "question": "Which country had the most expensive Big Mac in July 2023?",\n            "options": ["Switzerland", "Norway", "Uruguay", "Argentina", "EU", "Sweden"],\n            "answer": "Switzerland",\n            "type": "MCQs"\n        },\n        {\n            "question": "In which city did it take the least amount of time to earn enough to buy a Big Mac in July 2015?",\n            "options": ["Hong Kong", "Luxembourg", "Tokyo", "Zürich", "Miami", "Geneva"],\n     

In [29]:
import os
# Read the Pinecone API key from the environment (None when the variable is unset).
pinecone_api_key = os.getenv("PINECONE_API_KEY")

# Client first, then a handle on the index used throughout this notebook.
pinecone_client = Pinecone(api_key=pinecone_api_key)
index = pinecone_client.Index(index_name)



In [30]:

# Cleanup (destructive): delete_all=True asks Pinecone to remove every vector.
# No namespace is passed, so this presumably targets the default namespace --
# confirm against the Pinecone docs before running on a shared index.
index.delete(delete_all=True)

{}