In [30]:
## RAG Application With GPT-4o

In [31]:
!pip install langchain faiss-cpu huggingface-hub langchain_community pypdf transformers sentence-transformers



# 1. Get a Data Loader


In [32]:
from langchain.document_loaders import PyPDFLoader

In [33]:
# pdf_path = "/Users/rasagnyagudipudi/Desktop/chatbot/input.pdf"

pdf_path = "input.pdf"

loader = PyPDFLoader(pdf_path)

pages = loader.load_and_split()

pages



[Document(metadata={'source': 'input.pdf', 'page': 0}, page_content='STUDENT \nRESOURCE BOOK'),
 Document(metadata={'source': 'input.pdf', 'page': 1}, page_content='1\nContents \n1. Message from the Vice Chancellor \n2. Message from the Director \n3. Deﬁni9ons \n4. Admission \n5. Academics \n6. Examina9on & Evalua9on \n7. Placement Assistance \n8. Student Support Services Guidelines \n9. People you should know'),
 Document(metadata={'source': 'input.pdf', 'page': 2}, page_content='2\n \nMessage from the Vice Chancellor \nDr. Ramesh Bhat \nI take this opportunity of congratula9ng all of you on your decision to join NMIMS Global \nAccess School for Con 9nuing Educa 9on (NGA – SCE) of NMIMS (Deemed – to - be –  \nUniversity. I am happy that you have joined the growing community of learners at  \nNMIMS. \nShri Vile Parle Kelavani Mandal, a Public Trust and a Society in Mumbai since 1934 whose \ngoal is to advance the cause of quality educa9on in India, set up Narsee Monjee Ins9tute \nof Ma

# 2. Convert data to Vector Database


In [34]:
# from langchain_objectbox.vectorstores import ObjectBox ##vector Database
# from langchain_openai import OpenAIEmbeddings

In [35]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [36]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=64,
    separators=['\n\n', '\n', '(?=>\. )', ' ', '']
)
docs  = text_splitter.split_documents(pages)

In [37]:
docs

[Document(metadata={'source': 'input.pdf', 'page': 0}, page_content='STUDENT \nRESOURCE BOOK'),
 Document(metadata={'source': 'input.pdf', 'page': 1}, page_content='1\nContents \n1. Message from the Vice Chancellor \n2. Message from the Director \n3. Deﬁni9ons \n4. Admission \n5. Academics \n6. Examina9on & Evalua9on \n7. Placement Assistance \n8. Student Support Services Guidelines \n9. People you should know'),
 Document(metadata={'source': 'input.pdf', 'page': 2}, page_content='2\n \nMessage from the Vice Chancellor \nDr. Ramesh Bhat \nI take this opportunity of congratula9ng all of you on your decision to join NMIMS Global \nAccess School for Con 9nuing Educa 9on (NGA – SCE) of NMIMS (Deemed – to - be –  \nUniversity. I am happy that you have joined the growing community of learners at  \nNMIMS. \nShri Vile Parle Kelavani Mandal, a Public Trust and a Society in Mumbai since 1934 whose \ngoal is to advance the cause of quality educa9on in India, set up Narsee Monjee Ins9tute \nof Ma

In [38]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()
embeddings

  embeddings = HuggingFaceEmbeddings()


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

# 3. Make a RAG pipeline


In [39]:
from langchain.vectorstores import FAISS
db = FAISS.from_documents(docs, embeddings)

In [50]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")

hf_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,
    do_sample=True,
    temperature=0.7
)

llm = HuggingFacePipeline(pipeline=hf_pipeline)

In [51]:
from langchain.chains.question_answering import load_qa_chain

retriever = db.as_retriever(search_kwargs={"k": 5})
chain = load_qa_chain(llm, chain_type="stuff")

In [55]:
q = "what is the Eligibility Criteria for registering for Placement Activities?"

In [56]:
retrieved_docs = retriever.get_relevant_documents(q)

context = " ".join([doc.page_content for doc in retrieved_docs])

query = f"""
Question: {q}

If the below context is relevant to the question, provide a detailed answer. If the context is irrelevant, respond with exactly:
'Sorry, I didn't understand your question. Do you want to connect with a live agent?'

Context:
{context}


"""
full_prompt = query

response = chain.invoke({
    "input_documents":retrieved_docs,
    "question":full_prompt})
user_readable_answer = response["output_text"]
user_readable_answer

'90% of courses 9ll Semester IV should be successfully completed at the 9me of registra9on for placement ac9vi9es (in Semester V).'

In [57]:
response

{'input_documents': [Document(metadata={'source': 'input.pdf', 'page': 45}, page_content='45\n• Students are required to provide an aiached applica9on duly signed for Issuance of \nTranscripts. The applicant is required to furnish the following details / documents \nwith the applica9on if it needs to be sent by the University (charges applicable) \n• Aiested copies of Grade Sheets/Mark sheets / Final Cer9ﬁcate \n• Copy/ies of Prospectus or communica9on received from Professional Body/ \nManagement / Educa9onal Ins9tu9on/s as applicable, requiring you to submit \ntranscripts. \nPlacement Guidelines: \nPlacement assistance is oﬀered to students however it is the preroga9ve of the Schools \n& Campuses to decide, which of the programs this service should be oﬀered. \nStudents are expected to maintain decorum and abide by the guidelines during \nplacement processes. In the event of non-conformance to the placement guidelines,  \nthe School reserves the right to ini9ate correc9ve ac9on. \nIn