In [None]:
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import retrieval_qa



In [29]:
## Read the pdf from folder


loader = PyPDFDirectoryLoader("../resource")
documents = loader.load()

text_spliter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
final_doucment = text_spliter.split_documents(documents)
final_doucment[0]

len(final_doucment)

992

In [30]:
## Embedding Using Huggingface
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}
)

In [31]:
import  numpy as np
print(np.array(huggingface_embeddings.embed_query(final_doucment[0].page_content)))
print(np.array(huggingface_embeddings.embed_query(final_doucment[0].page_content)).shape)

vectorstore=FAISS.from_documents(final_doucment[:120],huggingface_embeddings)

[-3.43656987e-02 -2.03952659e-03 -2.69677341e-02  1.49809057e-02
  4.70807031e-02  1.82478167e-02  6.79858588e-03  9.49508743e-04
 -6.15613069e-03  3.33370129e-03  2.53311056e-03 -4.87901568e-02
  3.55426148e-02  7.61935189e-02  3.68480831e-02  2.45391135e-03
  5.29868947e-03  1.71135832e-02  3.45081240e-02  1.82932373e-02
 -1.09125571e-02 -3.15294079e-02 -4.81968094e-03 -2.03812961e-02
  2.36594602e-02  1.36746112e-02 -3.20756622e-02 -5.13190255e-02
 -1.45690376e-02 -2.44941264e-01 -5.81144821e-04 -3.06262523e-02
  4.30409685e-02  2.52099261e-02 -3.30108367e-02  2.10357728e-04
  3.50887980e-03 -2.52560452e-02 -2.02201810e-02  1.17690088e-02
 -1.61355529e-02  4.49064337e-02  5.76283969e-02 -1.82244554e-02
 -1.31039890e-02  4.28786501e-03 -1.34173138e-02  2.36733016e-02
 -5.51092066e-02 -5.43326419e-03  2.16202009e-02 -1.80321597e-02
 -1.86149229e-03  5.40488176e-02 -3.24054547e-02  6.38962956e-03
  1.76633652e-02  8.65062475e-02  2.13475581e-02  2.06081253e-02
  6.10375330e-02  3.93398

In [33]:
query="what is knowledge Graph?"
relevant_docments=vectorstore.similarity_search(query)

print(relevant_docments[0].page_content)

answer that determines the factual accuracy of the input query.
Fig. 1 provides a high-level overview of TrumorGPT.
A. Semantic Knowledge Graph
A semantic knowledge graph is an effective mechanism
for encapsulating knowledge in a format that is both struc-
tured and interpretable by machines. This graph consists of
vertices that symbolize entities and edges that represent the
connections between them. The “semantic” aspect of the
graph ensures that entities and their interrelations are based
on meaningful, contextually relevant concepts, making them
understandable to both machines and humans.
We represent a semantic knowledge graph as a directed
graph G = {E, R, F}, where E denotes the set of entities, R
Authorized licensed use limited to: INDIAN INSTITUTE OF TECHNOLOGY ROORKEE. Downloaded on January 20,2025 at 07:01:29 UTC from IEEE Xplore.  Restrictions apply.


In [34]:
retriever=vectorstore.as_retriever(search_type="similarity",search_kwargs={"k":3})
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x336907a10> search_kwargs={'k': 3}


In [None]:
import os
os.environ['HUGGINGFACEHUB_API_TOKEN']=""

In [None]:
from langchain_community.llms import HuggingFaceHub

hf=HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature":0.1,"max_length":500}

)
query="What is the health insurance coverage?"
hf.invoke(query)

In [None]:
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

hf = HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-v0.1",
    task="text-generation",
    pipeline_kwargs={"temperature": 0, "max_new_tokens": 300}
)

llm = hf 
llm.invoke(query)

In [35]:
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
 """

In [36]:
prompt=PromptTemplate(template=prompt_template,input_variables=["context","question"])

In [37]:
retrievalQA=retrieval_qa.from_chain_type(
    llm=hf,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt":prompt}
)

AttributeError: module 'langchain.chains.retrieval_qa' has no attribute 'from_chain_type'