## Download the papers

In [2]:
import requests
import os

def download_pdf_paper_from_url(url, pdf_dir="papers", timeout=30):
    """Download a PDF and save it as ``<pdf_dir>/<paper_number>.pdf``.

    Args:
        url: Direct link to the PDF, e.g.
            "https://arxiv.org/pdf/2306.08302.pdf".
        pdf_dir: Directory the file is written to; created if missing.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The paper identifier: the last path component of the URL without a
        trailing ".pdf" extension.

    Raises:
        ValueError: If no file name can be derived from the URL.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    from urllib.parse import urlparse

    # Work on the URL path only, so a query string or fragment never ends up
    # in the file name.
    file_name = os.path.basename(urlparse(url).path)
    # str.strip(".pdf") removes any of the characters ".", "p", "d", "f" from
    # BOTH ends ("draft.pdf" -> "raft"); drop only a real ".pdf" suffix.
    if file_name.lower().endswith(".pdf"):
        paper_number = file_name[:-len(".pdf")]
    else:
        paper_number = file_name
    if not paper_number:
        raise ValueError(f"Cannot derive a paper identifier from URL: {url!r}")

    res = requests.get(url, timeout=timeout)
    # Fail loudly rather than saving an HTML error page under a .pdf name.
    res.raise_for_status()

    os.makedirs(pdf_dir, exist_ok=True)  # Create the directory if it doesn't exist
    pdf_path = os.path.join(pdf_dir, f"{paper_number}.pdf")
    with open(pdf_path, 'wb') as f:
        f.write(res.content)
    return paper_number

# arXiv PDF that the rest of this notebook loads, chunks and queries.
link = "https://arxiv.org/pdf/2306.08302.pdf"
# Saves the file under papers/ and keeps the identifier; the loading cell
# rebuilds the path from `paper_number`.
paper_number = download_pdf_paper_from_url(link)

### Load using PDFMiner (LangChain)

In [5]:
from langchain.document_loaders import PDFMinerLoader
# Parse the downloaded PDF into LangChain Document objects.
pdf_loader = PDFMinerLoader(f"papers/{paper_number}.pdf")
docs = pdf_loader.load()

### Chunking with LangChain

In [18]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Character-based splitter: chunk size is measured with len(), the target
# chunk size is 700 characters, and neighbouring chunks share no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_size=700,
    chunk_overlap=0,
)

# NOTE: this rebinds `docs` from the loaded documents to their chunks, so
# re-running this cell alone feeds the chunks back into the splitter.
docs = text_splitter.split_documents(docs)

### Store vector embeddings using FAISS (generated using OpenAI)

In [36]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings


import getpass

# Never hardcode the key, and never clobber one that is already configured:
# unconditionally assigning "" (as this cell used to) wipes an OPENAI_API_KEY
# exported in the shell and invites pasting a secret into the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Constructed without arguments, so the key is expected to come from the
# OPENAI_API_KEY environment variable set above.
embeddings  =   OpenAIEmbeddings()

In [37]:
# Build the FAISS vector store from the chunked documents, embedding them with
# the `embeddings` object created above.
# NOTE(review): presumably this calls the OpenAI API for every chunk — consider
# loading the saved index instead when re-running the notebook.
vdb_chunks  =  FAISS.from_documents(docs, embedding=embeddings)

In [39]:
# Persist the vector store to the "vdb_chunks" folder under the index name
# "base_and_adjacent" so it can be reloaded without rebuilding.
vdb_chunks.save_local("vdb_chunks", index_name="base_and_adjacent")

In [41]:
# Reload the index that was just written to disk, then run a sample query
# against it; the retrieved documents are this cell's output.
vdb_chunks = FAISS.load_local(
    "vdb_chunks",
    embeddings,
    index_name="base_and_adjacent",
)
retriever = vdb_chunks.as_retriever()
retriever.get_relevant_documents("What are KG-enhanced LLMs?")

[Document(page_content='Synergized LLMs + KGs. The synergy of LLMs and KGs\naims to integrate LLMs and KGs into a unified framework\n\nFig. 7. The general framework of the Synergized LLMs + KGs, which\ncontains four layers: 1) Data, 2) Synergized Model, 3) Technique, and\n4) Application.\n\nfrom KGs, it can significantly improve the performance\nof LLMs in accessing domain-specific knowledge [94]. To\nimprove the interpretability of LLMs, researchers also utilize\nKGs to interpret the facts [14] and the reasoning process of\nLLMs [95].\n\n3.1.2 LLM-augmented KGs', metadata={'source': 'papers/2306.08302.pdf'}),
 Document(page_content='7.4 Multi-Modal LLMs for KGs', metadata={'source': 'papers/2306.08302.pdf'}),
 Document(page_content='2) KG-enhanced LLM inference includes research that\nutilizes KGs during the inference stage of LLMs,\nwhich enables LLMs to access the latest knowledge\nwithout retraining.\n\n3) KG-enhanced LLM interpretability includes works that\nuse KGs to understand 

In [42]:
from langchain.llms import OpenAI
# Completion model used by the QA chain. Temperature is set to 0.0 because we
# don't want creative answers.
llm =   OpenAI(temperature=0.0)

In [44]:
from langchain.chains import RetrievalQAWithSourcesChain
# Question-answering chain over the FAISS retriever; its result carries the
# answer together with the sources of the retrieved chunks.
qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    retriever=vdb_chunks.as_retriever(),
    chain_type="stuff",
)

In [47]:
# Ask the chain for a summary; the result is a dict with "question", "answer"
# and "sources" keys.
# NOTE(review): the recorded answer says the paper was published at
# EMNLP-IJCNLP 2019, yet the arXiv identifier (2306.08302) dates it to 2023 —
# this looks lifted from a reference-list chunk; verify answers against the PDF.
qa({"question" : "Give me a brief overview about the paper"})

{'question': 'Give me a brief overview about the paper',
 'answer': ' This paper provides an overview of the advanced techniques in both Language Models (LLMs) and Knowledge Graphs (KGs). It covers the state-of-the-art LLMs and novel KGs, and discusses the challenges and future research directions. It was published in the Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), 2019, pp. 2463–2473.\n',
 'sources': 'papers/2306.08302.pdf'}