In [1]:
import os

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from dotenv import load_dotenv
import pandas as pd

In [2]:
# Load variables from a local .env file into the process environment
# (presumably where the OpenAI credentials are kept — confirm).
load_dotenv()

True

In [3]:
# Directory holding the source papers; every PDF directly inside it is indexed.
DIRPATH = "./data/pdfs/"

# The splitter is created once and shared by all files instead of being
# rebuilt on every iteration.
splitter = CharacterTextSplitter()

# Accumulates the split documents produced from every PDF.
all_pages = []

# os.listdir() returns entries in arbitrary order and also yields non-PDF
# entries (hidden files, sub-directories, ...). Sort for a reproducible
# indexing order and keep only regular files with a .pdf extension so the
# PDF loader is never handed something it cannot parse.
for filename in sorted(os.listdir(DIRPATH)):
    filepath = os.path.join(DIRPATH, filename)
    if not (os.path.isfile(filepath) and filename.lower().endswith(".pdf")):
        continue
    all_pages.extend(PyPDFLoader(filepath).load_and_split(splitter))

In [4]:
# Build the vector store: every loaded document is embedded with the OpenAI
# embedding client and stored in a FAISS index that later cells query.
embeddings = OpenAIEmbeddings()
vectors = FAISS.from_documents(
    documents=all_pages,
    embedding=embeddings,
)

In [15]:
# Prompt text sent to the chat model. {contents} is filled with the retrieved
# paper excerpts and {query} with the user's question.
PROMPT_TEMPLATE = """
        You are an expert academic researcher that enjoys helping other researchers, like myself.
        Based on the following content from research papers that I am familiar with, and I know are related to my question: 
        
        {contents}

        {query}

        Please keep your replies concise but short. Do not answer something that was not asked.
        """

prompt = PromptTemplate(
    template=PROMPT_TEMPLATE,
    input_variables=["contents", "query"],
)

# Chat model that produces the answers; temperature is pinned to 0.
model = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

# Chain that fills the prompt and forwards it to the model.
chain = LLMChain(prompt=prompt, llm=model)

In [16]:
def search_similar(query: str, k: int = 3) -> list:
    """Return the text of the indexed documents most similar to ``query``.

    Args:
        query: Free-text question used for the vector-store lookup.
        k: Number of matches to request from the vector store. Defaults to
            3, the value that used to be hard-coded here.

    Returns:
        A list containing the ``page_content`` string of each match, in the
        order returned by ``vectors.similarity_search``.
    """
    matches = vectors.similarity_search(query, k=k)
    return [match.page_content for match in matches]

def reply(query: str) -> str:
    """Answer ``query`` using the most similar indexed excerpts as context.

    Args:
        query: The question to answer.

    Returns:
        The text produced by ``chain.run`` for the filled-in prompt.
    """
    excerpts = search_similar(query)
    # Join the excerpts into one plain-text block. Passing the list itself
    # makes the prompt template interpolate its Python repr (surrounding
    # brackets, quotes and escaped newlines) instead of readable text.
    contents = "\n\n".join(excerpts)
    return chain.run(contents=contents, query=query)

In [17]:
# Example: a factual lookup whose answer must come from the indexed papers.
reply("What paper was funded by a Spanish Ministry? And what is the name of that Ministry?")

'The paper funded by a Spanish Ministry is "GRASP and VNS for solving the p-next center problem" by J. Sánchez-Oro, A. López-Sánchez, and A. Hernández-Díaz. The name of the Ministry is the Spanish Ministry of "Ciencia, Innovación y Universidades."'

In [19]:
# Example: a conceptual question about a method named in the query.
reply("What is the main feature of the Fast Vertex Substitution?")

'The main feature of the Fast Vertex Substitution is its low worst-case complexity, which allows for efficient solving of the p-Center problem.'