In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from dotenv import load_dotenv
import pandas as pd

In [2]:
# Load environment variables from a local .env file before any client is
# created (presumably this supplies the OpenAI API key used by the
# embeddings and chat model below -- confirm against the .env contents).
load_dotenv()

True

In [3]:
# Read the paper PDF (path is relative to the notebook's working directory)
# and split it into documents; `pages` is what gets indexed further down.
loader = PyPDFLoader("./data/pdfs/sanchezoro2022.pdf")
pages = loader.load_and_split()

In [5]:
# Build a FAISS vector store over `pages`, using OpenAI embeddings to
# vectorise each document. `vectors` is the index queried by search_similar.
# NOTE(review): this presumably calls the OpenAI embeddings API on every
# run -- consider persisting the index if re-runs become costly; confirm.
embeddings = OpenAIEmbeddings()
vectors = FAISS.from_documents(pages, embeddings)

In [6]:
def search_similar(query: str, k: int = 3):
    """Return the text of the indexed chunks most similar to ``query``.

    Args:
        query: Natural-language question or phrase to look up in the
            module-level ``vectors`` store.
        k: Number of chunks to request from the vector store. Defaults to
            3, the value that used to be hard-coded here.

    Returns:
        A list with the ``page_content`` of each hit, in the order the
        vector store returned them.
    """
    similars = vectors.similarity_search(query, k=k)

    return [s.page_content for s in similars]

In [7]:
# Retrieval sanity check: display the raw chunks returned for a sample
# question before any LLM is involved.
search_similar("What is the problem being solved?")

['J. Sánchez-Oro,  A.D. López-Sánchez,  A.G. Hernández-Díaz  et al. European  Journal of Operational  Research xxx (xxxx) xxx ARTICLE  IN PRESS  \nJID: EOR [m5G; March 17, 2022;0:38  ] \nwith the aim of having an eﬃcient  local search procedure.  Then, \nlet j ⋆ be the furthest  α-neighbor  facility of any demand  point from \nthose in P . Notice that one of these facilities  determine  the value \nof the objective  function.  This heuristic  selection  allows us to avoid \nexploring  the complete  1-swap  neighborhood.  More formally,  for \nany i ∈ N \\ P , the facility j ⋆ is determined  as: \nj ⋆ ← arg min \nA ⊂P \n| A | = α{ max \nj∈ A d (i, j ) } . \nTherefore,  the set of neighbor  solutions  of P is mathematically  \ndeﬁned  as: \nN S (P ) ← { Q : Q ← Swap (P, i, j ⋆ ) , ∀ i ∈ N \\ P } \nThe second  key element  of a local search consists  in deﬁning  \nthe way in which the considered  neighborhood  is explored.  The \nmost extended  strategies  are Best Improvement  (BI) and F

In [8]:
# Chat model that answers the questions. temperature=0 is presumably chosen
# to keep answers as repeatable as possible.
# NOTE(review): "gpt-3.5-turbo-0613" is a dated model snapshot -- verify it
# is still served by the API before re-running this notebook.
model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")

In [9]:
# Prompt with two placeholders: {contents} receives the retrieved excerpts
# and {query} the user's question.
# NOTE(review): the wording says "a selection of papers" although only one
# PDF is loaded in this notebook, and the recorded answers echo that
# phrasing -- consider rewording the template.
prompt = PromptTemplate(
    input_variables=["contents", "query"],
    template="""
        You are an academic researcher. Help me understand a selection of papers that focus on the following:

        {contents}

        Now, my question is: {query}
        """
)

# Chain that fills `prompt` with its inputs and sends the result to `model`.
chain = LLMChain(llm=model, prompt=prompt)

In [10]:
def reply(query: str):
    """Answer ``query`` using the most similar indexed chunks as context.

    The chunks returned by ``search_similar`` are joined into one block of
    text and passed to ``chain`` together with the question.

    Args:
        query: The question to ask about the indexed document.

    Returns:
        Whatever ``chain.run`` returns for the filled-in prompt (the
        recorded outputs in this notebook are plain strings).
    """
    similars = search_similar(query)
    # Join the excerpts into plain text. Passing the list itself would make
    # the template interpolate its Python repr (brackets, quotes and escaped
    # newlines) into the prompt.
    contents = "\n\n".join(similars)
    response = chain.run(contents=contents, query=query)

    return response

In [11]:
# End-to-end check: retrieve context for the question and let the model
# answer it.
reply("What problem is being solved?")

'The problem being solved in the selection of papers is the α-p Center Problem (α-pCP). The α-pCP aims to select a set of p facilities from a given set of points in order to minimize the maximum distance between each demand point and its αth closest facility. The objective is to minimize the radius of the maximal circle (among the p allowed) so that each demand point is covered by at least α circles.'

In [12]:
# Question about a specific experiment in the paper.
# NOTE(review): the query has a grammar slip ("compared" should be
# "compare"); it is left unchanged because the recorded answer below was
# generated from this exact text.
reply("How did the authors compared the beta parameter?")

'The authors compared the beta parameter by conducting experiments using different values of beta (0.00, 0.25, 0.50, 0.75, 1.00, and RND) and evaluating the results. They performed 100 constructions for each value of beta and measured the average objective function value, computing time, deviation from the best solution found, and the number of times the algorithm reached the best solution. Based on these metrics, they determined the best value for the beta parameter. They found that a beta value of 0.00 resulted in the highest number of best solutions found, while beta values of 0.25 and RND had lower deviation from the best solution. They concluded that large beta values should be avoided as they introduce too much randomness in the search.'

In [13]:
# Question about a named technique.
# NOTE(review): the recorded answer reports that the term does not appear
# in the excerpts given to the model. Only the top 3 chunks are retrieved
# (k=3 in search_similar), so this may be a retrieval miss rather than a
# gap in the paper -- confirm against the PDF.
reply("What is the main feature of the Fast Vertex Substitution?")

'Based on the provided information, there is no mention of the "Fast Vertex Substitution" in the selection of papers. Therefore, it is not possible to determine the main feature of the Fast Vertex Substitution based on the given information.'