# Summarizing Multiple PDFs

In [4]:
# !pip install langchain
# !pip install openai
# !pip install chromadb

In [5]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain import OpenAI, PromptTemplate
import glob
import openai

In [6]:
# Single LLM client, created once at module level and reused by the
# summarization functions defined in this notebook.
llm = OpenAI(temperature=0.2)
def summarize_pdfs_from_folder(pdfs_folder):
    """Summarize every PDF directly inside ``pdfs_folder`` with a map-reduce chain.

    Each file matching ``<pdfs_folder>/*.pdf`` is loaded and split with
    ``PyPDFLoader.load_and_split()``, summarized with the module-level ``llm``,
    and its summary is printed under a "Summary for:" header.

    Args:
        pdfs_folder: Path of the folder to scan (sub-folders are not searched).

    Returns:
        A list with one summary string per PDF, in sorted file-path order.
        The list is empty when no file matches.
    """
    # Sorted so the returned list has a reproducible order; the order of
    # glob.glob() results depends on the file system.
    pdf_files = sorted(glob.glob(pdfs_folder + "/*.pdf"))
    if not pdf_files:
        return []

    # The chain does not depend on the file, so build it once instead of
    # once per PDF.
    chain = load_summarize_chain(llm, chain_type="map_reduce")

    summaries = []
    for pdf_file in pdf_files:
        docs = PyPDFLoader(pdf_file).load_and_split()
        summary = chain.run(docs)
        print("Summary for: ", pdf_file)
        print(summary)
        print("\n")
        summaries.append(summary)

    return summaries

In [7]:
def custom_summary(pdf_folder, custom_prompt):
    """Summarize every PDF directly inside ``pdf_folder`` using a custom prompt.

    The prompt template is ``custom_prompt`` followed by a ``{text}``
    placeholder and a trailing ``SUMMARY:`` marker. The same template is used
    for both the map step and the combine step of the map-reduce chain.

    Args:
        pdf_folder: Path of the folder to scan (sub-folders are not searched).
        custom_prompt: Instruction text placed before the ``{text}``
            placeholder.

    Returns:
        A list with one summary string per PDF, in sorted file-path order.
        The list is empty when no file matches.
    """
    # Sorted so the returned list has a reproducible order; the order of
    # glob.glob() results depends on the file system.
    pdf_files = sorted(glob.glob(pdf_folder + "/*.pdf"))
    if not pdf_files:
        return []

    # Prompt and chain depend only on custom_prompt, not on the file, so they
    # are built once rather than once per PDF.
    prompt_template = custom_prompt + """

        {text}

        SUMMARY:"""
    prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
    chain = load_summarize_chain(llm, chain_type="map_reduce",
                                 map_prompt=prompt, combine_prompt=prompt)

    summaries = []
    for pdf_file in pdf_files:
        docs = PyPDFLoader(pdf_file).load_and_split()
        result = chain({"input_documents": docs}, return_only_outputs=True)
        summaries.append(result["output_text"])

    return summaries

In [10]:
# Summarize every PDF under ./TEXs; each summary is printed as it is produced
# and the returned list is kept for the save-to-file cell further down.
summaries = summarize_pdfs_from_folder("./TEXs")

Summary for:  ./TEXs/Cost_sharing.pdf
 This paper presents a cost-sharing mechanism to finance relayers in Ethereum's proof-of-stake (PoS) system. It introduces a theoretical framework and practical implementation of the mechanism, and studies the false-name proofness of the mechanism and its applications on funding relayer sets in Blockchain. It also discusses the use of online learning and convex online learning to minimize regret, and provides a proof that the mechanism is α(n)-approximate.




In [30]:
# CUSTOM_PROMPT = "Write a concise summary of the following paper with this structure: Problem being solved; Approach; Main results; Main Discussion Points"
# custom_summaries = custom_summary("./pdfs", custom_prompt=CUSTOM_PROMPT)
# # Save all summaries into one .txt file
# with open("custom_summaries.txt", "w") as f:
#     for summary in custom_summaries:
#         f.write(summary + "\n"*3)

In [21]:
# Save all summaries into one .txt file, separated by three newlines.
# UTF-8 is requested explicitly: the summaries are LLM-generated text that can
# contain non-ASCII characters (e.g. Greek letters), which the platform
# default encoding may be unable to encode.
with open("summaries.txt", "w", encoding="utf-8") as f:
    for summary in summaries:
        f.write(summary + "\n"*3)

# Querying Multiple PDFs

In [11]:
# .py
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFDirectoryLoader

In [14]:
# Load every PDF in the folder once.
loader = PyPDFDirectoryLoader("./TEXs/")

docs = loader.load()

# Create the vector store index from the documents loaded above.
# from_loaders([loader]) would call loader.load() a second time and parse
# every PDF again, so the already-loaded documents are passed in directly.
index = VectorstoreIndexCreator().from_documents(docs)

In [15]:
# Ask the index a free-form question. index.query() is the last expression in
# the cell, so the answer string it returns is shown as the cell output.
query = "What is the core idea of paper?"

index.query(query)

' The core idea of the paper is to study the false-name proofness of a cost-sharing mechanism and its applications on funding relayer sets in Blockchain and in particular in PBS/MEV-Boost, and to give a bound on the worst-case welfare that a cost-sharing mechanism that is no-deficit, Sybil-proof, and truthful mechanism can guarantee.'

In [16]:
# Reuse the same index to ask for a draft of the "Our objectives" section;
# `query` is overwritten with the new question before the call.
query = "How would you complete the Our objectives section?"

index.query(query)

' Our objectives are to introduce the notation and key concepts proposed in [Dob+08] within the context of cost-sharing mechanism design, study the false-name proofness of this mechanism and its applications on funding relayer sets in Blockchain and in particular in PBS/MEV-Boost, and give a bound on the worst-case welfare that a cost-sharing mechanism that is no-deficit, Sybil-proof, and truthful mechanism can guarantee.'

In [17]:
# Two questions sent in a single query string: an opinion on the codependent
# model and a request for a more natural scenario.
query = "Give me your opinion on the codependent model? Can you give a more natural scenerio?"

index.query(query)

' The codependent model is an interesting approach to cost-sharing mechanisms, as it takes into account the fact that validators and builders have interdependent valuations. A more natural scenario would be a situation where the cost of a public good is shared among a group of people, such as a group of friends sharing the cost of a vacation.'