# Summarizing Multiple PDFs

In [None]:
# !pip install langchain
# !pip install openai
# !pip install chromadb

In [18]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain import OpenAI, PromptTemplate
import glob

In [19]:
# Module-level LLM client; the summarization helpers defined in this notebook
# read it as a global rather than taking it as a parameter.
llm = OpenAI(temperature=0.2)
def summarize_pdfs_from_folder(pdfs_folder):
    """Summarize every PDF located directly inside ``pdfs_folder``.

    Each file is loaded and split with ``PyPDFLoader`` and summarized with a
    ``map_reduce`` summarize chain driven by the module-level ``llm``. Every
    summary is printed as soon as it is produced.

    Args:
        pdfs_folder: Path of the folder to scan for ``*.pdf`` files
            (non-recursive).

    Returns:
        A list with one summary per PDF, ordered by sorted file path. The
        list is empty when the folder contains no PDFs.
    """
    # glob returns matches in arbitrary order; sort so the order of the
    # summaries is reproducible between runs and machines.
    pdf_files = sorted(glob.glob(pdfs_folder + "/*.pdf"))
    if not pdf_files:
        return []

    # The chain does not depend on the file being processed, so build it once
    # instead of on every loop iteration.
    chain = load_summarize_chain(llm, chain_type="map_reduce")

    summaries = []
    for pdf_file in pdf_files:
        docs = PyPDFLoader(pdf_file).load_and_split()
        summary = chain.run(docs)
        print("Summary for: ", pdf_file)
        print(summary)
        print("\n")
        summaries.append(summary)

    return summaries

In [29]:
def custom_summary(pdf_folder, custom_prompt):
    """Summarize every PDF in ``pdf_folder`` using a caller-supplied instruction.

    ``custom_prompt`` is placed in front of a ``{text}`` placeholder and a
    trailing ``SUMMARY:`` marker, and the resulting prompt is used for both
    the map step and the combine step of a ``map_reduce`` summarize chain
    driven by the module-level ``llm``.

    Args:
        pdf_folder: Path of the folder to scan for ``*.pdf`` files
            (non-recursive).
        custom_prompt: Plain-text instruction for the summary. Literal ``{``
            and ``}`` characters are escaped, so they are sent to the model
            as-is instead of being parsed as template fields.

    Returns:
        A list with one summary (the chain's ``"output_text"``) per PDF,
        ordered by sorted file path. Empty when the folder contains no PDFs.
    """
    # glob returns matches in arbitrary order; sort for reproducible output.
    pdf_files = sorted(glob.glob(pdf_folder + "/*.pdf"))
    if not pdf_files:
        return []

    # The template is formatted like an f-string with "text" as its only
    # variable, so any brace in the caller's text must be doubled or the
    # template would be rejected / mis-formatted.
    escaped_prompt = custom_prompt.replace("{", "{{").replace("}", "}}")
    prompt_template = escaped_prompt + """

        {text}

        SUMMARY:"""

    # Prompt and chain are identical for every file: build them once.
    prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
    chain = load_summarize_chain(llm, chain_type="map_reduce",
                                 map_prompt=prompt, combine_prompt=prompt)

    summaries = []
    for pdf_file in pdf_files:
        docs = PyPDFLoader(pdf_file).load_and_split()
        result = chain({"input_documents": docs}, return_only_outputs=True)
        summaries.append(result["output_text"])

    return summaries

In [20]:
# Summarize every PDF under ./pdfs; each summary is also printed as it is
# produced, and the returned list is kept for the save-to-file cell.
summaries = summarize_pdfs_from_folder("./pdfs")

Summary for:  ./pdfs/prompt_vision_language_models_paper.pdf


This paper presents CoOp, a few-shot learning method that uses learnable context vectors to optimize pre-trained vision-language models for downstream image recognition tasks. Experiments on 11 datasets show that CoOp outperforms hand-crafted prompts and the linear probe model, and is robust to domain shifts. The paper also discusses various research papers related to computer vision, including topics such as natural adversarial examples, scaling up visual and vision-language representation learning, visual prompt tuning, predicting deep zero-shot convolutional neural networks, parameter-efficient prompt tuning, learning visual n-grams from web data, optimizing continuous prompts for generation, pre-training with noisy text supervision, measuring robustness to natural distribution shifts, and learning robust global representations.


Summary for:  ./pdfs/scaling_transformer_paper.pdf
 This paper presents the Recurrent Memor

In [30]:
# CUSTOM_PROMPT = "Write a concise summary of the following paper with this structure: Problem being solved; Approach; Main results; Main Discussion Points"
# custom_summaries = custom_summary("./pdfs", custom_prompt=CUSTOM_PROMPT)
# # Save all summaries into one .txt file
# with open("custom_summaries.txt", "w") as f:
#     for summary in custom_summaries:
#         f.write(summary + "\n"*3)

In [21]:
# Save all summaries into one .txt file.
# The encoding is set explicitly: summaries are free-form model output and may
# contain non-ASCII characters, which the platform default encoding cannot
# always represent (e.g. cp1252 on Windows).
with open("summaries.txt", "w", encoding="utf-8") as f:
    # Each summary is followed by three newlines as a visual separator.
    f.writelines(summary + "\n" * 3 for summary in summaries)

# Querying Multiple PDFs

In [22]:
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFDirectoryLoader

In [23]:
# Directory loader pointed at the same ./pdfs/ folder used for summarization.
loader = PyPDFDirectoryLoader("./pdfs/")

# NOTE(review): `docs` is not referenced again in the cells visible here; the
# index below is built from `loader`, not from `docs`, so this call looks like
# a redundant extra read of the PDFs — confirm before removing.
docs = loader.load()

# Create the vector store index from the loader (default creator settings).
index = VectorstoreIndexCreator().from_loaders([loader])

Using embedded DuckDB without persistence: data will be transient


In [24]:
# Natural-language question put to the vector store index built above.
query = "What is the core idea behind the CoOP (context optimization) paper?"

# Bare last expression: the answer is shown as the cell's output.
index.query(query)

" The core idea behind the CoOP paper is to model a prompt's context words with learnable vectors while keeping the entire pre-trained parameters fixed, in order to adapt CLIP-like vision-language models for downstream image recognition tasks."

In [25]:
# Second question; `query` is rebound, so this cell does not depend on the
# previous query cell having been run.
query = "What is the central idea that can allow for scaling transformers to 1 million tokens?"

# Bare last expression: the answer is shown as the cell's output.
index.query(query)

' The central idea is to use the Recurrent Memory Transformer (RMT) architecture to extend the context length of BERT, allowing it to store and process both local and global information across up to 2 million tokens.'

In [26]:
# Third question, again rebinding `query`.
# NOTE(review): "profficiency" is misspelled in the query string; it is left
# untouched here because the recorded output was produced with this exact text.
query = "According to the LLM+P paper, how do you empower large language models with optimal planning profficiency?"

# Bare last expression: the answer is shown as the cell's output.
index.query(query)

' LLM+P takes in a natural language description of a planning problem, then returns a correct (or optimal) plan for solving that problem in natural language. LLM+P does so by first converting the language description into a file written in the planning domain definition language (PDDL), then leveraging classical planners to quickly find a solution, and then translating the found solution back into natural language.'