# Summarizing texts


In [None]:
from dotenv import load_dotenv
import os

# Read the secrets file so the API key it defines becomes available to the
# OpenAI client via the process environment.
load_dotenv("../secrets.env")

# Sanity check shown as the cell result: True when the key is set.
"OPENAI_API_KEY" in os.environ

In [None]:
from pathlib import Path

# Directory with the split Federalist Papers text files; the leading "~" is
# expanded to the current user's home directory.
docs_dir = Path(
    "~/shared/RR-workshop-data/federalist-papers/split"
).expanduser()

## Loading text documents


In [None]:
# Read one paper directly as plain text to inspect the raw data.
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default (assumes the files are UTF-8 -- TODO confirm).
with open(docs_dir / "federalist_1.txt", encoding="utf-8") as f:
    doc = f.read()

# Preview the first 200 characters of the paper.
print(doc[:200])

In [None]:
from langchain.document_loaders import DirectoryLoader

# Build a loader over the directory of papers; show_progress=True asks the
# loader to report progress while it works through the files.
doc_loader = DirectoryLoader(docs_dir, show_progress=True)
# Load the files into a list of documents; later cells slice this list
# and read each document's `page_content`.
docs = doc_loader.load()

## Stuffing


In [None]:
from langchain.chat_models import ChatOpenAI

# Chat model used for the direct-prompt summaries below; only the model name
# is set, every other parameter is left at the library default.
llm = ChatOpenAI(model="gpt-3.5-turbo-16k")

In [None]:
# "Stuff" each of the first three papers whole into a single prompt and
# print the model's summary, with a dashed rule before every summary.
for doc in docs[:3]:
    print("-" * 30)
    summary_request = "Summarize the following text in 200 words: \n" + doc.page_content
    print(llm.predict(summary_request))

### Using a chain


In [None]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

In [None]:
# Define prompt. {text} is the placeholder the documents are inserted into.
# A space separates the instruction from the inserted text (as in the map
# prompt defined further down) so the document does not run into the colon.
prompt_template = "Write a concise summary of the following: {text}"

prompt = PromptTemplate.from_template(prompt_template)

# Define LLM chain. This rebinds `llm`, so the chains built from here on use
# this temperature=0 instance rather than the one created earlier.
# `model=` is used for consistency with the earlier ChatOpenAI call.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k")
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Define StuffDocumentsChain. document_variable_name names the prompt
# placeholder ({text}) that receives the combined documents.
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

# Summarize only the first document and print the result.
print(stuff_chain.run(docs[:1]))

### Convenience wrapper


In [None]:
from langchain.chains.summarize import load_summarize_chain

# Build a summarization chain in one call, reusing the `llm` defined above
# and selecting the "stuff" chain type.
chain = load_summarize_chain(llm=llm, chain_type="stuff")

# Summarize the first document; as the last expression in the cell, the
# result is shown as the cell output rather than printed.
chain.run(docs[:1])

## Map-reduce


In [None]:
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain

In [None]:
# Map: the prompt/chain that produces a summary for the mapped documents.
# {docs} is the placeholder filled with document text; it is referenced by
# name (document_variable_name="docs") when the map-reduce chain is built.
map_template = "Write a concise summary of the following: {docs}"
map_prompt = PromptTemplate.from_template(map_template)
map_chain = LLMChain(llm=llm, prompt=map_prompt)

In [None]:
# Reduce: the prompt/chain that merges the individual summaries into one.
# {doc_summaries} is the placeholder that receives the summaries; it is
# referenced by name when the StuffDocumentsChain for the reduce step is built.
reduce_template = "Distill the following summaries into a final, consolidated summary of the main topics: {doc_summaries}"
reduce_prompt = PromptTemplate.from_template(reduce_template)
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

In [None]:
# Takes a list of documents, combines them into a single string, and passes this to an LLMChain.
# document_variable_name matches the {doc_summaries} placeholder in reduce_template.
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="doc_summaries"
)

In [None]:
# Combines and iteratively reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
    # This is the final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # Chain used to collapse documents if they exceed the context for
    # `StuffDocumentsChain`; the same chain is reused here.
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=4000,
)

In [None]:
# Combines documents by mapping a chain over them, then combining the results
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    # (matches the {docs} placeholder in map_template)
    document_variable_name="docs",
    # Whether to return the results of the map steps in the output (disabled here)
    return_intermediate_steps=False,
)

In [None]:
# Run the map-reduce summarization over the first ten documents and print the result.
print(map_reduce_chain.run(docs[:10]))