## StuffDocumentsChain

In [125]:
from langchain.chains import StuffDocumentsChain, LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Per-document template. Each document is rendered through
# `format_document` with this prompt before it reaches the LLM prompt.
document_prompt = PromptTemplate.from_template("Page {page}\n{page_content}")

# Name of the prompt variable that receives the rendered documents.
document_variable_name = "context"

llm = OpenAI()

# The summarization prompt must declare `document_variable_name`
# ("context") as one of its input variables.
prompt = PromptTemplate.from_template("Summarize this content: {context}")
llm_chain = LLMChain(prompt=prompt, llm=llm)

# Stuff chain: hands the rendered documents to `llm_chain` under
# `document_variable_name`.
chain = StuffDocumentsChain(
    verbose=True,
    document_variable_name=document_variable_name,
    document_prompt=document_prompt,
    llm_chain=llm_chain,
)

In [126]:
# Load the dummy text used to build the langchain Documents, dropping
# blank (whitespace-only) lines.
# The encoding is explicit because the text contains non-ASCII characters
# (e.g. curly apostrophes), which a non-UTF-8 platform default would
# mis-decode or reject.
with open("../data/state_of_the_union.txt", "r", encoding="utf-8") as f:
    # Iterate the file lazily and join once instead of growing a string
    # with repeated `+=`.
    DUMMY_TXT = "".join(line for line in f if line.strip())

import numpy as np
from langchain.schema import Document

def lc_doc_generator(n, txt, ws=2000):
    """Yield up to ``n`` Documents, each holding one ``ws``-character slice of ``txt``.

    Slices are consecutive and non-overlapping: page ``k`` holds
    ``txt[k * ws:(k + 1) * ws]`` and carries ``{"page": k}`` as metadata.
    Generation stops after ``n`` documents or when the text is exhausted,
    whichever comes first, so nothing is yielded for an empty ``txt``.

    Args:
        n: Maximum number of documents to yield.
        txt: Source text to slice.
        ws: Window size in characters (expected to be positive).

    Yields:
        ``Document`` objects in page order.
    """
    doc_size = len(txt)
    cnt, start = 0, 0

    # Strict `<`: with `<=` an extra, empty document was produced whenever
    # len(txt) was an exact multiple of `ws` (including an empty `txt`).
    while cnt < n and start < doc_size:
        yield Document(
            page_content=txt[start:start + ws],
            metadata={"page": cnt},
        )
        cnt += 1
        start += ws

# Materialize the first two pages of the dummy text.
# The generator is consumed directly instead of being bound to the name
# `lc_doc_generator`: rebinding would shadow the function, so running this
# cell a second time would fail with "'generator' object is not callable".
docs = list(lc_doc_generator(n=2, txt=DUMMY_TXT))

In [128]:
from typing import List, Any
from langchain.schema import format_document
## --------------------- ##
## How BaseCombineDocumentsChain formats documents
print("## --------------------- ##")
# input_keys of BaseCombineDocumentsChain
print(chain.input_keys)

# Render the first document on its own with a per-document prompt
# to show what a single formatted document looks like.
doc = docs[0]
document_prompt = PromptTemplate.from_template(
    "Page {page}\n{page_content}"
)
print(format_document(doc, prompt=document_prompt))

print("## --------------------- ##")
## How StuffDocumentsChain gathers its inputs and formats the final prompt
# Standalone version of StuffDocumentsChain._get_inputs
def _get_inputs(chain, docs: List[Document], **kwargs: Any) -> dict:
    """Build the input dictionary for the chain's LLMChain.

    Each document is rendered with `chain.document_prompt`; the rendered
    strings are then joined with `chain.document_separator` and stored under
    `chain.document_variable_name`. Keyword arguments are kept only when
    their name is one of the LLM prompt's input variables.

    Args:
        chain: Chain providing `document_prompt`, `document_separator`,
            `document_variable_name` and `llm_chain.prompt.input_variables`.
        docs: List of documents to format and then join into a single input.
        **kwargs: Additional candidate inputs; names the prompt does not
            declare are dropped.

    Returns:
        Dictionary of inputs to LLMChain.
    """
    # Render every document through the per-document prompt.
    rendered = []
    for document in docs:
        rendered.append(format_document(document, chain.document_prompt))

    # Keep only the extra arguments the LLM prompt actually declares.
    inputs = {}
    for name, value in kwargs.items():
        if name not in chain.llm_chain.prompt.input_variables:
            continue
        inputs[name] = value

    # Stuff all rendered documents into the single document variable.
    joined = chain.document_separator.join(rendered)
    inputs[chain.document_variable_name] = joined
    return inputs

# Build the LLMChain inputs once, then show their keys and the stuffed
# document text that will be substituted into the prompt.
stuff_inputs = _get_inputs(chain, docs)
print(f"Keys: {stuff_inputs.keys()}", end="\n\n")
print(stuff_inputs["context"])

print("## --------------------- ##")
# Show the chain's output/input keys, then run it on the documents.
print(chain.output_keys, chain.input_keys)
summary = chain.run({"input_documents": docs})
print(summary)

## --------------------- ##
['input_documents']
Page 0
Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.  
Last year COVID-19 kept us apart. This year we are finally together again. 
Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. 
With a duty to one another to the American people to the Constitution. 
And with an unwavering resolve that freedom will always triumph over tyranny. 
Six days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. 
He thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. 
He met the Ukrainian people. 
From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. 
Groups of ci

## MapReduceDocumentsChain

In [None]:
from langchain.chains import (
    StuffDocumentsChain,
    LLMChain,
    ReduceDocumentsChain,
    MapReduceDocumentsChain,
)
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Per-document template. Each document is rendered through
# `format_document` with this prompt.
document_prompt = PromptTemplate.from_template("Page {page}\n{page_content}")

# Name of the prompt variable that receives the rendered documents.
document_variable_name = "context"

llm = OpenAI()

# Map step: the prompt must declare `document_variable_name` ("context")
# as one of its input variables.
prompt = PromptTemplate.from_template("Summarize this content: {context}")
llm_chain = LLMChain(prompt=prompt, llm=llm)

# Reduce step: a stuff chain that merges the summaries with its own prompt.
reduce_prompt = PromptTemplate.from_template("Combine these summaries: {context}")
reduce_llm_chain = LLMChain(prompt=reduce_prompt, llm=llm)
combine_documents_chain = StuffDocumentsChain(
    document_variable_name=document_variable_name,
    document_prompt=document_prompt,
    llm_chain=reduce_llm_chain,
)
reduce_documents_chain = ReduceDocumentsChain(
    combine_documents_chain=combine_documents_chain
)

# Full map-reduce chain: `llm_chain` is the map step and
# `reduce_documents_chain` the reduce step.
chain = MapReduceDocumentsChain(
    reduce_documents_chain=reduce_documents_chain,
    llm_chain=llm_chain,
)