# Introduction

This notebook explores summarization templates and the default prompts supplied by LangChain.

In [1]:
import os
import textwrap

# I have a personal collection of utilities in a standalone module.
# Necessary keys are in a .env file loaded by the load_keys utility function.

from helpers.utilities import install_if_needed, load_keys

In [2]:
# Load the API keys this notebook needs from the .env file.
load_keys()

dotenv is already installed.


In [3]:
# Make sure langchain is available before importing from it below.
install_if_needed(["langchain"])

from langchain.chat_models import AzureChatOpenAI

langchain is already installed.


## load_summarize_chain

In [16]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import WebBaseLoader

# Web page used as the summarization input throughout this notebook.
ARTICLE_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"

loader = WebBaseLoader(ARTICLE_URL)
docs = loader.load()

# Chat model served from the Azure deployment named below, with temperature 0.
llm = AzureChatOpenAI(
    deployment_name="gpt-35-turbo",
    temperature=0,
)

In [17]:
# Build LangChain's stock summarization chain of type "stuff" around `llm`.
chain = load_summarize_chain(llm, chain_type="stuff")

In [18]:
# Inspect the chain object that load_summarize_chain returned.
# NOTE(review): this repr includes the LLM client's settings (API key, endpoint),
# so clear or redact the cell output before sharing or committing the notebook.
chain

StuffDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=PromptTemplate(input_variables=['text'], output_parser=None, partial_variables={}, template='Write a concise summary of the following:\n\n\n"{text}"\n\n\nCONCISE SUMMARY:', template_format='f-string', validate_template=True), llm=AzureChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.0, model_kwargs={}, openai_api_key='***REDACTED***', openai_api_base='https://ls-d-eus2-aiservice.openai.azure.com/', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_m

In [19]:
%%time
# Summarize all loaded documents in one call with the "stuff" chain, timing the call.
# NOTE(review): the saved output shows this failing with
# "InvalidRequestError: Resource not found"; presumably the Azure deployment name,
# endpoint, or API version does not match the resource — confirm.
result = chain.run(docs)

InvalidRequestError: Resource not found

In [None]:
# Re-wrap the summary text so it prints as a readable paragraph.
print(textwrap.fill(result))

In [None]:
# Show the names in the chain's `__fields__` mapping, i.e. which attributes can be inspected.
chain.__fields__.keys()

In [None]:
# Print the default prompt template held by the chain's inner LLMChain.
print(chain.llm_chain.prompt.template)

## StuffDocumentsChain

Calling `load_summarize_chain` with `chain_type="stuff"` really just returns a `StuffDocumentsChain` built from a `PromptTemplate`.

In [None]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Hand-written summarization prompt; `{text}` is the placeholder that
# StuffDocumentsChain fills in (see `document_variable_name` below).
prompt_template = """Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

# Pair the prompt with the `llm` created earlier in the notebook.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# `document_variable_name` must match the placeholder name used in the template.
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

# `docs` from the earlier loader cell is reused as-is; downloading the same
# page a second time is unnecessary.

In [None]:
%%time
# Run the hand-built stuff chain on the same documents, timing the call.
result = stuff_chain.run(docs)

In [None]:
# Re-wrap the summary from the hand-built stuff chain for readable output.
print(textwrap.fill(result))

In [None]:
# Print the template actually attached to the hand-built chain's LLMChain.
print(stuff_chain.llm_chain.prompt.template)

## Map-Reduce

Now we explore the use of map-reduce to address inputs that are too large.

In [None]:
# Switching to gpt-3.5-turbo which has a smaller input window than gpt-3.5-turbo-16k.
# ChatOpenAI is not imported anywhere earlier in the notebook, so import it here;
# without this the cell raises NameError on a fresh kernel.
from langchain.chat_models import ChatOpenAI

# NOTE(review): unlike the AzureChatOpenAI client used above, this presumably talks
# to the OpenAI API directly and needs its own API key in the environment — confirm
# that load_keys() provides it.
llm = ChatOpenAI(model="gpt-3.5-turbo")

In [None]:
# Rebuild the stuff chain with the same prompt, now bound to the
# smaller-input-window `llm` defined in the previous cell.
llm_chain = LLMChain(prompt=prompt, llm=llm)
smaller_stuff_chain = StuffDocumentsChain(
    document_variable_name="text",
    llm_chain=llm_chain,
)

In [None]:
# Generate the anticipated error: stuffing every document into one prompt is
# expected to be too large for the smaller-input-window model.
try:
    smaller_stuff_chain.run(docs)
except Exception as e:
    # Report the real exception type instead of assuming it is an invalid-request
    # error; otherwise an unrelated failure (e.g. a NameError from an earlier
    # cell that did not run) would be mislabelled.
    print(f"Caught {type(e).__name__}: {e}")

In [None]:
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import MapReduceDocumentsChain

In [None]:
# Splitter settings, named so they are easy to find and tune.
CHUNK_SIZE = 3000
CHUNK_OVERLAP = 200

# Break the loaded documents into overlapping chunks for the multi-step chains below.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(docs)

In [None]:
# How many chunks the splitter produced.
len(chunks)

In [None]:
# Build LangChain's stock "map_reduce" summarization chain around the current `llm`.
map_reduce_chain = load_summarize_chain(llm, chain_type="map_reduce")

In [None]:
%%time
result = map_reduce_chain.run(input_documents=chunks, return_only_outputs=True)

In [None]:
# Re-wrap the map-reduce summary for readable output.
print(textwrap.fill(result))

In [None]:
# Show the names in the map-reduce chain's `__fields__` mapping to see what can be inspected.
map_reduce_chain.__fields__.keys()

In [None]:
# Print the prompt template of the chain's own `llm_chain`
# (presumably the prompt applied to each chunk in the map step — confirm).
print(map_reduce_chain.llm_chain.prompt.template)

In [None]:
# Print the prompt template nested under reduce_documents_chain.combine_documents_chain
# (presumably the prompt used to combine the per-chunk summaries — confirm).
print(map_reduce_chain.reduce_documents_chain.combine_documents_chain.llm_chain.prompt.template)

## Map-Refine

In [None]:
from langchain.chains import RefineDocumentsChain

In [None]:
# Build LangChain's stock "refine" summarization chain around the current `llm`.
refine_chain = load_summarize_chain(llm, chain_type="refine")

In [None]:
%%time
result = refine_chain.run(input_documents=chunks, return_only_outputs=True)

In [None]:
# Re-wrap the refine-chain summary for readable output.
print(textwrap.fill(result))

In [None]:
# Show the names in the refine chain's `__fields__` mapping to see what can be inspected.
refine_chain.__fields__.keys()

In [None]:
# Print the prompt template of `initial_llm_chain`
# (presumably the prompt used for the first chunk — confirm).
print(refine_chain.initial_llm_chain.prompt.template)

In [None]:
# Print the prompt template of `refine_llm_chain`
# (presumably the prompt that updates the running summary with each later chunk — confirm).
print(refine_chain.refine_llm_chain.prompt.template)

The refine chain doesn't seem to be working exactly as anticipated.  The output seems to relate to the impact of the last chunk and doesn't reflect the aggregated response.  Perhaps I am looking in the wrong part of the object.  Let's repeat but change the return_only_outputs parameter to False and then look more carefully at the result object.

In [None]:
%%time
result = refine_chain.run(input_documents=chunks, return_only_outputs=False)

In [None]:
# Display the object returned by the previous cell for closer inspection.
result