### Map Reduce Summarization in LangChain

In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("apjspeech.pdf")
docs = loader.load_and_split()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
docs

[Document(metadata={'producer': 'GPL Ghostscript 8.15', 'creator': 'PScript5.dll Version 5.2', 'creationdate': 'D:20070730160943', 'moddate': 'D:20070730160943', 'title': 'Microsoft Word - Document1', 'author': 'Shri', 'source': 'apjspeech.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='A P J Abdul Kalam Departing speech \n \n \nFriends, I am delighted to address you all, in the country and those livi ng abroad, after \nworking with you and completing five beautiful and eventful years in Rashtrapati \nBhavan. Today, it is indeed a thanks giving occasion. I would like to narr ate, how I \nenjoyed every minute of my tenure enriched by the wonderful assoc iation from each one \nof you, hailing from different walks of life, be it politics, sci ence and technology, \nacademics, arts, literature, business, judiciary, administration, local bodies, farming, \nhome makers, special children, media and above all from the youth and st udent \ncommunity who are the future wealt

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 100)
doc_chunks = splitter.split_documents(docs)
doc_chunks

[Document(metadata={'producer': 'GPL Ghostscript 8.15', 'creator': 'PScript5.dll Version 5.2', 'creationdate': 'D:20070730160943', 'moddate': 'D:20070730160943', 'title': 'Microsoft Word - Document1', 'author': 'Shri', 'source': 'apjspeech.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='A P J Abdul Kalam Departing speech \n \n \nFriends, I am delighted to address you all, in the country and those livi ng abroad, after \nworking with you and completing five beautiful and eventful years in Rashtrapati \nBhavan. Today, it is indeed a thanks giving occasion. I would like to narr ate, how I \nenjoyed every minute of my tenure enriched by the wonderful assoc iation from each one \nof you, hailing from different walks of life, be it politics, sci ence and technology, \nacademics, arts, literature, business, judiciary, administration, local bodies, farming, \nhome makers, special children, media and above all from the youth and st udent \ncommunity who are the future wealt

In [5]:
len(doc_chunks)

22

In [6]:
chunk_prompt = """
Provide a short and concise summary for the provided block of text.
Text : \n{text}
"""

In [7]:
from langchain_core.prompts import PromptTemplate

map_prompt = PromptTemplate(input_variables=['text'], template=chunk_prompt)
map_prompt

PromptTemplate(input_variables=['text'], input_types={}, partial_variables={}, template='\nProvide a short and concise summary for the provided block of text.\nText : \n{text}\n')

In [8]:
final_prompt = """
Provide a final summary for the entire block of text along with these important points.
Add a Motivational title.
Start the summary with an introduction.
Provide the summary in bullet points as per the speech.
Speech : \n{text}
"""

In [9]:
final_prompt_template = PromptTemplate(input_variables=['text'], template=final_prompt)
final_prompt_template

PromptTemplate(input_variables=['text'], input_types={}, partial_variables={}, template='\nProvide a final summary for the entire block of text along with these important points.\nAdd a Motivational title.\nStart the summary with an introduction.\nProvide the summary in bullet points as per the speech.\nSpeech : \n{text}\n')

In [10]:
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama-3.3-70b-versatile")
llm

ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 32768, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000002A54A5E9940>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002A54A5EA660>, model_name='llama-3.3-70b-versatile', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [11]:
from langchain_classic.chains.summarize import load_summarize_chain

summary_chain = load_summarize_chain(llm=llm, chain_type="map_reduce", map_prompt=map_prompt, combine_prompt=final_prompt_template, verbose=True)
summary_chain

MapReduceDocumentsChain(verbose=True, llm_chain=LLMChain(verbose=True, prompt=PromptTemplate(input_variables=['text'], input_types={}, partial_variables={}, template='\nProvide a short and concise summary for the provided block of text.\nText : \n{text}\n'), llm=ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 32768, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000002A54A5E9940>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002A54A5EA660>, model_name='llama-3.3-70b-versatile', model_kwargs={}, groq_api_key=SecretStr('**********')), output_parser=StrOutputParser(), llm_kwargs={}), reduce_documents_chain=ReduceDocumentsChain(verbose=True, combine_documents_chain=StuffDocumentsChain(verbose=True, llm_chain=LLMChain(verbose=True, pro

In [12]:
summary = summary_chain.run(doc_chunks)
print(summary)

  summary = summary_chain.run(doc_chunks)




[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Provide a short and concise summary for the provided block of text.
Text : 
A P J Abdul Kalam Departing speech 
 
 
Friends, I am delighted to address you all, in the country and those livi ng abroad, after 
working with you and completing five beautiful and eventful years in Rashtrapati 
Bhavan. Today, it is indeed a thanks giving occasion. I would like to narr ate, how I 
enjoyed every minute of my tenure enriched by the wonderful assoc iation from each one 
of you, hailing from different walks of life, be it politics, sci ence and technology, 
academics, arts, literature, business, judiciary, administration, local bodies, farming, 
home makers, special children, media and above all from the youth and st udent 
community who are the future wealth of our country. During my intera ction at 
Rashtrapati Bhavan in Delhi and at every state and union t

  return len(self.get_token_ids(text))




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Provide a final summary for the entire block of text along with these important points.
Add a Motivational title.
Start the summary with an introduction.
Provide the summary in bullet points as per the speech.
Speech : 
Here is a short and concise summary of the text:

In his departing speech, A.P.J. Abdul Kalam reflects on his 5-year tenure as President, expressing gratitude for the associations he made with people from various walks of life and highlighting the importance of accelerating development to meet the aspirations of the youth.

The text lists 10 important messages, including accelerating development, empowering villages, and overcoming problems through partnership, with a focus on achieving a developed India by 2020, inspired by a question from a young girl named Anukriti.

An individual was asked by Anukriti why India can't become a developed nation by 2020, and they assured her that her ques