In [1]:
#!pip3 install --ignore-installed PyYAML langchain

In [2]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.llms import AzureOpenAI

### LLM

In [3]:
# configure the openai package to use Azure OpenAI
# Ideally set these variables using environment variables
# replace the XX with your API 

import os
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"]  = '2022-12-01'
os.environ["OPENAI_API_BASE"]  =  "https://XXXXXXXXXXX.openai.azure.com/"
os.environ["OPENAI_API_KEY"]  = "XXXXXXXXXXXXXXXXXXX"

In [4]:
from langchain.llms import AzureOpenAI

deployment_name='text-davinci-003'

llm = AzureOpenAI(engine=deployment_name, temperature=0)

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


### Read doc and split the text

In [29]:
text_splitter = CharacterTextSplitter()
text_splitter

<langchain.text_splitter.CharacterTextSplitter at 0x299d87ccbb0>

In [30]:
# Input file is created from --> https://en.wikipedia.org/wiki/Prime_Minister_of_India#Origins_and_history

with open("IndiaPMs.txt", encoding="utf8") as f:
    state_of_the_union = f.read()
texts = text_splitter.split_text(state_of_the_union)

In [31]:
texts

["Since 1947, there have been 14 different prime ministers.[a] The first few decades after 1947 saw the Indian National Congress' (INC) near complete domination over the political map of India. India's first prime minister—Jawaharlal Nehru—took oath on 15 August 1947. Nehru went on to serve as prime minister for 17 consecutive years, winning four general elections in the process. His tenure ended in May 1964, on his death.[16][17] After the death of Nehru, Lal Bahadur Shastri—a former home minister and a leader of the Congress party—ascended to the position of prime minister. Shastri's tenure saw the Indo-Pakistani War of 1965. Shashtri subsequently died of a reported heart attack in Tashkent, after signing the Tashkent Declaration.[18]\n\nAfter Shastri, Indira Gandhi—Nehru's daughter—was elected as the country's first woman prime minister. Indira's first term in office lasted 11 years, in which she took steps such as nationalisation of banks;[19] end of allowances and political posts,

In [32]:
from langchain.docstore.document import Document

docs = [Document(page_content=t) for t in texts[:3]]

In [33]:
docs

[Document(page_content="Since 1947, there have been 14 different prime ministers.[a] The first few decades after 1947 saw the Indian National Congress' (INC) near complete domination over the political map of India. India's first prime minister—Jawaharlal Nehru—took oath on 15 August 1947. Nehru went on to serve as prime minister for 17 consecutive years, winning four general elections in the process. His tenure ended in May 1964, on his death.[16][17] After the death of Nehru, Lal Bahadur Shastri—a former home minister and a leader of the Congress party—ascended to the position of prime minister. Shastri's tenure saw the Indo-Pakistani War of 1965. Shashtri subsequently died of a reported heart attack in Tashkent, after signing the Tashkent Declaration.[18]\n\nAfter Shastri, Indira Gandhi—Nehru's daughter—was elected as the country's first woman prime minister. Indira's first term in office lasted 11 years, in which she took steps such as nationalisation of banks;[19] end of allowance

### Quick start - Summarize

In [34]:
#!pip install tiktoken

In [35]:
from langchain.chains.summarize import load_summarize_chain
chain = load_summarize_chain(llm, chain_type="map_reduce")
chain.run(docs)

" India has had 14 different prime ministers since 1947, with Jawaharlal Nehru being the first. Indira Gandhi was the first female prime minister and served two terms. After her death in 1984, her son Rajiv was sworn in as Prime Minister. In 1989, the National Front, with outside support from the BJP and the Left Front, came to power with V.P. Singh as Prime Minister. P.V. Narasimha Rao formed a minority government in 1991 and took steps to liberalise the economy. Atal Bihari Vajpayee took office in 1998 and announced the conduct of five underground nuclear explosions in Pokhran. Vajpayee's tenure as prime minister ended in 2004."

### Using the stuff Chain to do summarization

In [36]:
chain = load_summarize_chain(llm, chain_type="stuff")
chain.run(docs)

" Since 1947, India has had 14 different prime ministers, the first being Jawaharlal Nehru who served for 17 years. After Nehru, Indira Gandhi was elected as the country's first female prime minister and served two terms. Rajiv Gandhi, Indira's son, was sworn in after her assassination in 1984. In 1989, the National Front, with outside support from the Bharatiya Janata Party (BJP) and the Left Front, came to power and Vishwanath Pratap Singh was elected prime minister. After Singh, Chandra Shekhar, P. V. Narasimha Rao, Atal Bihari Vajpayee, H. D. Deve Gowda, and I. K. Gujral all served as prime minister. In 2014, Narendra Modi was elected prime minister."

#### Custom Prompts

In [37]:
prompt_template = """Write a concise summary of the following:

{text}

CONCISE SUMMARY IN ENGLISH:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

In [38]:
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

'\n\nSince 1947, India has had 14 different prime ministers. The first was Jawaharlal Nehru, who served for 17 years and was followed by Lal Bahadur Shastri, Indira Gandhi, Morarji Desai, Charan Singh, Rajiv Gandhi, Vishwanath Pratap Singh, Chandra Shekhar, P.V. Narasimha Rao, Atal Bihari Vajpayee, and Narendra Modi. Each of these prime ministers had their own accomplishments and challenges, such as the Indo-Pakistani War of 1965, the establishment of Bangladesh, the nuclear test in Pokhran, the Operation Blue Star, the Bofors scandal, the Mandal Commission, the Babri Masjid demolition, and the Kargil War.'

In [39]:
chain

StuffDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, prompt=PromptTemplate(input_variables=['text'], output_parser=None, partial_variables={}, template='Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN ENGLISH:', template_format='f-string', validate_template=True), llm=AzureOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, client=<class 'openai.api_resources.completion.Completion'>, model_name='text-davinci-003', temperature=0.0, max_tokens=256, top_p=1, frequency_penalty=0, presence_penalty=0, n=1, best_of=1, model_kwargs={'engine': 'text-davinci-003'}, openai_api_key=None, openai_api_base=None, openai_organization=None, batch_size=20, request_timeout=None, logit_bias={}, max_retries=6, streaming=False, allowed_special=set(), disallowed_special='all', deployment_nam

### Using the refine Chain to do summarization

In [40]:
chain = load_summarize_chain(llm, chain_type="refine")
chain.run(docs)

"\n\nSince 1947, India has had 14 different prime ministers, with Jawaharlal Nehru being the first. His tenure ended in 1964, followed by Lal Bahadur Shastri, who was in office during the Indo-Pakistani War of 1965. Indira Gandhi, Nehru's daughter, was the first female prime minister and served two terms, during which she took steps such as nationalisation of banks and the accession of Sikkim to India. After Indira, Morarji Desai became the first non-Congress prime minister, followed by Charan Singh, who had the shortest tenure in the history of the office. Indira Gandhi was elected prime minister a second time in 1980, during which Operation Blue Star was conducted, resulting in thousands of deaths. On 31 October 1984, Gandhi was assassinated by two of her bodyguards, and her eldest son Rajiv was sworn in, becoming the youngest person ever to hold the office of prime minister. Rajiv called for a general election, which the Congress won with an absolute majority. Vishwanath Pratap Sing

In [41]:
# We can also return the intermediate steps for refine chains, should we want to inspect them. 
# This is done with the return_refine_steps variable.

chain = load_summarize_chain(llm, chain_type="refine", return_intermediate_steps=True)
chain({"input_documents": docs}, return_only_outputs=True)

{'intermediate_steps': [" Since 1947, India has had 14 different prime ministers, with Jawaharlal Nehru being the first. His tenure ended in 1964, followed by Lal Bahadur Shastri, who was in office during the Indo-Pakistani War of 1965. Indira Gandhi, Nehru's daughter, was the first female prime minister and served two terms, during which she took steps such as nationalisation of banks and the accession of Sikkim to India. After Indira, Morarji Desai became the first non-Congress prime minister, followed by Charan Singh, who had the shortest tenure in the history of the office. Indira Gandhi was elected prime minister a second time in 1980, during which Operation Blue Star was conducted, resulting in thousands of deaths. She was assassinated in 1984.",
  "\n\nSince 1947, India has had 14 different prime ministers, with Jawaharlal Nehru being the first. His tenure ended in 1964, followed by Lal Bahadur Shastri, who was in office during the Indo-Pakistani War of 1965. Indira Gandhi, Nehr

#### Custom Prompts

In [42]:
prompt_template = """Write a concise summary of the following:


{text}


CONCISE SUMMARY IN ENGLISH:"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
refine_template = (
    "Your job is to produce a final summary\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    "We have the opportunity to refine the existing summary"
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "Given the new context, refine the original summary"
    "If the context isn't useful, return the original summary."
)

refine_prompt = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=refine_template,
)

In [43]:
chain = load_summarize_chain(llm, chain_type="refine", return_intermediate_steps=True, question_prompt=PROMPT, refine_prompt=refine_prompt)
chain({"input_documents": docs}, return_only_outputs=True)

{'intermediate_steps': ["\n\nSince 1947, India has had 14 different prime ministers, with Jawaharlal Nehru being the first. His tenure lasted 17 years, followed by Lal Bahadur Shastri, who was in office during the Indo-Pakistani War of 1965. Indira Gandhi, Nehru's daughter, was the first female prime minister and served two terms, during which she took steps such as nationalisation of banks and the accession of Sikkim to India. After Indira, Morarji Desai became the first non-Congress prime minister, followed by Charan Singh, who had the shortest tenure in the history of the office. Indira Gandhi was elected prime minister a second time in 1980, and was assassinated in 1984.",
  "\n\nSince 1947, India has had 14 different prime ministers, with Jawaharlal Nehru being the first. His tenure lasted 17 years, followed by Lal Bahadur Shastri, who was in office during the Indo-Pakistani War of 1965. Indira Gandhi, Nehru's daughter, was the first female prime minister and served two terms, dur

In [74]:
# ToDo: Answer is getting truncated for many of the experiments, which needs to be fixed

#### Experiment - 1

In [56]:
prompt_template = """Write a concise summary of the following:

{text}

CONCISE SUMMARY IN ENGLISH in the following format:\
PM 1: ...
PM 2: ...
...
PM N: ..."""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

In [57]:
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

'\n\nPM 1: Jawaharlal Nehru (1947-1964)\nPM 2: Lal Bahadur Shastri (1964-1966)\nPM 3: Indira Gandhi (1966-1977, 1980-1984)\nPM 4: Morarji Desai (1977-1979)\nPM 5: Charan Singh (1979-1980)\nPM 6: Rajiv Gandhi (1984-1989)\nPM 7: Vishwanath Pratap Singh (1989-1990)\nPM 8: Chandra Shekhar (1990-1991)\nPM 9: P. V. Narasimha Rao (1991-1996)\nPM 10: Atal Bihari Vajpayee (1996-1997, 1998-2004)\nPM 11: H. D. Deve Gowda (1996-1997)\nPM 12: I. K. Gujral (1997-1998)\nPM 13: Manmohan Singh (2004-2014)\nPM 14: Narendra Modi (2014-present)\n\nSince 1947, India has had 14 different Prime Ministers, beginning with Jawaharlal Nehru who served for 17 years. Subsequent Prime Ministers include Lal Bahadur Shastri, Indira Gandhi ('

#### Experiment - 2

In [46]:
prompt_template = """Write a concise summary of the following:

{text}

CONCISE SUMMARY IN ENGLISH in the following format:\
PM 1: ...
PM 2: ...
...
PM N: ...

Do not give additional summary than the above format"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

In [47]:
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

'\n\nPM 1: Jawaharlal Nehru (1947-1964)\nPM 2: Lal Bahadur Shastri (1964-1966)\nPM 3: Indira Gandhi (1966-1977, 1980-1984)\nPM 4: Morarji Desai (1977-1979)\nPM 5: Charan Singh (1979-1980)\nPM 6: Rajiv Gandhi (1984-1989)\nPM 7: Vishwanath Pratap Singh (1989-1990)\nPM 8: Chandra Shekhar (1990-1991)\nPM 9: P. V. Narasimha Rao (1991-1996)\nPM 10: Atal Bihari Vajpayee (1996-1997, 1998-2004)\nPM 11: H. D. Deve Gowda (1996-1997)\nPM 12: I. K. Gujral (1997-1998)\nPM 13: Manmohan Singh (2004-2014)\nPM 14: Narendra Modi (2014-present)'

#### Experiment - 3

In [49]:
prompt_template = """Write a concise summary of the following:

{text}

CONCISE SUMMARY IN ENGLISH in the following format:\
PM 1: ...
PM 2: ...
...
PM N: ...

Write the key event for each step in a concise manner"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

In [50]:
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

"\n\nPM 1: Jawaharlal Nehru sworn in as first Prime Minister of India (15 August 1947)\nPM 2: Lal Bahadur Shastri ascends to Prime Minister (1964); Indo-Pakistani War of 1965\nPM 3: Indira Gandhi elected as first female Prime Minister (1966); Nationalisation of banks; Indo-Pakistani War of 1971; Establishment of Bangladesh; Accession of Sikkim to India; India's first nuclear test in Pokhran\nPM 4: Morarji Desai elected as first non-Congress Prime Minister (1977); Janata Party defeats Congress in general election\nPM 5: Charan Singh sworn in as Prime Minister (1979); Congress withdraws support; shortest tenure in history\nPM 6: Indira Gandhi elected as Prime Minister for second time (1980); Operation Blue Star; Indira Gandhi assassinated (1984)\nPM 7: Rajiv Gandhi sworn in as youngest Prime Minister (1984); Congress secures absolute majority in general election (1984)\nPM 8: V. P. Singh elected Prime Minister (1989); Mandal Commission's recommendations accepted\nPM 9: Chandra Shekhar sw

#### Experiment - 4

In [52]:
prompt_template = """Write a concise summary of the following:

{text}

CONCISE SUMMARY IN ENGLISH in the following format:\
PM 1: ...
PM 2: ...
...
PM N: ...

Do not Write the key event for each step.\
Do not give additional summary than the above format.\
Write the answer in reverse cronological order."""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

In [53]:
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

'\n\nPM 14: Narendra Modi took office in 2014 and implemented economic liberalisation, infrastructure development, and the National Highways Development Project and Pradhan Mantri Gram Sadak Yojana.\nPM 13: Atal Bihari Vajpayee took office in 1999 and implemented economic liberalisation, infrastructure development, and the National Highways Development Project and Pradhan Mantri Gram Sadak Yojana.\nPM 12: I. K. Gujral took office in 1997 and resigned in 1998.\nPM 11: H. D. Deve Gowda took office in 1996 and resigned in 1997.\nPM 10: Atal Bihari Vajpayee took office in 1996 and resigned in 1996.\nPM 9: P. V. Narasimha Rao took office in 1991 and implemented economic liberalisation and the liberalisation of the economy.\nPM 8: Chandra Shekhar took office in 1990 and resigned in 1991.\nPM 7: V. P. Singh took office in 1989 and resigned in 1990.\nPM 6: Rajiv Gandhi took office in 1984 and resigned in 1989.\nPM 5: Indira Gandhi took office in 1980 and resigned in 1984.\nPM 4: Charan Singh t

#### Experiment - 5

In [60]:
prompt_template = """Write a concise summary of the following:

{text}

Generate a list in following format:\
PM 1: ...
PM 2: ...
...
PM N: ...

Just write the PM name and the duration.\
Do not Write the key event for each step.\
Do not give additional summary than the above format.\
Write the answer in reverse cronological order."""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

In [59]:
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

'\n\nPM 14: Narendra Modi (2014-present)\nPM 13: Manmohan Singh (2004-2014)\nPM 12: Atal Bihari Vajpayee (1998-2004)\nPM 11: I. K. Gujral (1997-1998)\nPM 10: H. D. Deve Gowda (1996-1997)\nPM 9: Atal Bihari Vajpayee (1996)\nPM 8: P. V. Narasimha Rao (1991-1996)\nPM 7: Chandra Shekhar (1990-1991)\nPM 6: V. P. Singh (1989-1990)\nPM 5: Rajiv Gandhi (1984-1989)\nPM 4: Indira Gandhi (1980-1984)\nPM 3: Charan Singh (1979-1980)\nPM 2: Morarji Desai (1977-1979)\nPM 1: Jawaharlal Nehru (1947-1964)'

#### Experiment - 6

In [72]:
prompt_template = """Write a concise summary of the following:

{text}

Generate a list of name and duration.\
Do not Write the key event for each person.\
Do not give any additional information.\
Write the answer in reverse cronological order.
Provide them in JSON format with following keys:
name, duration
"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

In [79]:
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

'\n[\n  {\n    "name": "Narendra Modi",\n    "duration": "2014-present"\n  },\n  {\n    "name": "Manmohan Singh",\n    "duration": "2004-2014"\n  },\n  {\n    "name": "Atal Bihari Vajpayee",\n    "duration": "1998-2004"\n  },\n  {\n    "name": "I. K. Gujral",\n    "duration": "1997-1998"\n  },\n  {\n    "name": "H. D. Deve Gowda",\n    "duration": "1996-1997"\n  },\n  {\n    "name": "Atal Bihari Vajpayee",\n    "duration": "1996"\n  },\n  {\n    "name": "P. V. Narasimha Rao",\n    "duration": "1991-1996"\n  },\n  {\n    "name": "Chandra Shekhar",\n    "duration": "1990-1991"\n  },\n  {\n    "name": "Vishwanath Pratap Singh",\n'