In [94]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
import requests

# Shared LLM for the summarization cells below; temperature=0 asks for the
# least random sampling.
llm = OpenAI(temperature=0)

# Splitter left at its library defaults.
text_splitter = CharacterTextSplitter()

url = "https://raw.githubusercontent.com/amephraim/nlp/master/texts/J.%20K.%20Rowling%20-%20Harry%20Potter%201%20-%20Sorcerer's%20Stone.txt"
# A timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being split and
# summarized as if it were the book.
response = requests.get(url, timeout=30)
response.raise_for_status()
hp = response.text

texts = text_splitter.split_text(hp)

from langchain.docstore.document import Document

# Only the first 7 chunks are wrapped as Documents; every summarization cell
# below works on this subset rather than the whole text.
docs = [Document(page_content=t) for t in texts[:7]]

### Summarization with map-reduce

In [95]:
from langchain.chains.summarize import load_summarize_chain
import textwrap

# Map-reduce summarization using the chain's built-in prompts.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
)

# Summarize the prepared documents, then wrap the result at 100 columns
# so it reads comfortably in the notebook output.
output_summary = chain.run(docs)
wrapped_text = textwrap.fill(text=output_summary, width=100)
print(wrapped_text)

 Mr. and Mrs. Dursley are a normal couple living in Privet Drive, but they are related to the
Potters, a family they try to keep away from. On a dull Tuesday morning, Mr. Dursley notices strange
people in cloaks around town and hears them talking about someone called Harry Potter. Later, he
runs into an old man wearing a violet cloak who tells him that "You-Know-Who" has gone. Mr. Dursley
notices a tabby cat on his garden wall and wonders if it is normal cat behavior. He and his wife
have a normal dinner conversation, but then the news reports of owls flying in daylight and shooting
stars all over Britain. Albus Dumbledore, Professor McGonagall, and Hagrid arrive at the Dursleys'
house to deliver baby Harry Potter. Dumbledore leaves Harry on the doorstep with a letter and a scar
shaped like a bolt of lightning on his forehead. Harry sleeps peacefully, unaware of the events that
have just taken place and the fact that he is famous.


In [96]:
# Same map-reduce chain, but verbose=True so the formatted prompts are echoed
# while the chain runs.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    verbose=True,
)

output_summary = chain.run(docs)

# Wrap at 100 columns without splitting long words or rewriting the
# summary's own whitespace.
wrapped_text = textwrap.TextWrapper(
    width=100,
    break_long_words=False,
    replace_whitespace=False,
).fill(output_summary)
print(wrapped_text)



[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Harry Potter and the Sorcerer's Stone


CHAPTER ONE

THE BOY WHO LIVED

Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say
that they were perfectly normal, thank you very much. They were the last
people you'd expect to be involved in anything strange or mysterious,
because they just didn't hold with such nonsense.

Mr. Dursley was the director of a firm called Grunnings, which made
drills. He was a big, beefy man with hardly any neck, although he did
have a very large mustache. Mrs. Dursley was thin and blonde and had
nearly twice the usual amount of neck, which came in very useful as she
spent so much of her time craning over garden fences, spying on the
neighbors. The Dursleys had a small son called Dudley and in their
opinion there was no finer boy anywhere.

The Dursleys had everything they wanted, but they also 

In [97]:
# Custom prompt asking for a bullet-point summary; {text} is the slot the
# chain fills with the content to summarize.
prompt_template = (
    "\n"
    "Write a concise bullet point summary of the following:\n"
    "\n"
    "{text}\n"
    "\n"
    "CONCISE SUMMARY IN BULLET POINTS:\n"
)

BULLET_POINT_PROMPT = PromptTemplate(
    input_variables=["text"],
    template=prompt_template,
)

# Use the bullet-point prompt for both the per-document (map) step and the
# final combine step.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=BULLET_POINT_PROMPT,
    combine_prompt=BULLET_POINT_PROMPT,
)

output_summary = chain.run(docs)
wrapped_text = textwrap.fill(
    output_summary,
    width=100,
    break_long_words=False,
    replace_whitespace=False,
)

print(wrapped_text)

KeyboardInterrupt: 

### Summarization with stuffing

In [None]:
# "stuff" summarization with the custom bullet-point prompt. The chain that
# used to be built first with default prompts was overwritten before it was
# ever run, so only this one is constructed.
# NOTE(review): the "stuff" strategy is documented as placing all documents
# into a single prompt -- confirm that the 7 chunks in `docs` fit the model's
# context window when running this cell on a fresh kernel.
chain = load_summarize_chain(llm, chain_type='stuff', prompt=BULLET_POINT_PROMPT)

output_summary = chain.run(docs)

# Wrap at 100 columns, keeping long words and the model's own line breaks intact.
wrapped_text = textwrap.fill(output_summary, width=100, break_long_words=False, replace_whitespace=False)

print(wrapped_text)

• Mr. and Mrs. Dursley are proud to be perfectly normal and don't believe in anything strange or
mysterious. 
• They have a son, Dudley, and a secret: they are related to the Potters. 
• On a dull
Tuesday morning, Mr. Dursley notices a cat reading a map and strange people in cloaks. 
• He hears
people whispering about the Potters and their son, Harry. 
• He sees owls flying in broad daylight
and people celebrating something. 
• He meets an old man wearing a violet cloak who tells him that
"You-Know-Who has gone at last". 
• He sees the same cat waiting in his front garden and wonders if
it has anything to do with the Potters.


### Summarization with refinement

In [None]:
# Refine summarization using the chain's built-in prompts.
chain = load_summarize_chain(
    llm,
    chain_type="refine",
)

# Run over the prepared documents and wrap the result at 100 columns.
output_summary = chain.run(docs)
wrapped_text = textwrap.fill(text=output_summary, width=100)
print(wrapped_text)

  Mr. and Mrs. Dursley are a normal couple living in Privet Drive, but they have a secret - they are
related to the Potters, a family they try to keep away from. On a dull Tuesday morning, Mr. Dursley
leaves for work and notices a strange cat reading a map. He dismisses it as a trick of the light,
but it is the first sign of the mysterious events to come. As he sits in the morning traffic jam, he
notices a lot of strangely dressed people in cloaks. He assumes it is a new fashion, but is enraged
to see that some of them are older than him. He arrives at work and has a normal, owl-free morning,
but when he goes to buy a bun from the bakery he notices a group of people in cloaks whispering
excitedly. He hears them mention the Potters and their son, Harry, and is filled with fear. He
hurries back to his office and is about to call home when he changes his mind. As he leaves the
building, he runs into an old man wearing a violet cloak who tells him that You-Know-Who has gone at
last and tha

In [None]:
# Refinement, more manual approach: supply both prompts explicitly.

# Prompt passed as `question_prompt` (presumably used to produce the initial
# summary -- confirm against the refine chain documentation).
prompt_template = """
Write a concise summary of the following extracting the key information:

{text}

CONCISE SUMMARY:
"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=['text'])

# Prompt passed as `refine_prompt`: it receives the summary so far
# ({existing_answer}) together with additional context ({text}).
refine_template = (
    "Your job is to produce a final summary\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    # Trailing space: adjacent literals are concatenated verbatim, and without
    # it the prompt reads "summary(only if needed)".
    "We have the opportunity to refine the existing summary "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    # Sentence break: without it the prompt reads "summaryIf the context".
    "Given the new context, refine the original summary.\n"
    "If the context isn't useful, return the original summary."
)

refine_prompt = PromptTemplate(input_variables = ['existing_answer', 'text'], template = refine_template)

chain = load_summarize_chain(OpenAI(temperature=0),
                             chain_type='refine',
                             return_intermediate_steps = True,
                             question_prompt=PROMPT,
                             refine_prompt=refine_prompt)

# Calling the chain directly (instead of .run) returns a dict; only the final
# 'output_text' entry is printed here.
output_summary = chain({'input_documents': docs}, return_only_outputs=True)
wrapped_text = textwrap.fill(output_summary['output_text'],
                             width=100,
                             break_long_words=False,
                             replace_whitespace=False)

print(wrapped_text)



The Dursleys, a seemingly normal family, are living in Privet Drive and have a secret they are
desperate to keep hidden. On a dull Tuesday morning, Mr. Dursley leaves for work and notices a
strange cat reading a map. Unbeknownst to him, this is the start of a series of mysterious events
that will soon take place all over the country. As he sits in the morning traffic jam, he notices a
lot of strangely dressed people about. He is enraged to see that a couple of them aren't young at
all and are wearing cloaks. Later, he notices owls flying in broad daylight and people pointing and
gazing open-mouthed. He then passes a group of people in cloaks whispering excitedly and hears them
mention the Potters and their son, Harry. Fearful, he hurries back to his office and is further
disturbed when an old man wearing a violet cloak tells him to rejoice as You-Know-Who has gone at
last. Mr. Dursley is further rattled when the same cat he saw earlier is now sitting on his garden
wall. He tries to a

### Summary checker

In [None]:
# LLM for the fact-extraction step: explicit model name, temperature 0, and
# completions limited to 256 tokens. Rebinds the notebook-wide `llm`.
llm = OpenAI(
    model_name="text-davinci-003",
    temperature=0,
    max_tokens=256,
)

article = '''Coinbase, the second-largest crypto exchange by trading volume, released its Q4 2022 earnings on Tuesday, giving shareholders and market players alike an updated look into its financials. In response to the report, the company's shares are down modestly in early after-hours trading.In the fourth quarter of 2022, Coinbase generated $605 million in total revenue, down sharply from $2.49 billion in the year-ago quarter. Coinbase's top line was not enough to cover its expenses: The company lost $557 million in the three-month period on a GAAP basis (net income) worth -$2.46 per share, and an adjusted EBITDA deficit of $124 million.Wall Street expected Coinbase to report $581.2 million in revenue and earnings per share of -$2.44 with adjusted EBITDA of -$201.8 million driven by 8.4 million monthly transaction users (MTUs), according to data provided by Yahoo Finance.Before its Q4 earnings were released, Coinbase's stock had risen 86% year-to-date. Even with that rally, the value of Coinbase when measured on a per-share basis is still down significantly from its 52-week high of $206.79.That Coinbase beat revenue expectations is notable in that it came with declines in trading volume; Coinbase historically generated the bulk of its revenues from trading fees, making Q4 2022 notable. Consumer trading volumes fell from $26 billion in the third quarter of last year to $20 billion in Q4, while institutional volumes across the same timeframe fell from $133 billion to $125 billion.The overall crypto market capitalization fell about 64%, or $1.5 trillion during 2022, which resulted in Coinbase's total trading volumes and transaction revenues to fall 50% and 66% year-over-year, respectively, the company reported.As you would expect with declines in trading volume, trading revenue at Coinbase fell in Q4 compared to the third quarter of last year, dipping from $365.9 million to $322.1 million. 
(TechCrunch is comparing Coinbase's Q4 2022 results to Q3 2022 instead of Q4 2021, as the latter comparison would be less useful given how much the crypto market has changed in the last year; we're all aware that overall crypto activity has fallen from the final months of 2021.)There were bits of good news in the Coinbase report. While Coinbase's trading revenues were less than exuberant, the company's other revenues posted gains. What Coinbase calls its "subscription and services revenue" rose from $210.5 million in Q3 2022 to $282.8 million in Q4 of the same year, a gain of just over 34% in a single quarter.And even as the crypto industry faced a number of catastrophic events, including the Terra/LUNA and FTX collapses to name a few, there was still growth in other areas. The monthly active developers in crypto have more than doubled since 2020 to over 20,000, while major brands like Starbucks, Nike and Adidas have dived into the space alongside social media platforms like Instagram and Reddit.With big players getting into crypto, industry players are hoping this move results in greater adoption both for product use cases and trading volumes. Although there was a lot of movement from traditional retail markets and Web 2.0 businesses, trading volume for both consumer and institutional users fell quarter-over-quarter for Coinbase.Looking forward, it'll be interesting to see if these pieces pick back up and trading interest reemerges in 2023, or if platforms like Coinbase will have to keep looking elsewhere for revenue (like its subscription service) if users continue to shy away from the market.
'''

# Prompt that asks the model for short, numbered facts from the article.
# The template is built from adjacent string literals rather than a backslash
# continuation inside one literal: the continuation pulled the next source
# line's indentation into the prompt text.
fact_extraction_prompt = PromptTemplate(
    input_variables=['text_input'],
    template=(
        "Extract the key facts out of this text. Don't include opinions. "
        "give each fact a number and keep them short sentences. :\n\n {text_input}"
    ),
)

# Run the prompt over the article with the LLM configured above.
fact_extraction_chain = LLMChain(llm=llm, prompt=fact_extraction_prompt)
facts = fact_extraction_chain.run(article)

# Wrap at 100 columns, keeping the model's own line breaks between facts.
wrapped_text = textwrap.fill(facts, width=100, break_long_words=False, replace_whitespace=False)
print(wrapped_text)


1. Coinbase released its Q4 2022 earnings on Tuesday. 
2. Coinbase generated $605 million in total
revenue in Q4 2022. 
3. Coinbase lost $557 million in the three-month period on a GAAP basis. 
4.
Wall Street expected Coinbase to report $581.2 million in revenue. 
5. Coinbase's stock had risen
86% year-to-date before its Q4 earnings were released. 
6. Coinbase's total trading volumes and
transaction revenues fell 50% and 66% year-over-year, respectively. 
7. Trading revenue at Coinbase
fell from $365.9 million to $322.1 million quarter-over-quarter. 
8. Coinbase's "subscription and
services revenue" rose from $210.5 million in Q3 2022 to $282.8 million in Q4 of the same year. 
9.
Monthly active developers in crypto have more than doubled since 2020 to over 20,000. 
10. Major
brands like Starbucks, Nike and Adidas have dived into the crypto space.


In [93]:
from langchain.chains import LLMSummarizationCheckerChain
from langchain.llms import OpenAI

# Rebind `llm` to a temperature-0 model without the 256-token completion cap
# set for fact extraction.
llm = OpenAI(temperature=0)

# verbose=True echoes each formatted prompt; max_checks=2 presumably caps the
# number of check-and-revise rounds -- confirm against the chain's docs.
checker_chain = LLMSummarizationCheckerChain(
    llm=llm,
    max_checks=2,
    verbose=True,
)

# Run the checker over the article; the bare last expression displays the result.
final_summary = checker_chain.run(article)
final_summary





[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven some text, extract a list of facts from the text.

Format your output as a bulleted list.

Text:
"""
Coinbase, the second-largest crypto exchange by trading volume, released its Q4 2022 earnings on Tuesday, giving shareholders and market players alike an updated look into its financials. In response to the report, the company's shares are down modestly in early after-hours trading.In the fourth quarter of 2022, Coinbase generated $605 million in total revenue, down sharply from $2.49 billion in the year-ago quarter. Coinbase's top line was not enough to cover its expenses: The company lost $557 million in the three-month period on a GAAP basis (net income) worth -$2.46 per share, and an adjusted EBITDA deficit of $124 million.Wall Street expected Coinbase to report $581.2 million in revenue and earnings per share of -$2.44 with adjusted EB

"Coinbase, the second-largest crypto exchange by trading volume, released its Q4 2022 earnings on Tuesday, giving shareholders and market players alike an updated look into its financials. In response to the report, the company's shares are down modestly in early after-hours trading.In the fourth quarter of 2022, Coinbase generated $605 million in total revenue, down sharply from $2.49 billion in the year-ago quarter. Coinbase's top line was not enough to cover its expenses: The company lost $557 million in the three-month period on a GAAP basis (net income) worth -$2.46 per share, and an adjusted EBITDA deficit of $124 million.Wall Street expected Coinbase to report $581.2 million in revenue and earnings per share of -$2.44 with adjusted EBITDA of -$201.8 million driven by 8.4 million monthly transaction users (MTUs), according to data provided by Yahoo Finance.Before its Q4 earnings were released, Coinbase's stock had risen 86% year-to-date. Despite the rally, the value of Coinbase w

In [98]:
# Size of the summary returned by the checker chain.
len(final_summary)

1115

In [99]:
# Inspect the prompt template the checker chain uses to extract facts from a summary.
checker_chain.create_assertions_prompt.template

'Given some text, extract a list of facts from the text.\n\nFormat your output as a bulleted list.\n\nText:\n"""\n{summary}\n"""\n\nFacts:'

In [100]:
# Inspect the prompt template the checker chain uses to rewrite the summary
# from the checked assertions.
checker_chain.revised_summary_prompt.template

'Below are some assertions that have been fact checked and are labeled as true or false. If the answer is false, a suggestion is given for a correction.\n\nChecked Assertions:\n"""\n{checked_assertions}\n"""\n\nOriginal Summary:\n"""\n{summary}\n"""\n\nUsing these checked assertions, rewrite the original summary to be completely true.\n\nThe output should have the same structure and formatting as the original summary.\n\nSummary:'