In [2]:
import os

from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
# Shared objects reused by every cell below.
# Splitter is created with the library's default settings.
text_splitter = CharacterTextSplitter()
# temperature=0 asks the model for its least random completions, which keeps
# the summaries as repeatable as possible between runs.
llm = OpenAI(temperature=0)

In [3]:
# Read the whole transcript into memory. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's default locale encoding.
with open("./huberman_procrastination.txt", encoding="utf-8") as f:
    text_doc = f.read()

# Break the transcript into chunks with the splitter created in the setup cell.
texts = text_splitter.split_text(text_doc)

In [4]:
# Wrap only the first three transcript chunks as Documents; later chunks are
# not summarized (presumably to keep the run short and cheap -- TODO confirm).
docs = [
    Document(page_content=chunk)
    for chunk in texts[:3]
]

In [5]:
# Baseline: a map_reduce summarization chain with no prompt overrides.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
)
# Left as the cell's last expression so the summary is displayed as output.
chain.run(docs)

' In this podcast, Professor Andrew Huberman of Stanford School of Medicine discusses dopamine and its role in pleasure, motivation, drive, pursuit, procrastination, and confidence. He provides practical tools to leverage dopamine to optimize mental health, physical health, and performance. The podcast is now partnered with Momentous Supplements, Helix Sleep, Whoop, and Roka to provide listeners with better sleep and health. Dopamine is a chemical that modulates the electrical activity of neurons in the brain and body, and is released in five circuits in the brain, each of which engages different functions.'

Let's look at summarization with `map_reduce` but using a custom prompt.

In [6]:
# Instruction text used by the custom-prompt chain. It is assembled line by
# line so the template's trailing space and blank lines are explicit;
# "{text}" is its only placeholder.
prompt_template = (
    "Write a comprehensive summary of this video transcript. \n"
    "\n"
    "Divide it into,\n"
    "1. Dopamine and Procrastination\n"
    "2. Tools proposed\n"
    "3. Protocols\n"
    "4. Conclusion\n"
    "\n"
    "{text}\n"
    "\n"
    "SUMMARY:"
)
PROMPT = PromptTemplate(input_variables=["text"], template=prompt_template)

# The same prompt is passed as both the map prompt and the combine prompt.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=PROMPT,
    combine_prompt=PROMPT,
)

# Call the chain with a dict input and keep only the "output_text" entry.
summary_output = chain(
    {"input_documents": docs},
    return_only_outputs=True,
)["output_text"]
summary_output

'\nThis podcast discusses the role of dopamine in motivation, drive, pleasure, and procrastination. It explains how dopamine is released in five circuits in the brain, including the Nigro striatal pathway, the mesolimbic pathway, the VTA and nucleus accumbens, the prefrontal cortex, and the mesocortical pathway. It is involved in basic functions like maintaining body temperature and libido, decision-making, and reward-seeking behavior. The podcast provides tools to leverage dopamine circuitry and levels in order to become more motivated and to overcome procrastination. It is sponsored by Helix Sleep, Whoop, and Roka, who offer discounts on their products. Momentous supplements can also be found at livemomentous.com huberman.'

In [13]:
# Write summary_output to a text file. The encoding is pinned to UTF-8 so any
# non-ASCII characters in the model output are written the same way on every
# platform instead of depending on the default locale encoding.
with open("./summary_output.txt", "w", encoding="utf-8") as f:
    f.write(summary_output)

# Summarizing PDFs

In [None]:
# Location of the PDF to summarize. Set the SUMMARY_PDF_PATH environment
# variable to point at your own copy; the fallback is the original author's
# machine-specific path and will not exist on other machines.
PDF_PATH = os.environ.get(
    "SUMMARY_PDF_PATH",
    "/home/lucassoares/Desktop/projects/content_creation/automating_work_research/pdfs/paper_shorts_memory.pdf",
)

# Load the PDF and summarize it with a default map_reduce chain.
# NOTE: this rebinds `docs` and `chain`, replacing the transcript versions
# created in the earlier cells.
loader = PyPDFLoader(PDF_PATH)
docs = loader.load_and_split()
chain = load_summarize_chain(llm, chain_type="map_reduce")
summary_1 = chain.run(docs)
summary_1

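For some reason, the [`refine`](https://python.langchain.com/en/latest/modules/chains/index_examples/summarize.html) chain took a very long time to run. A likely cause is that `refine` processes the chunks one after another, feeding each intermediate summary into the next LLM call, whereas `map_reduce` summarizes the chunks independently before combining them.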