In [3]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
# Shared model client that is passed to the summarization chains built below.
# NOTE(review): temperature=0 is presumably meant to keep completions as
# repeatable as possible — confirm against the OpenAI wrapper's documentation.
llm = OpenAI(temperature=0)
# Splitter created with its default settings; it is used below to break the
# raw text file into chunks.
text_splitter = CharacterTextSplitter()

In [4]:
# Read the whole source document into memory.
# The encoding is pinned to UTF-8 so decoding no longer depends on the
# platform's default locale encoding.
# NOTE(review): assumes text_doc.txt is stored as UTF-8 — confirm.
with open("./text_doc.txt", encoding="utf-8") as f:
    text_doc = f.read()

# Break the document into chunks with the splitter configured earlier.
texts = text_splitter.split_text(text_doc)

In [6]:
# Wrap only the first three chunks as Document objects; the remaining chunks
# in `texts` are not summarized.
docs = [
    Document(page_content=chunk)
    for chunk in texts[:3]
]

In [8]:
# Build a summarization chain of type "map_reduce" around the shared llm.
chain = load_summarize_chain(llm, chain_type="map_reduce")
# Last expression of the cell, so the returned summary is shown as cell output.
chain.run(docs)

' This essay argues for the need for technology that can augment our thinking capabilities, similar to the way pencil and paper do. Representations are mental tools that shape how we understand the world and are heavily dependent on the media they are expressed on. The author recommends watching Brett Victor\'s lecture on "The Human Representation of Thought" to learn more about representations and how they can be used to generate insight. Technology has given us the opportunity to reconsider our forced choices and evolve from the rectangle boxes we use to do knowledge work.'

In [4]:
# Machine-specific absolute location of the paper to summarize; adjust it
# before running this notebook on another machine.
PDF_PATH = "/home/lucassoares/Desktop/projects/content_creation/automating_work_research/pdfs/paper_shorts_memory.pdf"

# Load the PDF and split it into documents.
# Note: this rebinds `docs`, replacing the text-file documents built in an
# earlier cell.
loader = PyPDFLoader(PDF_PATH)
docs = loader.load_and_split()

In [5]:
# Rebuild the "map_reduce" summarization chain and apply it to the documents
# currently bound to `docs`.
chain = load_summarize_chain(llm, chain_type="map_reduce")
# Keep the summary in a variable, then echo it as the cell output.
summary_1 = chain.run(docs)
summary_1

' This study examines the effects of short-form videos on our capacity to retain intentions. Results showed that TikTok significantly degraded performance, while neither Twitter nor YouTube had any observable effect. Further analysis found significant differences in the drift rate, variance, and decision bound in different interruptions, and significant differences in the variance and non-decision time in pre- and post-interruptions. This research contributes an empirical understanding of the impact of different social media feeds on prospective memory and outlines consequences for media technology designers.'

Now let's look at summarization with `map_reduce` again, this time using a custom prompt.

In [12]:
# Custom instructions asking for a four-part summary; `{text}` is the single
# placeholder that the template fills in.
prompt_template = """Write a comprehensive summary of this paper. 

Divide it into,
1. Problem
2. Proposed solution
3. Results
4. Discussion.

{text}

SUMMARY:"""
PROMPT = PromptTemplate(input_variables=["text"], template=prompt_template)

# The same prompt is supplied for both the map prompt and the combine prompt.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=PROMPT,
    combine_prompt=PROMPT,
)

# Call the chain with the documents and keep only the "output_text" entry.
summary_output = chain(
    {"input_documents": docs},
    return_only_outputs=True,
)["output_text"]
summary_output

'\n\nThis paper examines the impact of different social media feed modalities on cognitive performance in a dual-task environment. An experiment was conducted with 60 participants, who were randomly assigned to one of four conditions: Rest, Twitter, YouTube, or TikTok. The results showed that the TikTok interruption condition had a significant negative impact on accuracy in the short-term task, while the other conditions had no significant effect. The Drift Diffusion Model (DDM) was used to further interpret the results, and showed that participants had a 20.00% error rate before the TikTok interruption, and a 50.98% error rate after the interruption. The authors concluded that different social media feed designs, with varying media modalities and context-switch frequencies, can impact cognitive performance in terms of short-term task performance. They also discussed the implications of their findings for media technology designers and highlighted future work in the field. Additionally

In [13]:
# Write summary_output to a text file.
# UTF-8 is requested explicitly so that any non-ASCII characters in the
# summary are written the same way on every platform, instead of depending on
# the locale's default encoding.
with open("./summary_output.txt", "w", encoding="utf-8") as f:
    f.write(summary_output)

For some reason, the [`refine`](https://python.langchain.com/en/latest/modules/chains/index_examples/summarize.html) chain was taking a really long time to run.