In [10]:
import os
from typing import Optional, List

from langchain import hub
from langchain.chains import create_extraction_chain
from langchain.chains import create_extraction_chain_pydantic
from langchain.output_parsers.openai_tools import JsonOutputToolsParser
from langchain_community.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import RunnableLambda

In [31]:
# Prompt pulled from the LangChain Hub; a later cell pipes it into the chat model.
obj = hub.pull("wfh/proposal-indexing")

# The API key is read from the OPENAI_API_KEY environment variable, which requires
# the `os` module to be imported before this cell runs.
# 'YouKey' is only a placeholder fallback: set the environment variable (or replace
# the placeholder) before running; presumably API calls are rejected otherwise.
llm = ChatOpenAI(
    model='gpt-3.5-turbo-1106',
    openai_api_key=os.getenv("OPENAI_API_KEY", 'YouKey'),
)

In [33]:
# Pipe the hub prompt into the chat model; get_propositions() calls .invoke() on
# this with {"input": <text>} and reads .content from the result.
runnable = obj | llm

In [34]:
# Load the whole essay into memory as one string.
# The encoding is explicit so the read does not depend on the platform's default
# locale encoding (assumes the file is saved as UTF-8 — confirm if it was produced
# by a tool that writes another encoding).
with open('superlinear.txt', encoding='utf-8') as file:
    essay = file.read()

In [35]:
# Extraction target: a model with a single field holding a list of sentence strings.
# NOTE(review): documented with comments rather than a class docstring on purpose —
# a pydantic model's docstring presumably ends up in the schema passed to the
# extraction chain, which could change what is sent to the LLM; confirm before adding one.
class Sentences(BaseModel):
    sentences: List[str]
    
# Extraction: build a chain that uses `llm` to parse text into `Sentences` objects.
extraction_chain = create_extraction_chain_pydantic(pydantic_schema=Sentences, llm=llm)

In [36]:
def get_propositions(text):
    """Turn a piece of text into a list of proposition sentences.

    The text is first sent through the module-level ``runnable`` (hub prompt piped
    into the chat model); the message content it returns is then handed to the
    module-level ``extraction_chain`` to be parsed into ``Sentences`` objects.

    Args:
        text: The text to decompose, passed to the prompt as its ``"input"`` value.

    Returns:
        The ``sentences`` list of the first extracted ``Sentences`` object, or an
        empty list when the extraction chain returns nothing.
    """
    runnable_output = runnable.invoke({"input": text}).content

    extracted = extraction_chain.run(runnable_output)
    # The extraction step can come back empty (e.g. nothing parseable in the model
    # output); indexing [0] would then raise IndexError and abort the caller's loop.
    if not extracted:
        return []

    # Only the first extracted object is used, matching the original behaviour.
    return extracted[0].sentences

In [37]:

# Split the essay into paragraphs wherever two consecutive newlines occur.
paragraphs = essay.split("\n\n")

In [38]:

# Display how many paragraphs the split produced.
len(paragraphs)

53

In [39]:

# Accumulates the propositions of every processed paragraph, in order.
essay_propositions = []

# Only the first five paragraphs are processed in this cell.
first_paragraphs = paragraphs[:5]
for i, para in enumerate(first_paragraphs):
    essay_propositions += get_propositions(para)
    print(f"Done with {i}")

Done with 0
Done with 1
Done with 2
Done with 3
Done with 4


In [40]:

# Report the total, then show the first ten propositions as the cell's output.
proposition_count = len(essay_propositions)
print(f"You have {proposition_count} propositions")
essay_propositions[:10]



You have 26 propositions


['October 2023',
 "One of the most important things I didn't understand about the world when I was a child is the degree to which the returns for performance are superlinear.",
 "I didn't understand one of the most important things about the world when I was a child.",
 'The returns for performance are superlinear.',
 "The degree to which the returns for performance are superlinear is one of the most important things I didn't understand about the world when I was a child.",
 'Teachers implicitly told us the returns were linear.',
 'Coaches implicitly told us the returns were linear.',
 "I heard 'You get out what you put in' a thousand times.",
 'They meant well.',
 'This is rarely true.']

In [41]:
from agentic_chunker import AgenticChunker

In [42]:

# Create the chunker with no arguments (whatever defaults AgenticChunker defines).
ac = AgenticChunker()

In [43]:
# Feed all extracted propositions to the chunker; the captured output below shows
# it logging each proposition and the chunk it was added to or created for.
ac.add_propositions(essay_propositions)


Adding: 'October 2023'
No chunks, creating a new one
Created new chunk (2ca72): Date & Times

Adding: 'One of the most important things I didn't understand about the world when I was a child is the degree to which the returns for performance are superlinear.'
No chunks found
Created new chunk (1f4cf): Performance & Success Metrics

Adding: 'I didn't understand one of the most important things about the world when I was a child.'
No chunks found
Created new chunk (af41b): Personal Growth & Realizations

Adding: 'The returns for performance are superlinear.'
Chunk Found (1f4cf), adding to: Performance & Success Metrics

Adding: 'The degree to which the returns for performance are superlinear is one of the most important things I didn't understand about the world when I was a child.'
Chunk Found (1f4cf), adding to: Superlinear Returns in Performance & Success

Adding: 'Teachers implicitly told us the returns were linear.'
Chunk Found (1f4cf), adding to: Understanding Superlinear Returns


In [44]:

# Print the chunker's current chunks; per the captured output this lists each
# chunk's ID, summary and propositions.
ac.pretty_print_chunks()


You have 11 chunks

Chunk #0
Chunk ID: 2ca72
Summary: This chunk contains information about dates and times.
Propositions:
    -October 2023



Chunk #1
Chunk ID: 1f4cf
Summary: This chunk discusses the concept of superlinear returns for performance, emphasizing its importance in various aspects of life and challenging the commonly taught linear returns mindset.
Propositions:
    -One of the most important things I didn't understand about the world when I was a child is the degree to which the returns for performance are superlinear.
    -The returns for performance are superlinear.
    -The degree to which the returns for performance are superlinear is one of the most important things I didn't understand about the world when I was a child.
    -Teachers implicitly told us the returns were linear.
    -Coaches implicitly told us the returns were linear.
    -I heard 'You get out what you put in' a thousand times.
    -It's obviously true that the returns for performance are superlinea

In [1]:
from transformers import pipeline

# Build a "summarization" pipeline using the facebook/bart-large-cnn model.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Sample input text for the summarizer. The string is passed to the model as-is,
# so its typos and missing spaces are deliberately left untouched here.
ARTICLE = """ Take a look around. Now you may be in a bookstore, a coffee shop, a tea house,
at work, at home, or anywhere else. Take a look around. Pay attention to people. If you arealone, think about the last time you were around a lot of people. What do those peoplewear, what kind of food do they eat;  what phone does he hold;  do they have their owncars, if so what kind; what kind of house does he live in; and the most important question ishow to make money. Try to find the answers to these questions with your close friends,relatives and loved ones.The world of slaves and masters. Observing all those people, studying their lifestyle and income, you will see that all thepeople around can be divided into three groups: slaves of money, people who do not associate with money, and masters of money. Whether a person is rich, poor, in debt, or owed by others, a hired worker, a businessman, or a politician, everyone is one of these three types of people. But why can we divide people into three groups like this? To understand
it, let's give the characteristics of these three groups.

The first is people who don't mess with money. For them, money is dirty, happiness is not in money, money is a weapon of evil. If a relative or friend asks him for money for his services, he is offended, and he is ready to work for free without asking for money. Usually such people do not have a lot of money, regardless of salary, they try to do what they like. People around him may or may not understand him - he is not very interested in him.
It is enough not to deal with money. If he gets money, he immediately tries to get rid of it: he gives it to his relatives, donates it to charity, scatters it on the field, somehow gets rid of it. Such people are rare. It doesn't even cover 1% of the world's people.

Second, slaves of money. As the name suggests, their entire life is governed by money. He asks for money and makes every decision with its consent. If you need to buy clothes, "what clothes can I buy with my money?" he asks. When making a choice to travel by bus, taxi, own car, when choosing where and what kind of food to eat, when deciding what gift to get for a child, when deciding what kind of education to get, everything in life is asked for money. Even what he works for,
how long he rests, where he lives, what his health is like, who he hangs out with... In general, money decides how he lives his life. He only obeys what the money means. His favorite words are expensive, I don't have enough money, money. 80% of people in the world belong to this group.

The third is the masters of money. They have complete control over money. They decide where they want to go, what they want to eat, what they want to see, and what they want to buy. He does not do anything to earn money, to increase money. But still the money keeps increasing. The money he spends
every month is less than the money he receives, so his money will increase over time. And those who have more money earn more money. 1% of people in the world are masters of money. At this point, you have three questions: Is it possible to become a master of money? It will be. The entire content of this book is aimed at opening that path.
"""

# Summarize ARTICLE with sampling disabled and the given length bounds, then print
# the raw pipeline result.
summary = summarizer(ARTICLE, max_length=300, min_length=30, do_sample=False)
print(summary)



  from .autonotebook import tqdm as notebook_tqdm


In [1]:
from langchain_community.document_loaders import UnstructuredPDFLoader

In [10]:
# Configure a loader for input.pdf. NOTE(review): mode="elements" presumably yields
# one document per detected page element (the printed sample below is a single
# 'Header' element) — confirm against the loader documentation.
loader = UnstructuredPDFLoader("input.pdf", mode="elements")




In [11]:
# Run the loader; the result is indexed like a sequence in the next cell.
data = loader.load()

In [14]:
# Inspect the last loaded item (its page_content and metadata are printed).
print(data[-1])

page_content='Machine Translated by Google' metadata={'source': 'input.pdf', 'coordinates': {'points': ((14.7399998, 6.809990000000084), (14.7399998, 16.809990000000084), (150.9099998, 16.809990000000084), (150.9099998, 6.809990000000084)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'filename': 'input.pdf', 'languages': ['eng'], 'last_modified': '2024-07-10T14:46:46', 'page_number': 146, 'filetype': 'application/pdf', 'category': 'Header'}


In [16]:
# Flatten renamed.txt onto a single line by turning every newline into a space,
# then write the result to output.txt.
# The encoding is explicit on both sides so the round trip does not depend on the
# platform's default locale encoding (assumes renamed.txt is UTF-8 — confirm).
with open("renamed.txt", "r", encoding="utf-8") as file:
    text = file.read().replace("\n", " ")

with open("output.txt", "w", encoding="utf-8") as file:
    file.write(text)

In [19]:
# Count the blank-line-separated blocks in renamed.txt.
# The encoding is explicit so the read does not depend on the platform's default
# locale encoding (assumes renamed.txt is UTF-8 — confirm).
with open("renamed.txt", 'r', encoding="utf-8") as file:
    lines = file.read()

# Splitting only needs the in-memory string, so the file is closed first.
lines_divided = lines.split("\n\n")
print(len(lines_divided))

13
