In [47]:
import os
from langchain_ollama import ChatOllama
import numpy as np

In [36]:
PDF_FILE = "paul.pdf"

# We'll be using Llama 3.1 8B for this example.
MODEL = "llama3.1"

In [7]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(PDF_FILE)
pages = loader.load()

print(f"Number of pages: {len(pages)}")
print(f"Length of a page: {len(pages[1].page_content)}")
# print("Content of a page:", pages[1].page_content)

Number of pages: 9
Length of a page: 3272


# Splitting the pages in chunks


In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)

chunks = splitter.split_documents(pages)
print(f"Number of chunks: {len(chunks)}")
print(f"Length of a chunk: {len(chunks[1].page_content)}")
print("Content of a chunk:", chunks[1].page_content)

Number of chunks: 23
Length of a chunk: 1236
Content of a chunk: took better advantage of it than Stripe. At YC we use the term
"Collison installation" for the technique they invented. More
diffident founders ask "Will you try our beta?" and if the answer is
yes, they say "Great, we'll send you a link." But the Collison
brothers weren't going to wait. When anyone agreed to try Stripe
they'd say "Right then, give me your laptop" and set them up on
the spot.
There are two reasons founders resist going out and recruiting
users individually. One is a combination of shyness and laziness.
They'd rather sit at home writing code than go out and talk to a
bunch of strangers and probably be rejected by most of them.
But for a startup to succeed, at least one founder (usually the
CEO) will have to spend a lot of time on sales and marketing. [2]
The other reason founders ignore this path is that the absolute
numbers seem so small at first. This can't be how the big, famous
startups got started, th

In [9]:
type(pages)

list

In [10]:
type(pages[0])

langchain_core.documents.base.Document

In [16]:
# import inspect
# inspect.findsource(RecursiveCharacterTextSplitter.split_documents)

In [19]:
chunks

[Document(metadata={'source': 'paul.pdf', 'page': 0}, page_content="Want to start a startup? Get funded by Y Combinator.\nJuly 2013\nOne of the most common types of advice we give at Y Combinator\nis to do things that don't scale. A lot of would-be founders believe\nthat startups either take off or don't. You build something, make\nit available, and if you've made a better mousetrap, people beat a\npath to your door as promised. Or they don't, in which case the\nmarket must not exist. [1]\nActually startups take off because the founders make them take\noff. There may be a handful that just grew by themselves, but\nusually it takes some sort of push to get them going. A good\nmetaphor would be the cranks that car engines had before they\ngot electric starters. Once the engine was going, it would keep\ngoing, but there was a separate and laborious process to get it\ngoing.\nRecruit\nThe most common unscalable thing founders have to do at the\nstart is to recruit users manually. Nearly al

# Vector Store

In [22]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings.ollama import OllamaEmbeddings

In [37]:
embeddings = OllamaEmbeddings(model=MODEL)

In [38]:
embeddings

OllamaEmbeddings(base_url='http://localhost:11434', model='llama3.1', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

In [39]:
vectorstore = FAISS.from_documents(chunks, embeddings)

In [40]:
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x7fbf72598220>

In [45]:
# Let's embed one chunk manually
chunk_embed = embeddings.embed_query(chunks[0].page_content)

In [49]:
chunk_embed = np.array(chunk_embed)
chunk_embed

array([ 0.03564216,  0.0808422 ,  1.59629381, ...,  1.49724066,
       -0.05644307,  0.38610113])

In [50]:
chunk_embed.shape

(4096,)

# Retriever

In [41]:
retriever = vectorstore.as_retriever()

In [42]:
retriever.invoke("What can you get away with when you only have a small number of users?")

[Document(id='b9640d0d-0640-40c8-8bfc-54d25c750b5d', metadata={'source': 'paul.pdf', 'page': 2}, page_content="in charge of their narrow domain of building things, rather than\nrunning the whole show. You can be ornery when you're Scotty,\nbut not when you're Kirk.\nAnother reason founders don't focus enough on individual\ncustomers is that they worry it won't scale. But when founders of\nlarval startups worry about this, I point out that in their current\nstate they have nothing to lose. Maybe if they go out of their way\nto make existing users super happy, they'll one day have too\nmany to do so much for. That would be a great problem to have.\nSee if you can make it happen. And incidentally, when it does,\nyou'll find that delighting customers scales better than you\nexpected. Partly because you can usually find ways to make\nanything scale more than you would have predicted, and partly\nbecause delighting customers will by then have permeated your\nculture.\nI have never once seen 

# Parsing the model's response


In [52]:
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser

In [53]:
model = ChatOllama(model=MODEL, temperature=0)
model.invoke("Who is the president of the United States?")

AIMessage(content='As of my last update in April 2023, Joe Biden is the President of the United States. However, please note that this information may have changed since then due to updates or changes in leadership. For the most current and accurate information, I recommend checking a reliable news source or official government website for the latest details on the presidency.', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2024-12-27T09:51:51.867624571Z', 'done': True, 'done_reason': 'stop', 'total_duration': 639939503, 'load_duration': 10162385, 'prompt_eval_count': 19, 'prompt_eval_duration': 14000000, 'eval_count': 69, 'eval_duration': 615000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-98016bba-3b14-45dc-a78d-091df4707910-0', usage_metadata={'input_tokens': 19, 'output_tokens': 69, 'total_tokens': 88})

In [54]:
parser = StrOutputParser()

chain = model | parser 
print(chain.invoke("Who is the president of the United States?"))

As of my last update in April 2023, Joe Biden is the President of the United States. However, please note that this information may have changed since then due to updates or changes in leadership. For the most current and accurate information, I recommend checking a reliable news source or official government website for the latest details on the presidency.


# Setting up a prompt


In [55]:
from langchain.prompts import PromptTemplate

template = """
You are an assistant that provides answers to questions based on
a given context. 

Answer the question based on the context. If you can't answer the
question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: {context}

Question: {question}
"""

In [56]:
prompt = PromptTemplate.from_template(template)

In [57]:
print(prompt.format(context="Here is some context", question="Here is a question"))


You are an assistant that provides answers to questions based on
a given context. 

Answer the question based on the context. If you can't answer the
question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: Here is some context

Question: Here is a question



# Adding the prompt to the chain

In [58]:
chain = prompt | model | parser

chain.invoke({
    "context": "Anna's sister is Susan", 
    "question": "Who is Susan's sister?"
})

'Anna.'

# Adding the retriever to the chain

<img src="https://raw.githubusercontent.com/svpino/gentle-intro-to-rag/b7755d1e1ceb8119af84d766466ca5582e679721/images/chain2.png" alt="Drawing" style="width: 500px;"/>

In [60]:
from operator import itemgetter

# The question needs to be input to both the retriever and the prompt
chain = (
    # The prompt needs two inputs: the context and the question
    # The context is the output of the retriever
    # The retriever needs the question to retrieve the context, so "question" is passed to it
    # But prompt, also needs the question, so we need to pass it to it as well
    # Finally, since prompt receives two inputs, we need to pass both the context and the question to it
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

# Using the chain to answer questions

In [61]:
questions = [
    "What can you get away with when you only have a small number of users?",
    "What's the most common unscalable thing founders have to do at the start?",
    "What's one of the biggest things inexperienced founders and investors get wrong about startups?",
]

In [62]:
for question in questions:
    print(f"Question: {question}")
    print(f"Answer: {chain.invoke({'question': question})}")
    print("*************************\n")

Question: What can you get away with when you only have a small number of users?
Answer: You can provide a level of service no big company can.
*************************

Question: What's the most common unscalable thing founders have to do at the start?
Answer: Going out and recruiting users individually.
*************************

Question: What's one of the biggest things inexperienced founders and investors get wrong about startups?
Answer: They underestimate the power of compound growth.
*************************

