In [30]:
import IPython
import sys 

import vertexai

PROJECT_ID = ""
REGION = ""

import langchain

# schema for the input message 
from langchain.schema import HumanMessage, SystemMessage
from langchain.schema import Document

from langchain.llms import VertexAI
from langchain.embeddings import VertexAIEmbeddings

from langchain.chat_models import ChatVertexAI
from google.cloud import aiplatform

import time 
from typing import List
from pydantic import BaseModel


##### 2 PromptTemplate
from langchain import PromptTemplate

### Example Selectors
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector


# Database 
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS


### output Parsers
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

# Document Loaders
from langchain.document_loaders import WebBaseLoader
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


## Memory 
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# chains
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import RetrievalQA

Utility for VertexAI Embedding API

In [5]:
# Utility functions for Embeddings API with rate limiting
def rate_limit(max_per_minute):
    period = 60 / max_per_minute
    print("Waiting")
    while True:
        before = time.time()
        yield
        after = time.time()
        elapsed = after - before
        sleep_time = max(0, period - elapsed)
        if sleep_time > 0:
            print(".", end="")
            time.sleep(sleep_time)


class CustomVertexAIEmbeddings(VertexAIEmbeddings, BaseModel):
    requests_per_minute: int
    num_instances_per_batch: int

    # Overriding embed_documents method
    def embed_documents(self, texts: List[str]):
        limiter = rate_limit(self.requests_per_minute)
        results = []
        docs = list(texts)

        while docs:
            # Working in batches because the API accepts maximum 5
            # documents per request to get embeddings
            head, docs = (
                docs[: self.num_instances_per_batch],
                docs[self.num_instances_per_batch :],
            )
            chunk = self.client.get_embeddings(head)
            results.extend(chunk)
            next(limiter)

        return [r.values for r in results]

TypeError: metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases

In [6]:
# LLM model
llm = VertexAI(
    model_name="text-bison@001",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

# Chat
chat = ChatVertexAI()


KeyboardInterrupt: 

In [11]:
# Embedding
EMBEDDING_QPM = 100
EMBEDDING_NUM_BATCH = 5
embeddings = CustomVertexAIEmbeddings(
    requests_per_minute=EMBEDDING_QPM,
    num_instances_per_batch=EMBEDDING_NUM_BATCH,
)

NameError: name 'CustomVertexAIEmbeddings' is not defined

In [None]:
my_text = "What day comes after Friday?"

llm(my_text)

In [7]:
chat([HumanMessage(content="Hello")])

NameError: name 'chat' is not defined

In [None]:
res = chat(
    [
        SystemMessage(
            content="You are a nice AI bot that helps a user figure out what to eat in one short sentence"
        ),
        HumanMessage(content="I like tomatoes, what should I eat?"),
    ]
)

print(res.content)

In [None]:
res = chat(
    [
        HumanMessage(
            content="What are the ingredients required for making a tomato sandwich?"
        )
    ]
)
print(res.content)

In [None]:
res = chat([HumanMessage(content="How many slices of bread you said?")])
print(res.content)

In [None]:
Document(
    page_content="This is my document. It is full of text that I've gathered from other places",
    metadata={
        "my_document_id": 234234,
        "my_document_source": "The LangChain Papers",
        "my_document_create_time": 1680013019,
    },
)

In [10]:
text = "Hi! It's time for the beach"

text_embedding = embeddings.embed_query(text)
print(f"Your embedding is length {len(text_embedding)}")
print(f"Here's a sample: {text_embedding[:5]}...")

NameError: name 'embeddings' is not defined

In [13]:
template = """
I really want to travel to {location}. What should I do there?

Respond in one short sentence
"""

prompt = PromptTemplate(
    input_variables=["location"],
    template=template,
)

final_prompt = prompt.format(location="Rome")


print(final_prompt)


I really want to travel to Rome. What should I do there?

Respond in one short sentence



In [3]:

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template="Example Input: {input}\nExample Output: {output}",
)

# Examples of locations that nouns are found
examples = [
    {"input": "pirate", "output": "ship"},
    {"input": "pilot", "output": "plane"},
    {"input": "driver", "output": "car"},
    {"input": "tree", "output": "ground"},
    {"input": "bird", "output": "nest"},
]

In [4]:
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples=examples,
    embeddings=embeddings,
    vectorstore_cls=FAISS,
    k=2,  # number of examples to produce 
)

NameError: name 'embeddings' is not defined

In [None]:
similar_prompt = FewShotPromptTemplate(
    # The object that will help select examples
    example_selector=example_selector,
    # Your prompt
    example_prompt=example_prompt,
    # Customizations that will be added to the top and bottom of your prompt
    prefix="Give the location an item is usually found in",
    suffix="Input: {noun}\nOutput:",
    # What inputs your prompt will receive
    input_variables=["noun"],
)

In [None]:
# Select a noun!
my_noun = "student"

print(similar_prompt.format(noun=my_noun))

OUTPUT PARSER

In [None]:
# How you would like your response structured. This is basically a fancy prompt template
response_schemas = [
    ResponseSchema(
        name="bad_string", description="This a poorly formatted user input string"
    ),
    ResponseSchema(
        name="good_string", description="This is your response, a reformatted response"
    ),
]

# How you would like to parse your output
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [None]:
# See the prompt template you created for formatting
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

In [None]:
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly including country, city and state names

{format_instructions}

% USER INPUT:
{user_input}

YOUR RESPONSE:
"""

prompt = PromptTemplate(
    input_variables=["user_input"],
    partial_variables={"format_instructions": format_instructions},
    template=template,
)

promptValue = prompt.format(user_input="welcom to dbln!")

print(promptValue)

### Indexes


Document Loaders

In [8]:
loader = WebBaseLoader("http://www.paulgraham.com/worked.html")

data = loader.load()

In [9]:
print(f"Found {len(data)} comments")
print(f"Here's a sample:\n\n{''.join([x.page_content[:150] for x in data[:2]])}")

Found 1 comments
Here's a sample:

What I Worked On

February 2021Before college the two main things I worked on, outside of school,
were writing and programming. I didn't write essays.


In [10]:

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=1000,
    chunk_overlap=20,
)

texts = text_splitter.split_documents(pg_work)

NameError: name 'RecursiveCharacterTextSplitter' is not defined

In [None]:
print(f"You have {len(texts)} documents")

In [None]:
print("Preview:")
print(texts[0].page_content, "\n")
print(texts[1].page_content)

### Retrievers

In [12]:
loader = WebBaseLoader("http://www.paulgraham.com/worked.html")
documents = loader.load()

# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(documents)

# Embed your texts
db = FAISS.from_documents(texts, embeddings)


# Init your retriever. Asking for just 1 document back
retriever = db.as_retriever()



retriever

NameError: name 'embeddings' is not defined

In [None]:
docs = retriever.get_relevant_documents(
    "what types of things did the author want to develop or build?"
)

print("\n\n".join([x.page_content[:200] for x in docs[:2]]))

## VectorStores

In [13]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

In [14]:
print(f"You have {len(texts)} documents")

You have 52 documents


In [15]:
embedding_list = embeddings.embed_documents([text.page_content for text in texts])

NameError: name 'embeddings' is not defined

In [16]:
print(f"You have {len(embedding_list)} embeddings")
print(f"Here's a sample of one: {embedding_list[0][:3]}...")

NameError: name 'embedding_list' is not defined

## Memory

In [18]:

conversation = ConversationChain(
    llm=llm, verbose=True, memory=ConversationBufferMemory()
)

conversation.predict(input="Hi there!")

NameError: name 'llm' is not defined

In [19]:
conversation.predict(input="What is the capital of France?")

NameError: name 'conversation' is not defined

In [None]:
conversation.predict(input="What are some popular places I can see in France?")

In [None]:
conversation.predict(input="What question did I ask first?")

### Chains


In [21]:
template = """Your job is to come up with a classic dish from the area that the users suggests.
% USER LOCATION
{user_location}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_location"], template=template)

# Holds my 'location' chain
location_chain = LLMChain(llm=llm, prompt=prompt_template)

NameError: name 'llm' is not defined

In [None]:
template = """Given a meal, give a short and simple recipe on how to make that dish at home.
% MEAL
{user_meal}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_meal"], template=template)

# Holds my 'meal' chain
meal_chain = LLMChain(llm=llm, prompt=prompt_template)

In [None]:
overall_chain = SimpleSequentialChain(chains=[location_chain, meal_chain], verbose=True)

In [None]:
review = overall_chain.run("Rome")

#### Summarize Chain

In [23]:
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)
chain.run(texts)

NameError: name 'llm' is not defined

## Question Answering Chain

In [25]:
url = "https://abc.xyz/assets/investor/static/pdf/20230203_alphabet_10K.pdf"
loader = PyPDFLoader(url)
documents = loader.load()

In [26]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
print(f"# of documents = {len(docs)}")

# of documents = 263


In [27]:
embeddings

NameError: name 'embeddings' is not defined

In [29]:
db = Chroma.from_documents(docs, embeddings)

retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

NameError: name 'embeddings' is not defined

In [31]:

qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
)


query = "What was Alphabet's net income in 2022?"
result = qa({"query": query})
print(result)

NameError: name 'llm' is not defined

In [None]:
query = "How much office space reduction took place in 2023?"
result = qa({"query": query})
print(result)