In [10]:
%pip install supabase unstructured langchain_text_splitters -qU

Note: you may need to restart the kernel to use updated packages.


In [17]:
import getpass
import os

# Credentials that later cells read from the environment.
# A value that is already set (e.g. exported in the shell) is kept as-is; anything
# missing or empty is prompted for with hidden input, so no secret has to be typed
# into the notebook source.
for _name in ("SUPABASE_URL", "SUPABASE_KEY", "OPENAI_API_KEY"):
    if not os.environ.get(_name):
        os.environ[_name] = getpass.getpass(f"{_name}: ")


In [18]:
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from supabase.client import create_client, Client

# Supabase connection details come from the environment variables set earlier.
supabase_url = os.environ["SUPABASE_URL"]
supabase_key = os.environ["SUPABASE_KEY"]
supabase: Client = create_client(supabase_url, supabase_key)

# OpenAIEmbeddings is taken from langchain_openai (the same package the chat model
# is imported from) rather than the deprecated langchain_community.embeddings class.
# No key is passed explicitly; it is expected to come from OPENAI_API_KEY.
embeddings = OpenAIEmbeddings()

In [19]:
from langchain_community.document_loaders import UnstructuredURLLoader

# Article to index.
urls = ["https://supabase.com/blog/openai-embeddings-postgres-vector"]

# continue_on_failure=False makes a failed download or parse raise here, instead of
# being logged and skipped, which would leave `docs` empty and silently produce
# zero chunks further down.
loader = UnstructuredURLLoader(urls=urls, continue_on_failure=False)
docs = loader.load()

In [20]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded page into overlapping chunks. add_start_index=True stores each
# chunk's offset in its metadata under "start_index".
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
splits = text_splitter.split_documents(docs)

# Number of chunks produced.
len(splits)

22

In [6]:
# Preview only the first few chunks; displaying the whole list floods the output.
splits[:3]

[Document(metadata={'source': 'https://supabase.com/blog/openai-embeddings-postgres-vector', 'start_index': 0}, page_content="Back\n\nBlog\n\nStoring OpenAI embeddings in Postgres with pgvector\n\n06 Feb 2023\n\n15 minute read\n\nGreg RichardsonEngineering\n\nA new PostgreSQL extension is now available in Supabase: pgvector, an open-source vector similarity search.\n\nThe exponential progress of AI functionality over the past year has inspired many new real world applications. One specific challenge has been the ability to store and query embeddings at scale. In this post we'll explain what embeddings are, why we might want to use them, and how we can store and query them in PostgreSQL using pgvector.\n\n🆕 Supabase has now released an open source toolkit for developing AI applications using Postgres and pgvector. Learn more in the AI & Vectors docs.\n\nWhat are embeddings?#\n\nEmbeddings capture the “relatedness” of text, images, video, or other types of information. This relatedness i

In [14]:
# Embed the chunks and back the vector store with the Supabase "documents" table,
# searched through the "match_documents" query.
# NOTE(review): from_documents presumably writes the chunks on every execution —
# confirm whether re-running this cell duplicates rows in the table.
vectorstore = SupabaseVectorStore.from_documents(
    splits,
    embeddings,
    client=supabase,
    table_name="documents",
    query_name="match_documents",
)

# Similarity retriever that returns the 6 closest chunks.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

# Smoke-test the retriever with a sample question.
retriever_docs = retriever.invoke(
    "How to store embedding with pgvector?"
)

In [10]:
# Display the documents the retriever returned for the sample question.
retriever_docs

[Document(metadata={'source': 'https://supabase.com/blog/openai-embeddings-postgres-vector', 'start_index': 4456}, page_content="What if I want to create/update/delete embeddings dynamically?\n\nWhat if I'm not using Python?\n\nUsing PostgreSQL#\n\nEnter pgvector, an extension for PostgreSQL that allows you to both store and query vector embeddings within your database. Let's try it out.\n\nFirst we'll enable the Vector extension. In Supabase, this can be done from the web portal through Database → Extensions. You can also do this in SQL by running:\n\n_10\n\ncreate extension vector;\n\nNext let's create a table to store our documents and their embeddings:\n\n_10\n\ncreate table documents (\n\n_10\n\nid bigserial primary key,\n\n_10\n\ncontent text,\n\n_10\n\nembedding vector(1536)\n\n_10\n\n);\n\npgvector introduces a new data type called vector. In the code above, we create a column named embedding with the vector data type. The size of the vector defines how many dimensions the vect

In [16]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

# Pydantic model with two fields: a person's name (str) and age (int).
# NOTE(review): nothing in the visible cells references this model — confirm
# whether it is still needed in this notebook.
class Person(BaseModel):
    name: str = Field(description="The name of the person")
    age: int = Field(description="The age of the person")

# Prompt text for the question-answering chain. {question} and {context} are
# placeholders substituted when the template is rendered.
prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved contexts to answer the question. "
    "If you don't know the answer, just say that you don't know. \n"
    "Question: {question}\n"
    "Context: {context}\n"
    "Answer:\n"
)
def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in input order, separated by a blank line ("\\n\\n").
        An empty iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

# Chat model that writes the final answer (temperature=0).
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Turn the raw prompt text into a chat prompt template (rebinds `prompt`).
prompt = ChatPromptTemplate.from_template(prompt)

# The incoming question is sent to the retriever, whose documents are formatted
# into {context}; the same question is passed through unchanged as {question}.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Ask the sample question; the answer string is the cell's output.
rag_chain.invoke("How to store embedding with pgvector?")


'To store embeddings with pgvector, you can enable the Vector extension in PostgreSQL and create a table with a column of type vector to store the embeddings. The size of the vector should match the number of dimensions in the embeddings you are working with.'