<h1 align="center"><font color="yellow">PGVector: Crie, armazene e consulte Embeddings OpenAI no PostgreSQL usando pgvector</font></h1>

<font color="yellow">Data Scientist: Dr. Eddy Giusepe Chirinos Isidro</font>

Vamos seguir os seguintes passos:

* `docker pull ankane/pgvector`

* `docker-compose up -d`

* Conecte-se ao servidor





In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pgvector import PGVector, DistanceStrategy
from langchain.docstore.document import Document

import os
import openai
from dotenv import find_dotenv, load_dotenv
# Read the local .env file, then hand the OpenAI key from the environment
# to the openai client.
_ = load_dotenv(find_dotenv())
openai.api_key = os.getenv("OPENAI_API_KEY")

# Embedding model object passed to the vector store further down.
embeddings = OpenAIEmbeddings()

# All database settings come from environment variables; only the driver
# has a fallback value ("psycopg") when DB_DRIVER is not set.
db_params = {
    "driver": os.getenv("DB_DRIVER", "psycopg"),
    "host": os.getenv("DB_HOST"),
    "port": os.getenv("DB_PORT"),
    "database": os.getenv("DB_NAME"),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
}
connection_string = PGVector.connection_string_from_db_params(**db_params)


In [None]:
# Load the source text file as LangChain documents.
# The encoding is pinned so decoding does not depend on the OS locale
# (without it the platform default encoding is used, which is not UTF-8
# on every system). Assumes the file is saved as UTF-8 - confirm.
loader = TextLoader('./data/exemplo_DETRAN-DF.txt', encoding='utf-8')
documents = loader.load()

In [None]:
# Bare expression: as the last statement of a notebook cell, the loaded
# documents are echoed as the cell output.
documents

In [None]:
# NOTE(review): RecursiveCharacterTextSplitter is imported here but the cell
# uses CharacterTextSplitter - confirm which splitter was intended.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Newline-separated splitting with chunk_size=250 and chunk_overlap=60.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=250, chunk_overlap=60)
docs = text_splitter.split_documents(documents)

# Number of documents before splitting, then number of chunks after.
print(len(documents))
print(len(docs))

In [None]:
# Bare expression: displays the split chunks as the notebook cell output.
docs

In [None]:
from typing import List, Tuple

# The PGVector module will try to create a table with the name of the
# collection, so make sure the collection name is unique and that the
# database user is allowed to create tables.
collection_name = 'DETRAN-DF'

# Embed the chunks and store them under the collection above.
db = PGVector.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_string=connection_string,
    collection_name=collection_name,
)


In [None]:
# Question to run against the stored collection.
# Alternative query: "Quais são os requisitos necessários para a educação hospitalar?"
query = "O que é a central de atendimento ao cidadão?"

# Retrieve the top 4 matches, each paired with its score.
docs_with_score: List[Tuple[Document, float]] = db.similarity_search_with_score(
    query,
    k=4,
)


In [None]:
# Bare expression: displays the raw (document, score) pairs as the cell output.
docs_with_score

In [None]:
# Print each match framed by two 100-dash rules: score, content,
# a blank line, then the document metadata.
rule = "-" * 100
for doc, score in docs_with_score:
    print(rule)
    print("Score: ", score)
    print("Conteúdo: ", doc.page_content)
    print()
    print(doc.metadata)
    print(rule)
    

In [None]:
# Open the 'DETRAN-DF' collection through a PGVector instance that uses
# cosine distance.
store = PGVector(
    collection_name='DETRAN-DF',
    connection_string=connection_string,
    embedding_function=embeddings,
    distance_strategy=DistanceStrategy.COSINE,
)

# Wrap the store as a retriever, passing k=3 as the search argument.
retriever_search_kwargs = {"k": 3}
retriever = store.as_retriever(search_kwargs=retriever_search_kwargs)


In [None]:
# Bare expression: displays the retriever object's repr as the cell output.
retriever

In [None]:

# Ask the retriever for the documents relevant to the question; as the last
# expression of the cell, the result is displayed as the cell output.
retriever.get_relevant_documents(
    "O que é a central de atendimento ao cidadão?"
)
