In [2]:
from dotenv import load_dotenv
import os

# 指定 .env 文件的路徑
load_dotenv(dotenv_path="../docker/custom_implements/external_knowledge_api/.env")

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [3]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004", google_api_key=GOOGLE_API_KEY)
vector = embeddings.embed_query("hello, world!")
vector[:5]

  from .autonotebook import tqdm as notebook_tqdm


[0.014134909026324749,
 -0.022324152290821075,
 -0.054603420197963715,
 -0.006284549366682768,
 -0.03392402455210686]

In [9]:
from langchain_core.documents import Document
from langchain_postgres import PGVector
from langchain_postgres.vectorstores import PGVector

# See docker command above to launch a postgres instance with pgvector enabled.
connection = "postgresql+psycopg://postgres:difyai123456@localhost:5681/dify"  # Uses psycopg3!
collection_name = "my_docs"


vector_store = PGVector(
    embeddings=embeddings,
    collection_name=collection_name,
    connection=connection,
    use_jsonb=True,
)

In [10]:
docs = [
    Document(
        page_content="there are cats in the pond",
        metadata={"id": 1, "location": "pond", "topic": "animals"},
    ),
    Document(
        page_content="ducks are also found in the pond",
        metadata={"id": 2, "location": "pond", "topic": "animals"},
    ),
    Document(
        page_content="fresh apples are available at the market",
        metadata={"id": 3, "location": "market", "topic": "food"},
    ),
    Document(
        page_content="the market also sells fresh oranges",
        metadata={"id": 4, "location": "market", "topic": "food"},
    ),
    Document(
        page_content="the new art exhibit is fascinating",
        metadata={"id": 5, "location": "museum", "topic": "art"},
    ),
    Document(
        page_content="a sculpture exhibit is also at the museum",
        metadata={"id": 6, "location": "museum", "topic": "art"},
    ),
    Document(
        page_content="a new coffee shop opened on Main Street",
        metadata={"id": 7, "location": "Main Street", "topic": "food"},
    ),
    Document(
        page_content="the book club meets at the library",
        metadata={"id": 8, "location": "library", "topic": "reading"},
    ),
    Document(
        page_content="the library hosts a weekly story time for kids",
        metadata={"id": 9, "location": "library", "topic": "reading"},
    ),
    Document(
        page_content="a cooking class for beginners is offered at the community center",
        metadata={"id": 10, "location": "community center", "topic": "classes"},
    ),
]

vector_store.add_documents(docs)

['05fbbba9-8891-4b93-82b6-a0089885496f',
 '6d055d17-0284-4cc0-a3e1-6257940560eb',
 '41fd6e33-3850-47d2-bb12-366f01ced544',
 '08b7e325-ac3d-464f-b448-6755d041a352',
 'f9b26ba8-5fa5-4054-a62d-08b18e834f72',
 '1e992120-1757-4264-9644-1cba78825b08',
 '756cb8c1-998f-4f13-9326-fa5f90040ddf',
 '89b1669d-6acc-4e3a-aaf5-a618384c8aac',
 '6db1a8f1-2617-4344-acb3-da014d647709',
 'b774cd25-5b78-458f-870e-79c15fd7a81c']

In [12]:
results = vector_store.similarity_search(
    "kitty", k=10, filter={"id": {"$in": [1, 5, 2]}}
)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

* there are cats in the pond [{'id': 1, 'topic': 'animals', 'location': 'pond'}]
* the new art exhibit is fascinating [{'id': 5, 'topic': 'art', 'location': 'museum'}]
* ducks are also found in the pond [{'id': 2, 'topic': 'animals', 'location': 'pond'}]


In [40]:
results = vector_store.similarity_search_with_score(
    "kitty", k=10
)
for doc in results:
    print(doc)

(Document(id='05fbbba9-8891-4b93-82b6-a0089885496f', metadata={'id': 1, 'topic': 'animals', 'location': 'pond'}, page_content='there are cats in the pond'), 0.46418707171232376)
(Document(id='f9b26ba8-5fa5-4054-a62d-08b18e834f72', metadata={'id': 5, 'topic': 'art', 'location': 'museum'}, page_content='the new art exhibit is fascinating'), 0.5485132875996297)
(Document(id='1e992120-1757-4264-9644-1cba78825b08', metadata={'id': 6, 'topic': 'art', 'location': 'museum'}, page_content='a sculpture exhibit is also at the museum'), 0.5818487194706126)
(Document(id='756cb8c1-998f-4f13-9326-fa5f90040ddf', metadata={'id': 7, 'topic': 'food', 'location': 'Main Street'}, page_content='a new coffee shop opened on Main Street'), 0.5834100818303845)
(Document(id='6d055d17-0284-4cc0-a3e1-6257940560eb', metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}, page_content='ducks are also found in the pond'), 0.5889675167527818)
(Document(id='6db1a8f1-2617-4344-acb3-da014d647709', metadata={'id': 9, 

In [42]:
filtered_results = [result for result in results if result[1] > 0.6]
filtered_results

[(Document(id='89b1669d-6acc-4e3a-aaf5-a618384c8aac', metadata={'id': 8, 'topic': 'reading', 'location': 'library'}, page_content='the book club meets at the library'),
  0.6063067544691274),
 (Document(id='08b7e325-ac3d-464f-b448-6755d041a352', metadata={'id': 4, 'topic': 'food', 'location': 'market'}, page_content='the market also sells fresh oranges'),
  0.6121789490178853),
 (Document(id='41fd6e33-3850-47d2-bb12-366f01ced544', metadata={'id': 3, 'topic': 'food', 'location': 'market'}, page_content='fresh apples are available at the market'),
  0.6131443546681734),
 (Document(id='b774cd25-5b78-458f-870e-79c15fd7a81c', metadata={'id': 10, 'topic': 'classes', 'location': 'community center'}, page_content='a cooking class for beginners is offered at the community center'),
  0.6569217254094291)]

In [43]:
sorted_results = sorted(filtered_results, key=lambda x: x[1], reverse=True)
sorted_results

[(Document(id='b774cd25-5b78-458f-870e-79c15fd7a81c', metadata={'id': 10, 'topic': 'classes', 'location': 'community center'}, page_content='a cooking class for beginners is offered at the community center'),
  0.6569217254094291),
 (Document(id='41fd6e33-3850-47d2-bb12-366f01ced544', metadata={'id': 3, 'topic': 'food', 'location': 'market'}, page_content='fresh apples are available at the market'),
  0.6131443546681734),
 (Document(id='08b7e325-ac3d-464f-b448-6755d041a352', metadata={'id': 4, 'topic': 'food', 'location': 'market'}, page_content='the market also sells fresh oranges'),
  0.6121789490178853),
 (Document(id='89b1669d-6acc-4e3a-aaf5-a618384c8aac', metadata={'id': 8, 'topic': 'reading', 'location': 'library'}, page_content='the book club meets at the library'),
  0.6063067544691274)]

In [44]:
final_results = sorted_results[:10]
final_results

[(Document(id='b774cd25-5b78-458f-870e-79c15fd7a81c', metadata={'id': 10, 'topic': 'classes', 'location': 'community center'}, page_content='a cooking class for beginners is offered at the community center'),
  0.6569217254094291),
 (Document(id='41fd6e33-3850-47d2-bb12-366f01ced544', metadata={'id': 3, 'topic': 'food', 'location': 'market'}, page_content='fresh apples are available at the market'),
  0.6131443546681734),
 (Document(id='08b7e325-ac3d-464f-b448-6755d041a352', metadata={'id': 4, 'topic': 'food', 'location': 'market'}, page_content='the market also sells fresh oranges'),
  0.6121789490178853),
 (Document(id='89b1669d-6acc-4e3a-aaf5-a618384c8aac', metadata={'id': 8, 'topic': 'reading', 'location': 'library'}, page_content='the book club meets at the library'),
  0.6063067544691274)]