In [14]:

# Sample FAQ chunks for one tenant. Each entry pairs the chunk text ('content')
# with its 'metadata': tenant id, source label and position of the chunk.
# Note: the refund chunk uses a real en dash ("5–7"); it previously held the
# mis-decoded byte sequence "â€“", which would have been embedded verbatim.
documents = [
    {
        'content': "How do I reset my password?\nTo reset your password, click on the 'Forgot Password' link on the login page.\nYou will receive a password reset email within a few minutes.\nIf you do not receive the email, check your spam folder.",
        'metadata': {'tenant_id': 'test_company',
                     'source': 'faq',
                     'chunk_index': 0},
    },
    {
        'content': 'Refund Policy\nRefunds are processed within 5–7 business days after approval.\nRefunds are not available for discounted or promotional plans.\nOnce a refund is processed, it may take additional time for the bank to reflect the amount.',
        'metadata': {'tenant_id': 'test_company',
                     'source': 'faq',
                     'chunk_index': 1},
    },
    {
        'content': 'Account Deletion\nYou can request account deletion by contacting support.\nOnce deleted, your data cannot be recovered.',
        'metadata': {'tenant_id': 'test_company',
                     'source': 'faq',
                     'chunk_index': 2},
    },
]

In [15]:
import os
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
# Read variables from a local .env file into the process environment before
# the client is built (presumably this is where the OpenAI API key comes
# from; confirm against the project's .env).
load_dotenv()

# "text-embedding-3-small" was picked as the cheap-but-strong option.
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")



In [16]:
# Split the documents into two parallel lists that stay aligned by position:
# the raw text to embed, and the metadata that belongs to each text.
texts = [entry["content"] for entry in documents]
metadatas = [entry["metadata"] for entry in documents]


In [17]:
# Embed all chunk texts in a single call and keep the result as `vectors`.
vectors = embeddings_model.embed_documents(texts)

# NOTE(review): a bare `vectors` displays every float of every embedding;
# a slice such as vectors[0][:10] is far easier to read.
vectors


[[0.00011646169878076762,
  -0.03564607352018356,
  0.002262410242110491,
  0.0008837340865284204,
  -0.02257038839161396,
  0.00527938362210989,
  -0.0009297751821577549,
  0.046941496431827545,
  0.0015986505895853043,
  0.03515496477484703,
  0.01756725274026394,
  0.029875582084059715,
  0.03169676661491394,
  0.02300010621547699,
  -0.0011625387705862522,
  0.016001852229237556,
  -0.018723396584391594,
  -0.008486916311085224,
  -0.08700751513242722,
  0.020503653213381767,
  -0.022509001195430756,
  0.03282221406698227,
  0.010599693283438683,
  0.04256247356534004,
  0.014958254061639309,
  0.019889771938323975,
  -0.00022908663959242404,
  -0.05557676777243614,
  -0.013566788285970688,
  0.014518305659294128,
  -0.005289615131914616,
  -0.0224476121366024,
  -0.07575301826000214,
  0.02762468345463276,
  0.010732701048254967,
  -0.012789204716682434,
  -0.012676659971475601,
  0.03879733011126518,
  0.03098057024180889,
  0.011008947156369686,
  -0.039288438856601715,
  0.0002

In [18]:
# Only a cell's last expression is displayed, so a bare `len(vectors)` on the
# first line would be evaluated and silently discarded. Turn the chunk count
# into an explicit invariant instead: one embedding per input text.
assert len(vectors) == len(texts), "expected one embedding per input text"
len(vectors[0])       # vector dimension (1536)


1536

In [19]:
# Preview the first 10 components of the first chunk's embedding.
vectors[0][:10]

[0.00011646169878076762,
 -0.03564607352018356,
 0.002262410242110491,
 0.0008837340865284204,
 -0.02257038839161396,
 0.00527938362210989,
 -0.0009297751821577549,
 0.046941496431827545,
 0.0015986505895853043,
 0.03515496477484703]

In [20]:
# Bundle each chunk's text, embedding and metadata into one record by pairing
# the three parallel lists position by position.
embedded_docs = [
    {"content": chunk_text, "embedding": chunk_vector, "metadata": chunk_meta}
    for chunk_text, chunk_vector, chunk_meta in zip(texts, vectors, metadatas)
]

embedded_docs


[{'content': "How do I reset my password?\nTo reset your password, click on the 'Forgot Password' link on the login page.\nYou will receive a password reset email within a few minutes.\nIf you do not receive the email, check your spam folder.",
  'embedding': [0.00011646169878076762,
   -0.03564607352018356,
   0.002262410242110491,
   0.0008837340865284204,
   -0.02257038839161396,
   0.00527938362210989,
   -0.0009297751821577549,
   0.046941496431827545,
   0.0015986505895853043,
   0.03515496477484703,
   0.01756725274026394,
   0.029875582084059715,
   0.03169676661491394,
   0.02300010621547699,
   -0.0011625387705862522,
   0.016001852229237556,
   -0.018723396584391594,
   -0.008486916311085224,
   -0.08700751513242722,
   0.020503653213381767,
   -0.022509001195430756,
   0.03282221406698227,
   0.010599693283438683,
   0.04256247356534004,
   0.014958254061639309,
   0.019889771938323975,
   -0.00022908663959242404,
   -0.05557676777243614,
   -0.013566788285970688,
   0.0145

In [21]:
# Embed the user question with the same model that embedded the documents.
query = "How long does a refund take?"

query_vector = embeddings_model.embed_query(query)

# Query and document embeddings can only be compared if they have the same
# dimension, so check that before any similarity computation.
assert len(query_vector) == len(vectors[0]), "query/document embedding dimensions differ"

len(query_vector)


1536