In [12]:
from dotenv import load_dotenv
import os
load_dotenv()
print("✅ Setup complete")

✅ Setup complete


In [19]:
from langchain_openai import AzureOpenAIEmbeddings
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
)
query = "What is machine learning?"
query_vector = embeddings.embed_query(query)

print(f"Query: {query}")
print(f"Vector dimention: {len(query_vector)}")
print(f"First 5 values: {query_vector[:5]}")

Query: What is machine learning?
Vector dimention: 1536
First 5 values: [-0.021972427144646645, -0.008794128894805908, 0.009013337083160877, -0.018426407128572464, -0.01260448805987835]


In [None]:
docs = [
    "Machine learning is a subset of AI",
    "Deep learning uses neural networks",
    "The weather is sunny today"
]
doc_vector = embeddings.embed_documents(docs)

print(f"Documents: {docs}")
print(f"Embedded docs: {len(doc_vector)} documents")
print((f"Top five: {doc_vector[:3]}"))

Documents: ['Machine learning is a subset of AI', 'Deep learning uses neural networks', 'The weather is sunny today']
Embedded docs: 3 documents
Top five: [[-0.012437409721314907, -0.014258624985814095, 0.007187643088400364, -0.008328333497047424, -0.0239285659044981, 0.009767157956957817, -0.003934084437787533, 0.005285413004457951, -0.0020480570383369923, -0.03782424330711365, 0.014401211403310299, 0.03766869381070137, -0.0017013131873682141, -0.015865961089730263, 0.0006671578157693148, -0.0001741820597089827, 0.024032264947891235, 0.006487674545496702, 0.025976622477173805, -0.012787394225597382, -0.024667421355843544, 0.02864687331020832, 0.0032389762345701456, -0.028698723763227463, -0.00596269778907299, -0.005797427147626877, 0.011011547408998013, -0.03380590304732323, -0.014232699759304523, 0.0010953216115012765, 0.03168007358908653, -0.0008992655202746391, -0.010706931352615356, -0.008691280148923397, -0.011523561552166939, -0.0033767016138881445, -0.003642430528998375, 0.0133

In [9]:
!uv pip install langchain-google-genai

[2mAudited [1m1 package[0m [2min 500ms[0m[0m


In [17]:
if os.getenv("GOOGLE_API_KEY"):
    from langchain_google_genai import GoogleGenerativeAIEmbeddings

    gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

    vector = gemini_embeddings.embed_query("Hello gemini")
    print(f"Gemini vector dimention: {len(vector)}")
    print(f"Top five: {vector[:5]}")
else:
    print("⚠️ Google API key not found. Set GOOGLE_API_KEY in .env")

Gemini vector dimention: 3072
Top five: [-0.015084697, 0.0035684705, 0.0005565054, -0.059673294, -0.007449892]


In [20]:
import numpy as np

def cosine_similarity(vec1, vec2):
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

texts = [
    "I love programming",
    "I enjoy coding",
    "The weather is nice"
]

vectors = [embeddings.embed_query(text) for text in texts]

print("Similarity score: ")
print(f"'{texts[0]}' vs '{texts[1]}': {cosine_similarity(vectors[0], vectors[1]):.4f}")
print(f"'{texts[0]}' vs '{texts[2]}': {cosine_similarity(vectors[0], vectors[2]):.4f}")
print("\nSimilar meanings = Higher similarity score!")

Similarity score: 
'I love programming' vs 'I enjoy coding': 0.9455
'I love programming' vs 'The weather is nice': 0.7787

Similar meanings = Higher similarity score!
