<a href="https://colab.research.google.com/github/21f3001443/notebooks/blob/main/embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import userdata

# Fetch the value stored under the name 'OPENAI_API_KEY' through google.colab's
# userdata helper so the key is not hard-coded in the notebook. Later cells
# read this module-level name when building their Authorization headers.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

In [None]:
import requests
# List the models available to this API key and print the raw JSON payload.
r = requests.get(
    "https://api.openai.com/v1/models",
    headers={"Authorization": f"Bearer {OPENAI_API_KEY}"},
    # requests applies no timeout by default; without one a stalled
    # connection would block this cell indefinitely.
    timeout=30,
)

print(r.json())

{'object': 'list', 'data': [{'id': 'gpt-4-0613', 'object': 'model', 'created': 1686588896, 'owned_by': 'openai'}, {'id': 'gpt-4', 'object': 'model', 'created': 1687882411, 'owned_by': 'openai'}, {'id': 'gpt-3.5-turbo', 'object': 'model', 'created': 1677610602, 'owned_by': 'openai'}, {'id': 'gpt-4o-audio-preview-2025-06-03', 'object': 'model', 'created': 1748908498, 'owned_by': 'system'}, {'id': 'gpt-4.1-nano', 'object': 'model', 'created': 1744321707, 'owned_by': 'system'}, {'id': 'gpt-image-1', 'object': 'model', 'created': 1745517030, 'owned_by': 'system'}, {'id': 'codex-mini-latest', 'object': 'model', 'created': 1746673257, 'owned_by': 'system'}, {'id': 'gpt-4o-realtime-preview-2025-06-03', 'object': 'model', 'created': 1748907838, 'owned_by': 'system'}, {'id': 'davinci-002', 'object': 'model', 'created': 1692634301, 'owned_by': 'system'}, {'id': 'babbage-002', 'object': 'model', 'created': 1692634615, 'owned_by': 'system'}, {'id': 'gpt-3.5-turbo-instruct', 'object': 'model', 'crea

In [None]:
# /// script
# requires-python = "==3.12.*"
# dependencies = [
#   "sentence-transformers",
#   "httpx",
#   "numpy",
# ]
# ///

from sentence_transformers import SentenceTransformer
import numpy as np

# Load the BAAI/bge-base-en-v1.5 sentence-transformers model once at module
# level; embed() below reuses this single instance for every call.
model = SentenceTransformer('BAAI/bge-base-en-v1.5')  # A small, high quality model

async def embed(text: str) -> list[float]:
    """Encode ``text`` with the module-level sentence-transformers model.

    Args:
        text: The string to embed.

    Returns:
        The output of ``model.encode(text)`` converted to a plain Python
        list via ``.tolist()``.
    """
    vector = model.encode(text)
    return vector.tolist()

async def get_similarity(text1: str, text2: str) -> float:
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        text1: First text to embed.
        text2: Second text to embed.

    Returns:
        The dot product of the two embedding vectors divided by the
        product of their Euclidean norms, as a Python float.
    """
    first = np.array(await embed(text1))
    second = np.array(await embed(text2))
    dot_product = np.dot(first, second)
    norm_product = np.linalg.norm(first) * np.linalg.norm(second)
    return float(dot_product / norm_product)

async def main():
    """Print each word pair followed by its cosine similarity, one pair per line."""
    word_pairs = [
        ("Apple", "Orange"),
        ("Apple", "Lightning"),
        ("Apple", "Sweet"),
        ("Orange", "Sweet"),
        ("Banana", "Sweet"),
        ("Cake", "Sweet"),
        ("Sugar", "Sweet"),
        ("Cat", "Car"),
        ("Cat", "Dog"),
        ("Cat", "Tiger"),
        ("Dog", "Tiger"),
    ]
    # Pairs are processed strictly in order so the output lines keep the
    # same sequence as the list above.
    for left, right in word_pairs:
        print(left, right, await get_similarity(left, right))


if __name__ == "__main__":
    # In Colab, you can directly await async functions instead of using asyncio.run().
    # NOTE: top-level `await` is not valid in a plain Python script, so this
    # line relies on the notebook environment.
    await main()

Apple Orange 0.6295845532708263
Apple Lightning 0.5975212858380147
Apple Sweet 0.5626999694273758
Orange Sweet 0.5651294628190171
Banana Sweet 0.5716693861367814
Cake Sweet 0.6522230127172479
Sugar Sweet 0.6821955341697183
Cat Car 0.6399051762662964
Cat Dog 0.7072966563836629
Cat Tiger 0.7590802124951175
Dog Tiger 0.643633156459927


In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
import httpx

# NOTE(review): embed() in this cell calls the OpenAI embeddings endpoint and
# never references `model`, so this load looks like a leftover from the
# local-model cell — confirm before removing.
model = SentenceTransformer('BAAI/bge-base-en-v1.5')  # A small, high quality model

async def embed(text: str) -> list[float]:
    """Get embedding vector for text using OpenAI's API.

    Posts ``text`` to the ``/v1/embeddings`` endpoint with the
    ``text-embedding-3-small`` model and returns the first embedding in
    the response.

    Args:
        text: The string to embed.

    Returns:
        The value of ``data[0].embedding`` from the JSON response.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "https://api.openai.com/v1/embeddings",
            headers={"Authorization": f"Bearer {OPENAI_API_KEY}"},
            json={"model": "text-embedding-3-small", "input": text},
        )
        # Fail with the real HTTP status (bad key, rate limit, ...) rather
        # than an unrelated KeyError when an error payload is not expected
        # to contain a "data" entry.
        response.raise_for_status()
        return response.json()["data"][0]["embedding"]

async def get_similarity(text1: str, text2: str) -> float:
    """Calculate cosine similarity between two texts.

    Args:
        text1: First text to embed.
        text2: Second text to embed.

    Returns:
        The dot product of the two embedding vectors divided by the
        product of their Euclidean norms, as a Python float.
    """
    # Imported locally because this cell's import block does not bring in asyncio.
    import asyncio

    # The two embedding requests are independent of each other, so issue
    # them concurrently instead of waiting for one before starting the next.
    vec1, vec2 = await asyncio.gather(embed(text1), embed(text2))
    emb1 = np.array(vec1)
    emb2 = np.array(vec2)
    return float(np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2)))

async def main():
    """Print the cosine similarity of each word pair, one value per line."""
    word_pairs = [
        ("Apple", "Orange"),
        ("Apple", "Lightning"),
        ("Apple", "Sweet"),
        ("Orange", "Sweet"),
        ("Banana", "Sweet"),
        ("Cake", "Sweet"),
        ("Sugar", "Sweet"),
        ("Cat", "Car"),
        ("Cat", "Dog"),
        ("Cat", "Tiger"),
        ("Dog", "Tiger"),
    ]
    # Only the score is printed here; the order of the list determines the
    # order of the output lines.
    for left, right in word_pairs:
        print(await get_similarity(left, right))


if __name__ == "__main__":
    # In Colab, you can directly await async functions instead of using asyncio.run().
    # NOTE: top-level `await` is not valid in a plain Python script, so this
    # line relies on the notebook environment.
    await main()

0.4453026974621058
0.31592594318685935
0.35389331203806473
0.3739836872096102
0.2834730156429561
0.3753747409879192
0.5549953980671316
0.48091151867211596
0.5730163739867703
0.4425742022373779
0.2723096753284652


In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
import httpx

# NOTE(review): embed() in this cell calls the OpenAI embeddings endpoint and
# never references `model`, so this load looks like a leftover from the
# local-model cell — confirm before removing.
model = SentenceTransformer('BAAI/bge-base-en-v1.5')  # A small, high quality model

async def embed(text: str) -> list[float]:
    """Get embedding vector for text using OpenAI's API.

    Posts ``text`` to the ``/v1/embeddings`` endpoint with the
    ``text-embedding-3-large`` model and returns the first embedding in
    the response.

    Args:
        text: The string to embed.

    Returns:
        The value of ``data[0].embedding`` from the JSON response.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "https://api.openai.com/v1/embeddings",
            headers={"Authorization": f"Bearer {OPENAI_API_KEY}"},
            json={"model": "text-embedding-3-large", "input": text},
        )
        # Fail with the real HTTP status (bad key, rate limit, ...) rather
        # than an unrelated KeyError when an error payload is not expected
        # to contain a "data" entry.
        response.raise_for_status()
        return response.json()["data"][0]["embedding"]

async def get_similarity(text1: str, text2: str) -> float:
    """Calculate cosine similarity between two texts.

    Args:
        text1: First text to embed.
        text2: Second text to embed.

    Returns:
        The dot product of the two embedding vectors divided by the
        product of their Euclidean norms, as a Python float.
    """
    # Imported locally because this cell's import block does not bring in asyncio.
    import asyncio

    # The two embedding requests are independent of each other, so issue
    # them concurrently instead of waiting for one before starting the next.
    vec1, vec2 = await asyncio.gather(embed(text1), embed(text2))
    emb1 = np.array(vec1)
    emb2 = np.array(vec2)
    return float(np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2)))

async def main():
    """Print each word pair followed by its cosine similarity, one pair per line."""
    word_pairs = [
        ("Apple", "Orange"),
        ("Apple", "Lightning"),
        ("Apple", "Sweet"),
        ("Orange", "Sweet"),
        ("Banana", "Sweet"),
        ("Cake", "Sweet"),
        ("Sugar", "Sweet"),
        ("Cat", "Car"),
        ("Cat", "Dog"),
        ("Cat", "Tiger"),
        ("Dog", "Tiger"),
    ]
    # Pairs are processed strictly in order so the output lines keep the
    # same sequence as the list above.
    for left, right in word_pairs:
        print(left, right, await get_similarity(left, right))


if __name__ == "__main__":
    # In Colab, you can directly await async functions instead of using asyncio.run().
    # NOTE: top-level `await` is not valid in a plain Python script, so this
    # line relies on the notebook environment.
    await main()

Apple Orange 0.4369372395994161
Apple Lightning 0.2700036462244235
Apple Sweet 0.3446515549960304
Orange Sweet 0.3442843851634875
Banana Sweet 0.3450434036641629
Cake Sweet 0.43900873289851455
Sugar Sweet 0.49483241427554403
Cat Car 0.4798208495643023
Cat Dog 0.6274555314751838
Cat Tiger 0.4018383381456561
Dog Tiger 0.37192962780232597
