### Part 01: Introduction to Embedding

In [0]:
# Install the Databricks SDK, which provides the WorkspaceClient imported below.
# -q keeps pip's output quiet; skip this cell if the SDK is already present on the cluster.
%pip install -q databricks-sdk

In [0]:
from databricks.sdk import WorkspaceClient

# WorkspaceClient() is built with no arguments: authentication is picked up
# from the surrounding notebook environment.
w = WorkspaceClient()

# Serving endpoint to query: the bare endpoint name, not its full URL.
endpoint_name = "embedding-model"

# Sample sentence to turn into an embedding vector.
text_to_embed = "The sun rises in the east"

# Send the sentence to the endpoint as a one-element batch.
response = w.serving_endpoints.query(name=endpoint_name, inputs=[text_to_embed])

# Pull out the single returned embedding once, then report a preview and its length.
first_embedding = response.predictions['data'][0]['embedding']
print(f"Embedding vector (first 5 dimensions): {first_embedding[:5]}")
print(f"Embedding dimension: {len(first_embedding)}")

### Part 02: Calculating the cosine similarity score between two text samples

In [0]:
pip install -q sentence_transformers 

In [0]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the pretrained sentence-embedding model by name
model = SentenceTransformer("all-MiniLM-L6-v2")

# The pair of texts to compare
text1, text2 = "King", "Men"

# Encode each text as its own single-item batch
emb1 = model.encode([text1])
emb2 = model.encode([text2])

# cosine_similarity returns a pairwise matrix; with one row on each side,
# its only entry ([0][0]) is the score for this pair
pairwise_scores = cosine_similarity(emb1, emb2)
similarity = pairwise_scores[0][0]

print(f"Semantic similarity: {similarity:.4f}")


In [0]:
import numpy as np

def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Computed as dot(a, b) / (||a|| * ||b||).

    Args:
        a: First vector (a sequence of numbers or a NumPy array).
        b: Second vector, with the same length as ``a``.

    Returns:
        The dot product of ``a`` and ``b`` divided by the product of their norms.

    Note:
        There is no guard for zero-norm inputs; the division is performed as-is.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return dot_product / norm_product

# Example texts
text1 = "The world is a beautiful place"
text2 = "The planet we live in is good "

# Embed both texts with a single request to the serving endpoint
response = w.serving_endpoints.query(
    name=endpoint_name,
    inputs=[text1, text2]
)

# 'data' holds one entry per input text (presumably in request order — confirm
# against the endpoint's response schema). Index the ENTRY to pick a text, then
# take its full 'embedding' vector. Indexing into the vector itself would yield
# single floats, and the cosine similarity of two scalars is always +/-1.
embeddings = response.predictions['data']
emb1 = embeddings[0]['embedding']
emb2 = embeddings[1]['embedding']

# Compute similarity between the two full embedding vectors
similarity = cosine_similarity(emb1, emb2)
print(f"Semantic similarity: {similarity:.4f}")