# Exercise C: A first look into search

In [1]:
from typing import Sequence
from aleph_alpha_client import ImagePrompt, AlephAlphaClient, AlephAlphaModel, SemanticEmbeddingRequest, SemanticRepresentation, Prompt
import math
import os

In [2]:
# Build the API client (token read from the API_TOKEN environment variable)
# and wrap it in a model handle for "luminous-base".
model = AlephAlphaModel(
    AlephAlphaClient(
        host="https://api.aleph-alpha.com",
        token=os.environ.get("API_TOKEN"),
    ),
    model_name="luminous-base",
)

### Simple functions for embedding and searching

In [3]:
# function for symmetric embedding
def embed_symmetric(text: str):
    """Return the symmetric semantic embedding of ``text``.

    The text is wrapped in a prompt and sent to the module-level ``model``
    as a semantic-embedding request using the symmetric representation;
    the ``embedding`` attribute of the response is returned.
    """
    prompt = Prompt.from_text(text)
    embedding_request = SemanticEmbeddingRequest(
        prompt=prompt,
        representation=SemanticRepresentation.Symmetric,
    )
    return model.semantic_embed(embedding_request).embedding

# function to calculate similarity
def cosine_similarity(v1: Sequence[float], v2: Sequence[float]) -> float:
    """Compute the cosine similarity of v1 and v2: (v1 . v2) / (||v1|| * ||v2||).

    Args:
        v1: First vector.
        v2: Second vector; must have the same length as v1.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms.

    Raises:
        ValueError: If the vectors differ in length.
        ZeroDivisionError: With built-in numeric types, if either vector has
            zero norm (this includes empty vectors); the similarity is
            undefined in that case.
    """
    if len(v1) != len(v2):
        # Pairing elements of unequal-length vectors would silently drop the
        # tail of the longer one and yield a meaningless similarity.
        raise ValueError(
            f"vectors must have the same length, got {len(v1)} and {len(v2)}"
        )
    sumxx, sumxy, sumyy = 0, 0, 0
    for x, y in zip(v1, v2):
        sumxx += x * x
        sumyy += y * y
        sumxy += x * y
    # A single sqrt over the product of the squared norms.
    return sumxy / math.sqrt(sumxx * sumyy)

### Tasks: 
1. Play around with the semantic similarity of the text embeddings
    - What difference does language make?
    - What difference does the size of the text make?
    - Can you find a semantic opposite of a text?


In [4]:
# two texts to compare (same sentence in English and Italian)
text_a = "The sun is shining"
text_b = "Il sole splende"

# embed both texts and print their cosine similarity
print(
    cosine_similarity(
        embed_symmetric(text_a),
        embed_symmetric(text_b),
    )
)

0.9123379711230551
