In [1]:
import requests
import numpy as np

# Two paraphrases of the same scene, used as inputs for the embedding calls below.
sentence1, sentence2 = (
    "The cat sat on the mat.",
    "A cat was sitting on the mat.",
)

# Request a text embedding from the local Ollama API
def get_embedding(text, url="http://localhost:11434/api/embeddings",
                  model="nomic-embed-text", timeout=60):
    """Fetch an embedding vector for ``text`` from an Ollama embeddings endpoint.

    Args:
        text: The string sent as the ``prompt`` field of the request.
        url: Endpoint that receives the POST request.
        model: Name sent as the ``model`` field of the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the cell
            if the local server stalls.

    Returns:
        A NumPy array built from the ``embedding`` field of the JSON response,
        or ``None`` when the server answers with a non-200 status or the call
        fails for any other reason (a message is printed in both cases).
    """
    payload = {
        "model": model,
        "prompt": text
    }
    try:
        response = requests.post(url=url, json=payload, timeout=timeout)

        if response.status_code == 200:
            return np.array(response.json()['embedding'])

        # Error bodies are not guaranteed to be JSON; fall back to the raw
        # text so the status code is still reported.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        print("Error:", response.status_code, detail)
        return None
    except Exception as e:
        # Deliberate best-effort: connection errors, timeouts and malformed
        # payloads are all reported and mapped to None.
        print(f"API call failed: {e}")
        return None

# Embed both sentences, in order, with one request per sentence
embedding_1, embedding_2 = (
    get_embedding(sentence) for sentence in (sentence1, sentence2)
)

In [2]:
def cosine_sim(vec1, vec2):
    """Return the cosine similarity of two equal-length numeric vectors.

    Args:
        vec1: Sized iterable of numbers (e.g. a list or 1-D NumPy array).
        vec2: Sized iterable of numbers with the same length as ``vec1``.

    Returns:
        The dot product divided by the product of the two magnitudes, or
        ``0.0`` when either vector has zero magnitude (this includes empty
        vectors).

    Raises:
        ValueError: If the vectors differ in length. ``zip`` would otherwise
            silently truncate the dot product to the shorter vector while the
            magnitudes still covered every element, producing a wrong score.
    """
    if len(vec1) != len(vec2):
        raise ValueError(
            f"Vectors must have the same length, got {len(vec1)} and {len(vec2)}"
        )

    # Dot product of two vectors
    dot_product = sum(a * b for a, b in zip(vec1, vec2))

    # Magnitude (Euclidean norm) of each vector
    magnitude_vec1 = sum(a ** 2 for a in vec1) ** 0.5
    magnitude_vec2 = sum(b ** 2 for b in vec2) ** 0.5

    # A zero vector has no direction; return 0.0 instead of dividing by zero
    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        return 0.0

    # Cosine similarity
    return dot_product / (magnitude_vec1 * magnitude_vec2)

# def pearson_correlation(vec1, vec2):
#     mean1, mean2 = sum(vec1) / len(vec1), sum(vec2) / len(vec2)
#     numerator = sum((a - mean1) * (b - mean2) for a, b in zip(vec1, vec2))
#     denominator = (sum((a - mean1) ** 2 for a in vec1) ** 0.5) * (sum((b - mean2) ** 2 for b in vec2) ** 0.5)
#     return numerator / denominator if denominator != 0 else 0


In [3]:
# sentence1/sentence2 and their embeddings were already created in the first
# cell; reuse them instead of issuing two more identical API requests.
# get_embedding returns None on failure, so fail here with a clear message
# rather than with a TypeError raised from inside cosine_sim.
if embedding_1 is None or embedding_2 is None:
    raise RuntimeError(
        "Embedding request failed; see the error printed by get_embedding."
    )

cosine_sim(embedding_1, embedding_2)

0.9777018864480065