### Ollama

Ollama supports embedding models, making it possible to build retrieval augmented generation (RAG) applications that combine text prompts with existing documents or other data.

In [9]:
from langchain_ollama import OllamaEmbeddings

In [10]:
embeddings_gemma = (
    OllamaEmbeddings(model='gemma:2b') ## By default it uses LLama2 embedding technique to conver word2vec to change it add model details specicically)
)
embeddings_gemma

OllamaEmbeddings(model='gemma:2b', validate_model_on_init=False, base_url=None, client_kwargs={}, async_client_kwargs={}, sync_client_kwargs={}, mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, keep_alive=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None)

In [11]:
r1 = embeddings_gemma.embed_documents(
    [
        "Alpha is the first letter of Greek alphabet",
        "Beta is the second letter of the Greek alphabet"
    ]
)

In [12]:
r1

[[-0.0358243,
  -0.013973143,
  0.005217079,
  0.043571,
  -0.0055173365,
  0.0045081256,
  -0.011378423,
  -0.006291961,
  -0.0014284432,
  -0.01502213,
  0.0095783975,
  0.008646442,
  0.013403602,
  -0.014706627,
  -0.01163689,
  -0.0017009677,
  0.06275841,
  -0.010978653,
  0.017833242,
  0.0034080727,
  0.007961367,
  -0.006613123,
  0.0038592245,
  0.0033575408,
  -0.01625743,
  -0.010049496,
  -0.009226255,
  0.0017288615,
  0.0044297585,
  0.02420189,
  -0.003529713,
  0.00015752221,
  0.01913498,
  0.0097598415,
  -0.02355858,
  -0.0012423693,
  -0.017020907,
  0.012688183,
  0.0048387973,
  -0.0041338857,
  0.02197504,
  0.009588918,
  0.015500411,
  -0.001478996,
  -0.0008442244,
  -0.023668198,
  0.024617113,
  0.00034989102,
  -0.022353454,
  0.0039774985,
  -0.26385382,
  -0.1577051,
  -0.019042945,
  -0.0037710287,
  -0.028738547,
  -0.010682752,
  -0.016535668,
  0.0057891677,
  -0.011275037,
  0.012418003,
  -0.0033450176,
  -0.008761279,
  -0.017225582,
  -0.00759264

In [13]:
len(r1[0])

2048

In [15]:
r2 = embeddings_gemma.embed_query("what is the second letter of greek alphabet")

In [18]:
## To calculate the similarity between 2 vectors
from numpy import dot
import numpy as np
from numpy.linalg import norm

vec1 = np.array(r1[1])
vec2 = np.array(r2[0])

def calculate_cosine_similarity(vec1, vec2):
    """
    Calculates the cosine similarity between two numpy vectors.
    """
    dot_product = dot(vec1, vec2)
    magnitude_vec1 = norm(vec1)
    magnitude_vec2 = norm(vec2)
    
    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        return 0  # Avoid division by zero
    
    return dot_product / (magnitude_vec1 * magnitude_vec2)

similarity_score_np = calculate_cosine_similarity(vec1, vec2)
print(f"Similarity score (numpy): {similarity_score_np}")

Similarity score (numpy): [ 0.019778    0.01016633  0.00045198 ... -0.01191206  0.00076604
  0.0382593 ]


In [24]:
### Other embedding models
### https://ollama.com/blog/embedding-models
embeddings = OllamaEmbeddings(model="mxbai-embed-large")
text = "This is a text document"
query_result = embeddings.embed_query(text)
query_result

[-0.0031470181,
 -0.035069175,
 0.03842679,
 0.028557781,
 0.0050148712,
 1.1306002e-05,
 -0.0014788369,
 -0.007560254,
 0.044574693,
 0.05585832,
 -0.020728797,
 0.0076167514,
 -0.0004157919,
 0.012124699,
 -0.019128272,
 -0.03291238,
 -0.017665159,
 -0.02175019,
 -0.04114728,
 0.008460811,
 -0.054754887,
 0.03157644,
 -0.09601952,
 -0.030276358,
 0.0036189402,
 0.022370879,
 -0.010255957,
 0.036081053,
 0.038836695,
 0.05227606,
 0.002231831,
 0.017517176,
 -0.016464397,
 -0.03362424,
 0.02370094,
 -0.006627973,
 0.049325988,
 -0.032005314,
 0.025426297,
 -0.02567944,
 0.0066767544,
 -0.0136132855,
 0.06583063,
 -0.034338847,
 -0.061467797,
 -0.030600168,
 -0.0251246,
 -0.02637962,
 -0.029121466,
 -0.019409113,
 0.015413933,
 0.0017773545,
 0.029460426,
 -0.007258961,
 0.0043944465,
 -0.023881132,
 0.0005024957,
 -0.031631537,
 -0.038807243,
 0.036334563,
 0.034679867,
 0.025937378,
 0.043209787,
 -0.01702151,
 -0.018779935,
 0.001509823,
 -0.027449735,
 -0.0024693138,
 0.024532605,


In [25]:
len(query_result)

1024

##### To know more on OLLAMA Embeddings check below website
https://ollama.com/blog/embedding-models