In [1]:
from langchain_huggingface import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the embedding client used by every later cell. The model is the
# sentence-transformers "all-MiniLM-L6-v2" checkpoint, loaded through
# LangChain's HuggingFace wrapper.
# NOTE(review): the recorded warnings mention an HTTP download, so the first run
# presumably fetches the model files — confirm network access is available.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Bare expression: shows the configured object's repr as the cell output.
embeddings

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

### Single Sentence Embedding

In [3]:
# Embed a single piece of text with `embed_query`; the recorded output shows the
# result is one flat list of floats.
query = "Hello world, I am learning RAG and its quite fun but some what boring too"
query_result = embeddings.embed_query(query)
# NOTE(review): this prints the entire vector; consider showing only its length
# and the first few values to keep the notebook output readable.
print(f"Query Result: {query_result}")

Query Result: [-0.016566958278417587, -0.011108607985079288, 0.048585277050733566, -0.030438365414738655, -0.12032133340835571, -0.026212945580482483, 0.0426289364695549, -0.05351897329092026, -0.0069634211249649525, 0.02004082314670086, -0.039426449686288834, -0.022192515432834625, -0.0185454823076725, 0.01274647656828165, -0.025865621864795685, 0.019233349710702896, 0.06140965223312378, 0.09203187376260757, 0.03162933140993118, -0.08759164810180664, -0.05836508423089981, 0.01065761037170887, 0.03808968886733055, -0.05014326050877571, -0.018986962735652924, 0.02189304307103157, -0.014532716944813728, 0.02019180729985237, 0.05855998769402504, -0.07644309103488922, -0.06370460242033005, 0.09749741107225418, -0.00017525012663099915, -0.06523774564266205, -0.07631976902484894, 0.008461076766252518, 0.04221532493829727, 0.02592979371547699, -0.0046716113574802876, 0.08226475119590759, -0.00026840189821086824, 0.09545058012008667, 0.040767136961221695, -0.09637914597988129, 0.05999047681689

In [10]:
# Sample sentences to embed as a batch. The first one is the same text used in
# the single-sentence cell, so its vector can be compared with that result.
sentences = [
    "Hello world, I am learning RAG and its quite fun but some what boring too",
    "Its my first time writing python code",
    "I love to learn new things",
    "I love to play online games",
    "I love to play BGMI",
]
# `embed_documents` takes the whole list; the recorded output is a list of
# vectors (presumably one per sentence, in input order — the pairwise
# similarity cell indexes it that way).
# NOTE: this rebinds `query_result`, which previously held the single-query
# embedding; the pairwise cosine-similarity cell reads this batch value.
query_result = embeddings.embed_documents(sentences)
print(f"Document Result: {query_result}")


Document Result: [[-0.01656695269048214, -0.011108651757240295, 0.04858528822660446, -0.03043835423886776, -0.1203213706612587, -0.026212895289063454, 0.04262898862361908, -0.053518932312726974, -0.006963424850255251, 0.02004081755876541, -0.03942641615867615, -0.022192489355802536, -0.018545441329479218, 0.012746450491249561, -0.025865552946925163, 0.01923341676592827, 0.06140969693660736, 0.09203191101551056, 0.03162934631109238, -0.08759166300296783, -0.05836508423089981, 0.010657601058483124, 0.03808969631791115, -0.05014326050877571, -0.018986936658620834, 0.021893002092838287, -0.01453273743391037, 0.02019181102514267, 0.05856001749634743, -0.07644309848546982, -0.06370458006858826, 0.09749741852283478, -0.0001752749376464635, -0.06523773074150085, -0.07631977647542953, 0.00846113171428442, 0.042215317487716675, 0.025929827243089676, -0.004671608097851276, 0.0822647213935852, -0.00026841324870474637, 0.09545065462589264, 0.04076716676354408, -0.09637914597988129, 0.05999045819044

### Cosine Similarity between sentences

In [11]:
import numpy as np
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two vectors.

    Cosine similarity is the dot product of the vectors divided by the
    product of their Euclidean norms.

    Args:
        vec1: First vector, as any 1-D sequence of numbers or NumPy array.
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        The similarity as a NumPy float. If either vector has zero norm the
        similarity is undefined; ``0.0`` is returned in that case instead of
        dividing by zero and producing ``nan`` with a RuntimeWarning.
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)

    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # A zero vector has no direction, so treat it as "not similar".
        return np.float64(0.0)

    return np.dot(a, b) / denominator

In [12]:
# Compare every unordered pair of sentences exactly once (i < j), using the
# batch embeddings held in `query_result`.
sentence_count = len(sentences)
index_pairs = [
    (first, second)
    for first in range(sentence_count)
    for second in range(first + 1, sentence_count)
]

for i, j in index_pairs:
    sim = cosine_similarity(query_result[i], query_result[j])
    print(f"Cosine Similarity between '{sentences[i]}' and '{sentences[j]}': {sim}")

Cosine Similarity between 'Hello world, I am learning RAG and its quite fun but some what boring too' and 'Its my first time writing python code': 0.20999468041475208
Cosine Similarity between 'Hello world, I am learning RAG and its quite fun but some what boring too' and 'I love to learn new things': 0.3485078682776142
Cosine Similarity between 'Hello world, I am learning RAG and its quite fun but some what boring too' and 'I love to play online games': 0.22364187287060483
Cosine Similarity between 'Hello world, I am learning RAG and its quite fun but some what boring too' and 'I love to play BGMI': 0.2852157768289313
Cosine Similarity between 'Its my first time writing python code' and 'I love to learn new things': 0.2168934261074986
Cosine Similarity between 'Its my first time writing python code' and 'I love to play online games': 0.10836407962273789
Cosine Similarity between 'Its my first time writing python code' and 'I love to play BGMI': 0.11514111300711799
Cosine Similarity be

### Semantic Search over In-Memory Vectors (not a real vector DB — just a brute-force comparison of embeddings)

In [27]:
def semantic_search(query, documents, top_k=3):
    """Rank ``documents`` by cosine similarity to ``query``.

    The query and all documents are embedded with the module-level
    ``embeddings`` object, and every document is scored against the query
    with ``cosine_similarity``.

    Args:
        query: Text to search for.
        documents: Indexable collection of texts to search through.
        top_k: Number of best matches to keep (default 3).

    Returns:
        A list of at most ``top_k`` ``(similarity, document)`` tuples, sorted
        in descending tuple order: highest similarity first, with ties
        broken by the document value.
    """
    query_embedding = embeddings.embed_query(query)
    document_embeddings = embeddings.embed_documents(documents)

    scored_documents = [
        (cosine_similarity(query_embedding, doc_embedding), documents[idx])
        for idx, doc_embedding in enumerate(document_embeddings)
    ]

    return sorted(scored_documents, reverse=True)[:top_k]

In [29]:
# Small in-memory corpus to search over.
documents = [
    "RAG is a new way to build LLM applications.",
    "LangChain is a framework for building applications with LLMs.",
    "HuggingFace provides a variety of models for NLP tasks.",
    "Python is a popular programming language.",
    "Many people prefer python for data science.",
]
# NOTE: rebinds `query`, which earlier held the single-sentence example.
query = "What is Langchain?"
# No `top_k` is passed, so `semantic_search` uses its default of 3.
results = semantic_search(query, documents)
# Bare expression: displays the (similarity, document) tuples as the cell output.
results


[(np.float64(0.6278864276716127),
  'LangChain is a framework for building applications with LLMs.'),
 (np.float64(0.2402854823578326),
  'HuggingFace provides a variety of models for NLP tasks.'),
 (np.float64(0.2006510485224054), 'Python is a popular programming language.')]