## Huggingface Embeddings

In [2]:
# Define Huggingface Embeddings
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
embeddings

HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [3]:
# Create first embeddings
text = "Hello, I am learning about embeddings"

embedding = embeddings.embed_query(text)
print(f"Text: {text}")
print(f"Embeddings Length: {len(embedding)}")
print(embedding)

Text: Hello, I am learning about embeddings
Embeddings Length: 384
[-0.01666165329515934, -0.09771719574928284, 0.0019201480317860842, -0.0016134995967149734, 0.014046616852283478, 0.060839176177978516, 0.012283624149858952, 0.015745408833026886, 0.040937986224889755, -0.030124563723802567, 0.02626514621078968, 0.0655929297208786, 0.045701686292886734, 0.00889705028384924, -0.046205971390008926, 0.01946255937218666, 0.04677304998040199, 0.07357387989759445, -0.07927092164754868, -0.006600852590054274, -0.03383966535329819, -0.04663919657468796, 0.010644854046404362, -0.09673545509576797, 0.024105282500386238, -0.02533305250108242, -0.021411189809441566, 0.059114906936883926, 0.10023825615644455, -0.06024160236120224, 0.03970738872885704, -0.033746298402547836, -0.019574373960494995, 0.0662289708852768, -0.0617646649479866, 0.11689270287752151, 0.03955421969294548, -0.0030686757527291775, -0.0818960964679718, -0.0036541421432048082, 0.02808087319135666, 0.025156382471323013, -0.02704865

In [4]:
sentences = [
    "The cat sat on the mat",
    "A feline rested on the rug",
    "The dog played in the yard",
    "I love programming in Python",
    "Python is my favorite programming language"
]

embeddings_sentence = embeddings.embed_documents(sentences)
print(embeddings_sentence)

[[0.1304018646478653, -0.011870077811181545, -0.02811702899634838, 0.05123864486813545, -0.055974431335926056, 0.030191510915756226, 0.030161332339048386, 0.024698402732610703, -0.018370604142546654, 0.058766767382621765, -0.02495313435792923, 0.06015421822667122, 0.03983176127076149, 0.03323053568601608, -0.06131140515208244, -0.049373093992471695, -0.05486355721950531, -0.04007604718208313, 0.056429073214530945, 0.03915662318468094, -0.03473711758852005, -0.013247695751488209, 0.0319661982357502, -0.06349919736385345, -0.06017858162522316, 0.07823453098535538, -0.02830389142036438, -0.04744286090135574, 0.04035928100347519, -0.0066309040412306786, -0.066740982234478, -0.0041913725435733795, -0.02531169354915619, 0.053341664373874664, 0.01742807775735855, -0.09792360663414001, 0.006061322055757046, -0.06524164229631424, 0.04557260498404503, 0.023641854524612427, 0.0765848159790039, -0.010264377109706402, -0.004076780751347542, -0.06232282891869545, 0.033705245703458786, 0.018661124631

In [5]:
print(embeddings_sentence[0])
print(embeddings_sentence[1])

[0.1304018646478653, -0.011870077811181545, -0.02811702899634838, 0.05123864486813545, -0.055974431335926056, 0.030191510915756226, 0.030161332339048386, 0.024698402732610703, -0.018370604142546654, 0.058766767382621765, -0.02495313435792923, 0.06015421822667122, 0.03983176127076149, 0.03323053568601608, -0.06131140515208244, -0.049373093992471695, -0.05486355721950531, -0.04007604718208313, 0.056429073214530945, 0.03915662318468094, -0.03473711758852005, -0.013247695751488209, 0.0319661982357502, -0.06349919736385345, -0.06017858162522316, 0.07823453098535538, -0.02830389142036438, -0.04744286090135574, 0.04035928100347519, -0.0066309040412306786, -0.066740982234478, -0.0041913725435733795, -0.02531169354915619, 0.053341664373874664, 0.01742807775735855, -0.09792360663414001, 0.006061322055757046, -0.06524164229631424, 0.04557260498404503, 0.023641854524612427, 0.0765848159790039, -0.010264377109706402, -0.004076780751347542, -0.06232282891869545, 0.033705245703458786, 0.0186611246317

In [6]:
# Popular models comparison
models = {
    "all-MiniLM-L6-v2": {
        "size": 384,
        "description": "Fast and efficient, good quality",
        "use_case": "General purpose, real-time applications"
    },
    "all-mpnet-base-v2": {
        "size": 768,
        "description": "Best quality, slower than MiniLM",
        "use_case": "When quality matters more than speed"
    },
    "all-MiniLM-L12-v2": {
        "size": 384,
        "description": "Slightly better than L6, bit slower",
        "use_case": "Good balance of speed and quality"
    },
    "multi-qa-MiniLM-L6-cos-v1": {
        "size": 384,
        "description": "Optimized for question-answering",
        "use_case": "Q&A systems, semantic search"
    },
    "paraphrase-multilingual-MiniLM-L12-v2": {
        "size": 384,
        "description": "Supports 50+ languages",
        "use_case": "Multilingual applications"
    }
}

print("📊 Popular Open Source Embedding Models:\n")
for model_name, info in models.items():
    print(f"Model: sentence-transformers/{model_name}")
    print(f"  📏 Embedding size: {info['size']} dimensions")
    print(f"  📝 Description: {info['description']}")
    print(f"  🎯 Use case: {info['use_case']}\n")

📊 Popular Open Source Embedding Models:

Model: sentence-transformers/all-MiniLM-L6-v2
  📏 Embedding size: 384 dimensions
  📝 Description: Fast and efficient, good quality
  🎯 Use case: General purpose, real-time applications

Model: sentence-transformers/all-mpnet-base-v2
  📏 Embedding size: 768 dimensions
  📝 Description: Best quality, slower than MiniLM
  🎯 Use case: When quality matters more than speed

Model: sentence-transformers/all-MiniLM-L12-v2
  📏 Embedding size: 384 dimensions
  📝 Description: Slightly better than L6, bit slower
  🎯 Use case: Good balance of speed and quality

Model: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
  📏 Embedding size: 384 dimensions
  📝 Description: Optimized for question-answering
  🎯 Use case: Q&A systems, semantic search

Model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
  📏 Embedding size: 384 dimensions
  📝 Description: Supports 50+ languages
  🎯 Use case: Multilingual applications

