### Embedding Techniques Using HuggingFace

In [1]:
import os
from dotenv import load_dotenv
# Load environment variables via python-dotenv (by default it reads a `.env` file).
load_dotenv()

True

In [2]:
# Mirror HF_TOKEN back into os.environ, but only when it is actually set.
# os.getenv returns None for a missing variable, and os.environ only accepts
# str values, so an unguarded assignment raises TypeError when no token exists.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token

#### Sentence Transformers on Hugging Face
Sentence Transformers on Hugging Face provides pre-trained models that convert sentences into fixed-size embeddings (vectors). These embeddings capture the semantic meaning of the sentences, enabling tasks like semantic similarity, clustering, and search. It builds on the `transformers` library, making it easy to use transformer-based models for sentence-level tasks. You can quickly load pre-trained models like `all-MiniLM-L6-v2` to generate embeddings and perform various NLP tasks efficiently.

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings
# Model checkpoint name handed to HuggingFaceEmbeddings below.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Embed a single string with the embeddings object created earlier.
text = "This is a text documents"
query_result = embeddings.embed_query(text)
# Bare last expression so the notebook displays the vector (a list of floats).
query_result

[-0.03409251570701599,
 0.1383490413427353,
 -0.017611760646104813,
 0.02173461578786373,
 0.01572098582983017,
 0.03230596333742142,
 -0.007109498139470816,
 0.04695632681250572,
 0.0524078793823719,
 0.021984916180372238,
 0.03684245049953461,
 0.07539767771959305,
 0.0065864319913089275,
 -0.02489376999437809,
 -0.09387518465518951,
 0.018351197242736816,
 -0.021494854241609573,
 -0.02909439615905285,
 0.010531189851462841,
 0.02855474315583706,
 0.015742667019367218,
 0.1276061236858368,
 0.025473717600107193,
 -0.01610785350203514,
 -0.004425093997269869,
 0.08774898946285248,
 -0.09410804510116577,
 0.0022262251004576683,
 0.0532919205725193,
 -0.007829656824469566,
 -0.0026936749927699566,
 0.06016496196389198,
 0.13216763734817505,
 0.05405008792877197,
 0.029209453612565994,
 0.004754429683089256,
 0.014401349239051342,
 0.038379114121198654,
 0.021466081961989403,
 0.0293189138174057,
 -0.03510774299502373,
 -0.09220030903816223,
 -0.00602716812863946,
 0.014241090975701809,


In [5]:
# Number of components in the embedding vector (the recorded output is 384).
len(query_result)

384

In [7]:
# Embed two strings in a single call, then display the vector for the
# second one (index 1).
documents = [text, "Thsis is not a test document."]
doc_result = embeddings.embed_documents(documents)
doc_result[1]

[-0.02208917774260044,
 0.08907604962587357,
 -0.04481242969632149,
 -0.06639129668474197,
 0.00915132649242878,
 -0.08660538494586945,
 -0.09737208485603333,
 0.041419532150030136,
 -0.008440366014838219,
 0.08509156107902527,
 0.07808689773082733,
 0.006889576558023691,
 -0.06268030405044556,
 -0.02653786540031433,
 -0.02454984188079834,
 -0.0707143172621727,
 -0.01277246419340372,
 -0.06961122900247574,
 0.06627274304628372,
 0.043334875255823135,
 0.046929240226745605,
 0.07643017172813416,
 0.0246534775942564,
 0.010904255323112011,
 0.008225412108004093,
 0.03978750854730606,
 -0.06791894882917404,
 0.05231381580233574,
 0.02840059995651245,
 -0.032053057104349136,
 -0.05990532413125038,
 0.08405544608831406,
 -0.006682668812572956,
 0.04497493803501129,
 0.11863455921411514,
 0.02368209883570671,
 0.04180881008505821,
 0.03563879430294037,
 0.022084509953856468,
 -0.04195237159729004,
 0.016837075352668762,
 -0.10166006535291672,
 -0.01719725877046585,
 0.03269028663635254,
 0.0