### Embedding Techniques Using HuggingFace

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()  # load all the environment variables

True

In [2]:
# Re-export the Hugging Face token into the process environment.
# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises TypeError when HF_TOKEN is not defined. Only write the
# variable back when a value is actually present.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token
else:
    print("HF_TOKEN is not set; skipping.")

#### Sentence Transformers on Hugging Face
Sentence Transformers on Hugging Face provides pre-trained models that convert sentences into fixed-size embeddings (vectors). These embeddings capture the semantic meaning of the sentences, enabling tasks like semantic similarity, clustering, and search. It builds on the `transformers` library, making it easy to use transformer-based models for sentence-level tasks. You can quickly load pre-trained models like `all-MiniLM-L6-v2` to generate embeddings and perform various NLP tasks efficiently.

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

# Name of the pre-trained sentence-transformer model to load.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Embedding client reused by the following cells (embed_query / embed_documents).
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Embed a single string; `text` is reused by a later cell as the first
# document passed to embed_documents.
text = "This is a text documents"
query_result = embeddings.embed_query(text)
# Display the resulting embedding (a list of floats, per the output below).
query_result

[-0.03409251570701599,
 0.1383490413427353,
 -0.017611760646104813,
 0.02173461578786373,
 0.01572098582983017,
 0.03230596333742142,
 -0.007109498139470816,
 0.04695632681250572,
 0.0524078793823719,
 0.021984916180372238,
 0.03684245049953461,
 0.07539767771959305,
 0.0065864319913089275,
 -0.02489376999437809,
 -0.09387518465518951,
 0.018351197242736816,
 -0.021494854241609573,
 -0.02909439615905285,
 0.010531189851462841,
 0.02855474315583706,
 0.015742667019367218,
 0.1276061236858368,
 0.025473717600107193,
 -0.01610785350203514,
 -0.004425093997269869,
 0.08774898946285248,
 -0.09410804510116577,
 0.0022262251004576683,
 0.0532919205725193,
 -0.007829656824469566,
 -0.0026936749927699566,
 0.06016496196389198,
 0.13216763734817505,
 0.05405008792877197,
 0.029209453612565994,
 0.004754429683089256,
 0.014401349239051342,
 0.038379114121198654,
 0.021466081961989403,
 0.0293189138174057,
 -0.03510774299502373,
 -0.09220030903816223,
 -0.00602716812863946,
 0.014241090975701809,


In [5]:
# Number of components in the embedding returned by embed_query.
len(query_result)

384

In [6]:
# Embed two strings in one call: the earlier `text` and a second sentence.
texts_to_embed = [text, "This is not a test document."]
doc_result = embeddings.embed_documents(texts_to_embed)

# Display the entry at index 1 of the result (the second input string).
doc_result[1]

[-0.018281683325767517,
 0.11222253739833832,
 -0.014233186841011047,
 0.023113183677196503,
 0.018091244623064995,
 -0.06463783234357834,
 -0.09148520976305008,
 0.01180263701826334,
 -0.015986399725079536,
 0.07231071591377258,
 0.08414015173912048,
 0.02900097891688347,
 -0.018477780744433403,
 -0.026501799002289772,
 -0.06828751415014267,
 -0.030838493257761,
 0.002305087633430958,
 -0.04135825112462044,
 0.042809344828128815,
 0.11545827239751816,
 0.05841616541147232,
 0.041316524147987366,
 0.06467802822589874,
 0.0003132724959868938,
 0.03307410702109337,
 0.030750421807169914,
 -0.054606981575489044,
 0.0355413481593132,
 0.012300804257392883,
 -0.06342155486345291,
 0.017335858196020126,
 0.080361008644104,
 0.017498228698968887,
 0.021355947479605675,
 0.10046583414077759,
 -0.008925353176891804,
 0.08060933649539948,
 0.011547706089913845,
 0.04908853769302368,
 0.03925776854157448,
 -0.009053289890289307,
 -0.10785303264856339,
 -0.020152784883975983,
 0.036959078162908554