In [1]:
## Embeddings with Hugging Face models via LangChain.
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
# all-MiniLM-L6-v2 maps sentences and paragraphs to a 384-dimensional dense
# vector space; it suits tasks such as clustering or semantic search.
model = "sentence-transformers/all-MiniLM-L6-v2"

# Build the embedding model for that checkpoint.
embeddings = HuggingFaceEmbeddings(model_name=model)

# Bare trailing expression so the notebook displays the object's repr.
embeddings

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [3]:
# Embed a single piece of text and inspect the result.

text = "Embeddings are a way to represent data—especially text or words—as numerical vectors in a high-dimensional space. These vectors capture the semantic meaning of the data, allowing similar items to be placed close together in that space."

# Turn the text into its embedding vector.
embedding = embeddings.embed_query(text)

# Report the input, the vector and its length, one item per line.
print(
    f"Text: {text}",
    f"Embeddings : {embedding}",
    f"len : {len(embedding)}",
    sep="\n",
)


Text: Embeddings are a way to represent data—especially text or words—as numerical vectors in a high-dimensional space. These vectors capture the semantic meaning of the data, allowing similar items to be placed close together in that space.
Embeddings : [0.018101340159773827, -0.05751695856451988, -0.013676345348358154, -0.010800442658364773, 0.045922957360744476, 0.07357092946767807, -0.032294731587171555, -0.0053786952048540115, 0.08922066539525986, -0.044415004551410675, 0.026321975514292717, 0.08210856467485428, 0.09858692437410355, 0.018321184441447258, -0.05282332003116608, 0.059912700206041336, 0.0480162538588047, 0.038127005100250244, -0.08336164802312851, -0.019792631268501282, 0.002653001807630062, -0.005887859966605902, -0.028072500601410866, -0.08043816685676575, 0.075568288564682, 0.05299662426114082, -0.05773628503084183, 0.0319172665476799, 0.042397648096084595, 0.024069376289844513, 0.08499419689178467, 0.006864940747618675, 0.0032907123677432537, 0.06552094966173172, 

In [6]:
# We can also use the sentence-transformers library directly.
from sentence_transformers import SentenceTransformer

sentences = ["This is an example sentence", "Each sentence is converted"]

# Use dedicated names in this cell: earlier cells bind `model` to the
# model-name string and `embeddings` to the HuggingFaceEmbeddings object.
# Rebinding those names to a SentenceTransformer and to the encoded output
# would make `embeddings.embed_query(...)` fail whenever the earlier cells
# are re-run after this one.
st_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Encode all sentences in a single call and show the result.
sentence_embeddings = st_model.encode(sentences)
print(sentence_embeddings)

[[ 6.76568747e-02  6.34959713e-02  4.87131178e-02  7.93049708e-02
   3.74480747e-02  2.65281973e-03  3.93749960e-02 -7.09845219e-03
   5.93613535e-02  3.15370485e-02  6.00980595e-02 -5.29052056e-02
   4.06067446e-02 -2.59308629e-02  2.98428200e-02  1.12690695e-03
   7.35148489e-02 -5.03818318e-02 -1.22386657e-01  2.37028580e-02
   2.97265369e-02  4.24768478e-02  2.56337859e-02  1.99512066e-03
  -5.69190606e-02 -2.71597914e-02 -3.29035483e-02  6.60248324e-02
   1.19007185e-01 -4.58791144e-02 -7.26214498e-02 -3.25839706e-02
   5.23413457e-02  4.50553186e-02  8.25298484e-03  3.67024355e-02
  -1.39415739e-02  6.53918087e-02 -2.64272038e-02  2.06389377e-04
  -1.36643564e-02 -3.62810530e-02 -1.95043888e-02 -2.89737973e-02
   3.94270085e-02 -8.84090886e-02  2.62424443e-03  1.36713777e-02
   4.83062193e-02 -3.11566442e-02 -1.17329180e-01 -5.11690341e-02
  -8.85287598e-02 -2.18963176e-02  1.42986597e-02  4.44167666e-02
  -1.34815862e-02  7.43392333e-02  2.66382825e-02 -1.98762845e-02
   1.79191

### **OpenAI embeddings**
*1. text-embedding-3-small*

*2. text-embedding-ada-002*

In [None]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Load the OpenAI key.
# os.environ is a mapping, not a function: calling os.environ("...") raises
# TypeError. Read the value with os.getenv and fail early with a clear
# message when it is missing, rather than at the first API call.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [None]:
from langchain_openai import OpenAIEmbeddings
# text-embedding-3-small: the output vectors will be 1536-dimensional.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [None]:
# Embed one standalone string with the OpenAI embedding model.
single_text = "Langchain and Rag are amazing frameworks and projects to work on"

openai_embeddings = embeddings.embed_query(single_text)