In [None]:
!pip install sentence_transformers

In [10]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
from sentence_transformers.quantization import quantize_embeddings

# 1. Specify preffered dimensions
dimensions = 512

# 2. load model
model = SentenceTransformer("all-mpnet-base-v2")

# For retrieval you need to pass this prompt.
query = 'Represent this sentence for searching relevant passages: A man is eating a piece of bread while a girl is wathing him'

docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 2. Encode
embeddings = model.encode(docs)

# Optional: Quantize the embeddings
binary_embeddings = quantize_embeddings(embeddings, precision="ubinary")

similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)

similarities: tensor([[0.2601, 0.0635, 0.0499, 0.0336]])


In [11]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
from sentence_transformers.quantization import quantize_embeddings

# 1. Specify preffered dimensions
dimensions = 512

# 2. load model
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1", truncate_dim=dimensions)

# For retrieval you need to pass this prompt.
query = 'Represent this sentence for searching relevant passages: A man is eating a piece of bread while a girl is wathing him'

docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 2. Encode
embeddings = model.encode(docs)

# Optional: Quantize the embeddings
binary_embeddings = quantize_embeddings(embeddings, precision="ubinary")

similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)

similarities: tensor([[0.6357, 0.4859, 0.2854, 0.2616]])


In [12]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
from sentence_transformers.quantization import quantize_embeddings

# 1. Specify preffered dimensions
dimensions = 512

# 2. load model
model = SentenceTransformer("all-MiniLM-L6-v2")

# For retrieval you need to pass this prompt.
query = 'Represent this sentence for searching relevant passages: A man is eating a piece of bread while a girl is wathing him'

docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 2. Encode
embeddings = model.encode(docs)

# Optional: Quantize the embeddings
binary_embeddings = quantize_embeddings(embeddings, precision="ubinary")

similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)

similarities: tensor([[0.3144, 0.1944, 0.1033, 0.0252]])


In [9]:
import os
import openai
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from langchain_openai import OpenAIEmbeddings

# Set the OpenAI API key
os.environ['OPENAI_API_KEY'] = 'Open-AI-key'

openai.api_key = os.environ['OPENAI_API_KEY']

# Define the query and documents
query = 'Represent this sentence for searching relevant passages: A man is eating a piece of bread while a girl is watching him'

docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Function to get embeddings using LangChain's OpenAI integration
def get_openai_embeddings(documents):
    embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002")
    embeddings = embeddings_model.embed_documents(documents)
    return embeddings

# Encode the documents
embeddings = get_openai_embeddings(docs)

# Convert embeddings to numpy array
embeddings_array = np.array(embeddings)

# Calculate cosine similarities between the query and other documents
similarities = cosine_similarity([embeddings_array[0]], embeddings_array[1:])
print('similarities:', similarities)


similarities: [[0.8572839  0.83922635 0.7889852  0.79171162]]


In [None]:
pip install langchain_openai

In [None]:
!pip install langchain_cohere

In [None]:
!pip install langchain_google_genai

In [14]:
import os
import cohere
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from langchain_cohere import CohereEmbeddings

# Set the Cohere API key
os.environ['COHERE_API_KEY'] = 'Cohere-API-key'
cohere_api_key = os.environ['COHERE_API_KEY']

# Define the query and documents
query = 'Represent this sentence for searching relevant passages: A man is eating a piece of bread while a girl is watching him'

docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Function to get embeddings using LangChain's Cohere integration
def get_cohere_embeddings(documents):
    embeddings_model = CohereEmbeddings(model="embed-english-light-v2.0")
    embeddings = embeddings_model.embed_documents(documents)
    return embeddings

# Encode the documents
embeddings = get_cohere_embeddings(docs)

# Convert embeddings to numpy array
embeddings_array = np.array(embeddings)

# Calculate cosine similarities between the query and other documents
similarities = cosine_similarity([embeddings_array[0]], embeddings_array[1:])
print('similarities:', similarities)


similarities: [[0.5669538  0.42870585 0.32511208 0.21893115]]
