In [None]:
!pip install torch transformers faiss-cpu numpy datasets sentence-transformers faiss-gpu



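This cell encodes two sample sentences, prints the resulting embeddings so you can inspect the output, and stores them in a FAISS index that is saved to disk.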

In [None]:
import faiss
from sentence_transformers import SentenceTransformer, util
# Two toy sentences that act as the corpus for this sanity check.
sentences = [
    "This is an example sentence",
    "Each sentence is converted",
]

# Pre-trained MiniLM sentence encoder.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Encode the corpus (one row per sentence) and show the raw vectors.
embeddings = model.encode(sentences)
print(embeddings)

# Build a flat L2 index as wide as one embedding vector, then fill it.
index = faiss.IndexFlatL2(embeddings.shape[-1])
index.add(embeddings)

# Persist the index to disk so it can be reloaded later.
faiss.write_index(index, "my_vector_database.faiss")
print("Vector database created and saved!")

[[ 6.76569194e-02  6.34959713e-02  4.87131476e-02  7.93049783e-02
   3.74480933e-02  2.65277131e-03  3.93749513e-02 -7.09846430e-03
   5.93614057e-02  3.15369777e-02  6.00981042e-02 -5.29052615e-02
   4.06067781e-02 -2.59308387e-02  2.98428647e-02  1.12688600e-03
   7.35148042e-02 -5.03817983e-02 -1.22386612e-01  2.37028562e-02
   2.97265332e-02  4.24768738e-02  2.56337579e-02  1.99516211e-03
  -5.69190793e-02 -2.71598026e-02 -3.29035036e-02  6.60248846e-02
   1.19007230e-01 -4.58791181e-02 -7.26214051e-02 -3.25840116e-02
   5.23413308e-02  4.50553037e-02  8.25298298e-03  3.67024504e-02
  -1.39415674e-02  6.53918684e-02 -2.64271945e-02  2.06370896e-04
  -1.36643145e-02 -3.62810642e-02 -1.95043720e-02 -2.89738011e-02
   3.94270122e-02 -8.84091184e-02  2.62427772e-03  1.36713730e-02
   4.83062901e-02 -3.11566498e-02 -1.17329173e-01 -5.11690713e-02
  -8.85288343e-02 -2.18963325e-02  1.42986095e-02  4.44167666e-02
  -1.34815648e-02  7.43392259e-02  2.66382862e-02 -1.98763069e-02
   1.79191

This code shows how to retrieve your own custom data so that it can be provided as input to an LLM, letting you use the LLM's potential for your own use case. It is a simple implementation of the retrieval part of RAG (retrieval-augmented generation).

In [None]:
import faiss
from sentence_transformers import SentenceTransformer

# Retrieval demo: embed a small corpus, index it with FAISS, then look up the
# sentences closest to a query and report them with a cosine-similarity score.

# Load pre-trained sentence transformer model (used for both corpus and query)
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Define your custom data as a list of text strings
sentences = ["India is worlds best place", "Rome is also decent", "cinque terre is wonderful", "Kerela is Gods own country", "Moon is not on earth", "Europe is a must visit"]

# Convert sentences to embeddings. Unit-length vectors are requested explicitly
# because the distance -> similarity conversion below is only valid for them.
embeddings = model.encode(sentences, normalize_embeddings=True)

# Create a FAISS index for efficient similarity search
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Save the index for future use
faiss.write_index(index, "my_vector_database.faiss")
print("Vector database created and saved!")

# Load the FAISS index from disk
index = faiss.read_index("my_vector_database.faiss")

# Define your query sentence
query_sentence = "which is worlds best place"

# Convert the query sentence to an embedding (normalized like the corpus)
query_embedding = model.encode([query_sentence], normalize_embeddings=True)

# Retrieve the top k most similar sentences. k is clamped to the index size:
# when k exceeds the number of stored vectors FAISS pads the result with -1,
# and sentences[-1] would silently return the last sentence.
k = min(5, index.ntotal)
distances, similar_indices = index.search(query_embedding, k)

# IndexFlatL2 reports *squared* L2 distances. For unit vectors
# d^2 = 2 - 2*cos, hence cos = 1 - d^2 / 2, which lies in [-1, 1].
# (The previous `1 - distance` was not a similarity and could drop below -1.)
similarities = 1.0 - distances[0] / 2.0

# Prepare the response incorporating the retrieved similar sentences
response = f"The query sentence is: '{query_sentence}'.\n\n"
response += "Top similar sentences:\n"

# List the retrieved sentences, best match first, with their similarity
for rank, (sentence_id, similarity) in enumerate(zip(similar_indices[0], similarities), start=1):
    response += f"{rank}. (Similarity: {similarity:.4f}): {sentences[sentence_id]}\n"

print(response)


# Results are ordered by increasing distance, so position 0 is the best match.
best_match = sentences[similar_indices[0][0]]
best_match_similarity = similarities[0]
# Old variable name kept as an alias; note that it holds a similarity, not a distance.
best_match_distance = best_match_similarity
response = f"The best match for your query '{query_sentence}' is:\n'{best_match}' with a similarity score of {best_match_similarity:.4f}."

print(response)


Vector database created and saved!
The query sentence is: 'which is worlds best place'.

Top similar sentences:
1. (Similarity: 0.5098): India is worlds best place
2. (Similarity: -0.1724): Europe is a must visit
3. (Similarity: -0.2183): Rome is also decent
4. (Similarity: -0.4220): cinque terre is wonderful
5. (Similarity: -0.5599): Kerela is Gods own country

The best match for your query 'which is worlds best place' is:
'India is worlds best place' with a similarity score of 0.5098.
