In [3]:
from sentence_transformers import SentenceTransformer, CrossEncoder, util
import faiss
import numpy as np

# Toy corpus for the retrieve-then-rerank demo; a document's list position
# doubles as its ID.
corpus = [
    "The Eiffel Tower is in Paris.",
    "The Great Wall of China is visible from space.",
    "Machine learning is a subfield of artificial intelligence.",
    "Paris is the capital city of France.",
    "Python is a popular programming language.",
    "The Pyramids of Giza are in Egypt.",
]
corpus_ids = [doc_id for doc_id, _ in enumerate(corpus)]

# Bi-encoder: embed every document once, up front, as a float32 matrix
# (the FAISS index below is fed float32 vectors).
bi_encoder = SentenceTransformer('all-MiniLM-L6-v2')
corpus_embeddings = np.array(
    bi_encoder.encode(corpus, convert_to_tensor=False, show_progress_bar=True),
    dtype="float32",
)

# Flat L2 index sized to the embedding width, holding all corpus vectors.
dimension = corpus_embeddings.shape[-1]
faiss_index = faiss.IndexFlatL2(dimension)
faiss_index.add(corpus_embeddings)

# Cross-encoder used afterwards to re-score (query, document) pairs.
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def search_with_rerank(query, top_k=3):
    """Retrieve candidates from the FAISS index, re-rank them, and print them.

    The query is embedded with the module-level ``bi_encoder``, the ``top_k``
    nearest vectors are looked up in ``faiss_index``, and each candidate is
    re-scored against the query with ``cross_encoder``. Results are printed
    in descending cross-encoder score order.

    Args:
        query: The search text.
        top_k: Number of candidates to request from the index. Fewer results
            are shown when the index cannot supply that many.

    Returns:
        None. The ranking is written to stdout.
    """
    query_matrix = np.asarray(bi_encoder.encode([query]), dtype="float32")
    _, neighbor_ids = faiss_index.search(query_matrix, top_k)

    # FAISS pads the label row with -1 when it finds fewer than `top_k`
    # neighbours. Indexing `corpus` with -1 would silently return the *last*
    # document, so padding entries are dropped before anything is looked up.
    hit_ids = [int(idx) for idx in neighbor_ids[0] if idx >= 0]

    reranked_results = []
    if hit_ids:  # only invoke the cross-encoder when there is something to score
        cross_inp = [[query, corpus[idx]] for idx in hit_ids]
        cross_scores = cross_encoder.predict(cross_inp)
        # Sort on the score alone so ties never fall back to comparing ids.
        reranked_results = sorted(
            zip(cross_scores, hit_ids), key=lambda pair: pair[0], reverse=True
        )

    print(f"\n🔍 Query: {query}\n")
    for score, idx in reranked_results:
        print(f"Score: {score:.4f} | Doc ID: {idx} | Text: {corpus[idx]}")

# Demo: one location question and one programming question.
for demo_query in (
    "Where is the Eiffel Tower?",
    "Tell me about programming languages.",
):
    search_with_rerank(demo_query)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


🔍 Query: Where is the Eiffel Tower?

Score: 9.8405 | Doc ID: 0 | Text: The Eiffel Tower is in Paris.
Score: -6.7940 | Doc ID: 3 | Text: Paris is the capital city of France.
Score: -10.6811 | Doc ID: 1 | Text: The Great Wall of China is visible from space.

🔍 Query: Tell me about programming languages.

Score: 0.6079 | Doc ID: 4 | Text: Python is a popular programming language.
Score: -10.1453 | Doc ID: 2 | Text: Machine learning is a subfield of artificial intelligence.
Score: -11.1572 | Doc ID: 5 | Text: The Pyramids of Giza are in Egypt.


In [5]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from sentence_transformers import CrossEncoder

# One pasta-recipe step per document.
recipe_steps = [
    "Boil water in a large pot with a pinch of salt.",
    "Add pasta to boiling water and stir occasionally.",
    "Cook pasta for 8-10 minutes until al dente.",
    "Drain the pasta using a colander.",
    "Add your favorite sauce and mix well.",
    "Serve the pasta hot with grated cheese on top.",
    "Optionally, garnish with basil or parsley for flavor.",
]
docs = [Document(page_content=step) for step in recipe_steps]

# Every step is far shorter than the 300-character chunk size, so the
# splitter should leave them as-is here (NOTE(review): confirm if docs grow).
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0)
split_docs = text_splitter.split_documents(docs)

# Embed the chunks and build the FAISS-backed vector store from them.
embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(split_docs, embed_model)

# Cross-encoder for re-scoring (query, chunk) pairs; this rebinds the
# `cross_encoder` name, so the cell does not depend on an earlier cell's model.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

def search_with_rerank(query, k=3):
    """Fetch ``k`` candidates from the vector store, re-rank them, and print them.

    Candidates come from the module-level ``vectorstore``; each one is scored
    against ``query`` by the module-level ``cross_encoder`` and printed in
    descending score order.

    Args:
        query: The search text.
        k: Number of candidates to request from the vector store.

    Returns:
        None. The ranking is written to stdout.
    """
    candidates = vectorstore.similarity_search(query, k=k)
    relevance = cross_encoder.predict(
        [[query, candidate.page_content] for candidate in candidates]
    )

    # Pair each score with its document, then order best-first by score only.
    ranked = list(zip(relevance, candidates))
    ranked.sort(key=lambda pair: pair[0], reverse=True)

    print(f"\n🔍 Query: {query}\n")
    for relevance_score, candidate in ranked:
        print(f"Score: {relevance_score:.4f} | Text: {candidate.page_content}")

# Demo: two questions against the pasta-recipe steps.
for demo_query in (
    "How to cook pasta?",
    "What should I add after boiling pasta?",
):
    search_with_rerank(demo_query)



🔍 Query: How to cook pasta?

Score: 3.4070 | Text: Add pasta to boiling water and stir occasionally.
Score: 1.3242 | Text: Drain the pasta using a colander.
Score: 0.8529 | Text: Serve the pasta hot with grated cheese on top.

🔍 Query: What should I add after boiling pasta?

Score: 2.8984 | Text: Add pasta to boiling water and stir occasionally.
Score: -5.8037 | Text: Serve the pasta hot with grated cheese on top.
Score: -6.2938 | Text: Drain the pasta using a colander.
