In [6]:
!pip install faiss-cpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [14]:
from sentence_transformers import SentenceTransformer
import faiss
import json
import os

# Embedding model and the on-disk locations of the persisted search state.
model = SentenceTransformer("all-MiniLM-L6-v2")
index_file = "my_index.faiss"
texts_file = "texts.json"

# Ask the model for its output width instead of hard-coding it, so the index
# stays correct if the model name above is changed.
embedding_dim = model.get_sentence_embedding_dimension()

# Load or create index.
# Inner-product index: add_text/search encode with normalize_embeddings=True,
# so the inner product of two vectors is their cosine similarity.
if os.path.exists(index_file):
    index = faiss.read_index(index_file)
    if index.d != embedding_dim:
        raise ValueError(
            f"{index_file} stores {index.d}-dimensional vectors but the model "
            f"produces {embedding_dim}-dimensional embeddings; delete the index "
            "file or load the model it was built with."
        )
else:
    index = faiss.IndexFlatIP(embedding_dim)

# Load or init texts
if os.path.exists(texts_file):
    with open(texts_file, "r", encoding="utf-8") as f:
        texts = json.load(f)
else:
    texts = []

# search() maps FAISS row ids straight to positions in `texts`, so the two
# persisted files must describe the same number of entries.
if index.ntotal != len(texts):
    raise ValueError(
        f"{index_file} holds {index.ntotal} vectors but {texts_file} holds "
        f"{len(texts)} texts; the files are out of sync - delete both to rebuild."
    )

def add_text(new_texts):
    """Embed new texts, append them to the index, and persist both to disk.

    Args:
        new_texts: A single string or an iterable of strings to add.

    Returns:
        The number of texts actually added. Texts that are already stored
        (or repeated within ``new_texts``) are skipped, so calling this again
        with the same input leaves the index unchanged.
    """
    # A bare string would otherwise be iterated character by character by
    # ``texts.extend`` and encoded to a 1-D vector instead of a (1, dim) batch.
    if isinstance(new_texts, str):
        new_texts = [new_texts]

    # Keep only unseen texts, preserving their original order.
    seen = set(texts)
    fresh = []
    for text in new_texts:
        if text not in seen:
            seen.add(text)
            fresh.append(text)

    # Nothing to embed: avoid handing an empty batch to the model / index.
    if not fresh:
        return 0

    embeddings = model.encode(fresh, normalize_embeddings=True).astype("float32")
    index.add(embeddings)
    # Row i of the index corresponds to texts[i]; append in the same order.
    texts.extend(fresh)

    # Save to disk
    faiss.write_index(index, index_file)
    with open(texts_file, "w", encoding="utf-8") as f:
        json.dump(texts, f)

    return len(fresh)

def search(query, top_k=5):
    """Return the stored texts most similar to ``query``, best match first.

    Args:
        query: The text to search for.
        top_k: Maximum number of results to return.

    Returns:
        A list of at most ``top_k`` stored texts. Empty when the index holds
        no vectors or ``top_k`` is not positive.
    """
    # Never ask for more neighbours than exist; a non-positive k is rejected
    # by FAISS, so answer with an empty result instead of failing.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    query_emb = model.encode([query], normalize_embeddings=True).astype("float32")
    _scores, ids = index.search(query_emb, k)
    # FAISS pads missing neighbours with -1; drop those slots.
    return [texts[i] for i in ids[0] if i != -1]

# Example usage
sample_texts = [
    "This is an AI workshop",
    "Machine learning and data science",
    "Cooking recipes for Nepali food"
]

# Only pass texts that are not stored yet, so re-running this cell does not
# append duplicates to the persisted index and the output below stays stable.
missing_texts = [t for t in sample_texts if t not in texts]
if missing_texts:
    add_text(missing_texts)

results = search("Tell me something about machine learning")
for text in results:
    print("Text:", text)

Text: Machine learning and data science
Text: This is an AI workshop
Text: Cooking recipes for Nepali food
