In [1]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
def create_faiss_index(
    source_file: str,
    output_file: str,
    delimiter: str = '==CHUNK END==',
    model_name: str = 'all-MiniLM-L6-v2',
) -> None:
    """Embed the chunks of a text file and save them as a FAISS L2 index.

    The file is read as UTF-8 and split on ``delimiter``. Blank chunks at the
    end of the file are discarded; the remaining chunks are encoded with a
    SentenceTransformer model and added, in file order, to a
    ``faiss.IndexFlatL2`` that is then written to ``output_file``.

    Args:
        source_file: Path of the text file to index.
        output_file: Path the FAISS index is written to.
        delimiter: Marker string that separates chunks in ``source_file``.
        model_name: Name passed to ``SentenceTransformer`` to load the model.

    Raises:
        ValueError: If ``source_file`` contains no non-blank chunk.
    """
    # Read and split the text
    with open(source_file, "r", encoding="utf-8") as f:
        content = f.read()
    chunks = content.split(delimiter)

    # A file that ends with the delimiter produces a blank final chunk, which
    # would otherwise be embedded and stored as a meaningless vector. Only the
    # tail is trimmed, so the position of every remaining chunk still matches
    # a plain content.split(delimiter).
    while chunks and not chunks[-1].strip():
        chunks.pop()
    if not chunks:
        raise ValueError(
            f"No text chunks found in {source_file!r} (delimiter={delimiter!r})"
        )

    # Load the model and encode the chunks
    model = SentenceTransformer(model_name)
    # FAISS takes a C-contiguous float32 matrix; convert explicitly instead of
    # relying on the encoder's default output dtype/layout.
    embeddings = np.ascontiguousarray(model.encode(chunks), dtype=np.float32)

    # Create the FAISS index (one row per chunk, columns = embedding size)
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # Save the index to the output file
    faiss.write_index(index, output_file)
    print(f"Index created and saved to {output_file}")

# Bucătar (chef) — recipes index

In [11]:
# Build the FAISS index for the parsed recipes file.
source_file, output_file = '../recipes_dataset/parsed_recipes.txt', 'recipes.faiss'
create_faiss_index(source_file=source_file, output_file=output_file)

Index created and saved to recipes.faiss


# Psiholog (psychologist) — conversations index

In [12]:
# Build the FAISS index for the parsed conversations file.
source_file, output_file = (
    '../conversations_dataset/parsed_conversations.txt',
    'conversations.faiss',
)
create_faiss_index(source_file=source_file, output_file=output_file)

Index created and saved to conversations.faiss
