In [1]:
!pip install -q sentence-transformers faiss-cpu pandas

In [2]:
# 📚 Hadiths FAISS Index Creator

# --- Install Required Libraries


# --- Step 1: Import Libraries
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# --- Step 2: Load Hadiths Data
# (Make sure you have a 'hadiths.csv' file with a column 'hadith')

hadiths_file = 'cleaned_hadith_data.csv'  # <-- Change filename if different

try:
    df = pd.read_csv(hadiths_file)
    hadiths_list = df['hadith'].dropna().tolist()
    print(f"Loaded {len(hadiths_list)} hadiths successfully!")
except Exception as e:
    print(f"Error loading hadiths file: {e}")

# --- Step 3: Load Sentence Transformer Model
model_name = 'all-MiniLM-L6-v2'  # Good balance between speed and accuracy
model = SentenceTransformer(model_name)
print("Model loaded.")

# --- Step 4: Create Embeddings
print("Creating embeddings...")
embeddings = model.encode(hadiths_list, show_progress_bar=True)

# --- Step 5: Build FAISS Index
print("Building FAISS index...")
embeddings = np.array(embeddings).astype('float32')
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# --- Step 6: Save FAISS Index and Hadiths List
faiss.write_index(index, 'hadiths_index.faiss')
np.save('hadiths_texts.npy', hadiths_list)

print("\n✅ Successfully created and saved:")
print("- 'hadiths_index.faiss' (FAISS Index File)")
print("- 'hadiths_texts.npy' (Hadith Text List)")

# --- How to Load Later (example)
# hadiths_list = np.load('hadiths_texts.npy', allow_pickle=True)
# index = faiss.read_index('hadiths_index.faiss')


Error loading hadiths file: 'hadith'


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Model loaded.
Creating embeddings...


NameError: name 'hadiths_list' is not defined