In [3]:
import pandas as pd
import faiss
import pickle
from sklearn.preprocessing import normalize
from sentence_transformers import SentenceTransformer

# Build the retrieval artifacts for the ticket dataset: sentence embeddings
# for descriptions and solutions, a FAISS index over the description
# embeddings, and a parallel metadata list. Everything is written to the
# current working directory.

# Load dataset; lower-case the headers so the column lookups below do not
# depend on the capitalisation used in the spreadsheet.
df = pd.read_excel("cleaned_filtered_ds.xlsx")
df.columns = df.columns.str.lower()

# astype(str) guarantees the encoder receives strings only (missing cells
# become the literal text "nan").
descriptions = df['description'].astype(str).tolist()
solutions = df['solution'].astype(str).tolist()
categories = df['category'].astype(str).tolist()

# Load model and compute both embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# normalize() defaults to row-wise L2 normalisation, so every embedding is a
# unit vector.
desc_embeddings = normalize(model.encode(descriptions, show_progress_bar=True))
soln_embeddings = normalize(model.encode(solutions, show_progress_bar=True))

# Create FAISS index for description embeddings. On unit vectors the squared
# L2 distance equals 2 - 2 * cosine similarity, so the L2 ranking matches a
# cosine-similarity ranking.
# NOTE(review): faiss expects float32 input; encode() presumably returns
# float32 and normalize() preserves it -- confirm if the model changes.
dimension = desc_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(desc_embeddings)
faiss.write_index(index, "desc_index.faiss")

# Save the freshly computed description embeddings. (Previously this step
# *loaded* the pickle instead, discarding the arrays computed above and
# failing when the file did not exist yet.)
with open("desc_embeddings.pkl", "wb") as f:
    pickle.dump(desc_embeddings, f)

# Save the freshly computed solution embeddings
with open("soln_embeddings.pkl", "wb") as f:
    pickle.dump(soln_embeddings, f)

# Save metadata; row i of the list corresponds to vector i in the index and
# to row i of both embedding arrays.
metadata = list(zip(descriptions, solutions, categories))
with open("desc_metadata.pkl", "wb") as f:
    pickle.dump(metadata, f)

print("Saved FAISS index, embeddings, and metadata.")


Batches:   0%|          | 0/840 [00:00<?, ?it/s]

Batches:   0%|          | 0/840 [00:00<?, ?it/s]

Saved FAISS index, embeddings, and metadata.
