In [1]:
## Step 3: Embeddings & Vector Database (FAISS)

In [None]:
#pip install pandas sentence-transformers faiss-cpu
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss

# Load the cleaned assessment catalogue
df = pd.read_csv("cleaned_assessments.csv")
print("Total assessments", len(df))
df.head()  # NOTE(review): Jupyter only renders this if it is the last expression of its cell


# Create the text column for embeddings: assessment name followed by its test type.
# Missing values are replaced with "" before the string cast; a bare astype(str)
# turns NaN into the literal word "nan", which would then be embedded as real content.
df["text_for_embedding"] = (
    df["name"].fillna("").astype(str)
    + " "
    + df["test_type"].fillna("").astype(str)
).str.strip()  # drop the dangling separator when one side is empty

# Sentence-transformer model used to embed the assessment texts
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every row's text in a single call, with a progress bar while it runs
embeddings = model.encode(df["text_for_embedding"].tolist(), show_progress_bar=True)

# Embedding width (second axis of the encoded matrix); the FAISS index is sized with it
dimension = embeddings.shape[1]

import faiss
import numpy as np

# Flat L2 index sized to the embedding width
index = faiss.IndexFlatL2(dimension)

# FAISS accepts only C-contiguous float32 matrices. ascontiguousarray enforces both
# and, unlike np.array(), does not copy when the input already qualifies.
index.add(np.ascontiguousarray(embeddings, dtype=np.float32))

# Every embedding must have landed in the index, otherwise search hits cannot be
# mapped back to rows reliably.
assert index.ntotal == len(embeddings), (
    f"FAISS index holds {index.ntotal} vectors, expected {len(embeddings)}"
)

# Save the FAISS index so the search step can reload it without re-encoding
faiss.write_index(index, "shl_faiss.index")


## Step 4: FAISS Search

#Import Libraries
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Load the assessment table and the previously saved FAISS index
df = pd.read_csv("cleaned_assessments.csv")
index = faiss.read_index("shl_faiss.index")

# Search hits are used as positional row indices into df, so the index must hold
# exactly one vector per CSV row. A stale index file would otherwise map hits to
# the wrong assessments (or fall outside the frame).
assert index.ntotal == len(df), (
    f"shl_faiss.index holds {index.ntotal} vectors but cleaned_assessments.csv "
    f"has {len(df)} rows; rebuild the index"
)


# Load the same embedding model that produced the indexed vectors, so the query
# is embedded into the same vector space
model = SentenceTransformer("all-MiniLM-L6-v2")

# Take a recruiter query
query = "Hiring a Java developer with good communication skills"

# Convert the query to an embedding. A one-element list is passed so the result
# is a 2-D matrix with a single row rather than a flat vector.
query_embedding = model.encode([query])

# Search FAISS (top 10). The query is handed over as C-contiguous float32,
# the only layout FAISS accepts.
k = 10
distances, indices = index.search(
    np.ascontiguousarray(query_embedding, dtype=np.float32), k
)
distences = distances  # alias for the original misspelled name, kept for later cells


# Fetch results from the DataFrame.
# When the index holds fewer than k vectors FAISS pads the id list with -1;
# df.iloc[-1] would silently return the LAST row as a fake hit, so drop those ids.
valid_ids = indices[0][indices[0] >= 0]
result = df.iloc[valid_ids][["name", "url", "test_type"]]
result

