In [1]:
import pandas as pd
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings


In [2]:
# Read the evaluation queries from disk; the "Query" column holds the text
# that is later sent to the vector store.
test_df = pd.read_csv(filepath_or_buffer="test_data.csv")

# Preview the first five rows as the cell output.
test_df.head(n=5)


Unnamed: 0,Query
0,Looking to hire mid-level professionals who ar...
1,Job Description\r\n\r\n Join a community that ...
2,I am hiring for an analyst and wants applicati...
3,I have a JD Job Description\r\n\r\n People Sci...
4,I am new looking for new graduates in my sales...


In [3]:
# Embedding model used to turn query text into vectors for the search below.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Open the Chroma store persisted in the "shl_vector_db" directory.
# NOTE(review): presumably this store was built with the same embedding
# model — confirm against the indexing notebook/script.
vector_db = Chroma(embedding_function=embeddings, persist_directory="shl_vector_db")


  embeddings = HuggingFaceEmbeddings(
  vector_db = Chroma(


In [4]:
def balanced_recommendations(results, final_k=10):
    """Pick up to ``final_k`` results, spread across the K, P and A test types.

    Each result must expose a ``metadata`` mapping. Its ``"test_type"`` entry
    (treated as ``"K"`` when the key is absent) selects the bucket. Results
    whose test type is none of K/P/A are not bucketed and can only be chosen
    by the back-fill step.

    Args:
        results: Iterable of result objects, in order of preference.
        final_k: Maximum number of results to return.

    Returns:
        A list of at most ``final_k`` results: first an equal share taken from
        each non-empty bucket (in K, P, A order), then, if still short, the
        not-yet-selected results in their original order. An empty list is
        returned when ``results`` is empty or ``final_k`` is not positive.
    """
    if final_k <= 0:
        return []

    # Materialise once: the input is walked twice (bucketing and back-fill),
    # which would silently lose data on a one-shot iterator.
    results = list(results)

    type_dict = {"K": [], "P": [], "A": []}
    for r in results:
        ttype = r.metadata.get("test_type", "K")
        if ttype in type_dict:
            type_dict[ttype].append(r)

    non_empty = [items for items in type_dict.values() if items]

    balanced = []
    # Guard: with no recognised test type there is nothing to share out, and
    # dividing by the bucket count would raise ZeroDivisionError.
    if non_empty:
        per_type = max(final_k // len(non_empty), 1)
        for items in non_empty:
            balanced.extend(items[:per_type])

    # Back-fill from everything not yet selected, preserving input order.
    if len(balanced) < final_k:
        remaining = [r for r in results if r not in balanced]
        balanced.extend(remaining[:final_k - len(balanced)])

    return balanced[:final_k]


In [5]:
# One entry per test query: the recommended URLs joined into a single string.
predictions = []

for query in test_df["Query"]:
    # Fetch 30 candidates, then let the balancer cut them down to 10.
    candidates = vector_db.similarity_search(query, k=30)
    top_docs = balanced_recommendations(candidates, final_k=10)

    # A document without a "url" entry contributes an empty string.
    predictions.append(" | ".join(doc.metadata.get("url", "") for doc in top_docs))


In [6]:
# Pair every query with its joined URL string, one row per query.
submission_df = pd.DataFrame(
    data={"query": test_df["Query"], "predictions": predictions},
)

# Preview the first five rows as the cell output.
submission_df.head(n=5)


Unnamed: 0,query,predictions
0,Looking to hire mid-level professionals who ar...,https://www.shl.com/products/product-catalog/v...
1,Job Description\r\n\r\n Join a community that ...,https://www.shl.com/products/product-catalog/v...
2,I am hiring for an analyst and wants applicati...,https://www.shl.com/products/product-catalog/v...
3,I have a JD Job Description\r\n\r\n People Sci...,https://www.shl.com/products/product-catalog/v...
4,I am new looking for new graduates in my sales...,https://www.shl.com/products/product-catalog/v...


In [7]:
# Write the submission file; index=False keeps the row index out of the CSV.
submission_df.to_csv(path_or_buf="drishti_prakash.csv", index=False)
