In [None]:
import re
from pathlib import Path

import pandas as pd
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

In [None]:
# Embedding model used to vectorise queries for the Chroma store below.
# NOTE(review): presumably the same model the persisted vectors were built
# with -- confirm against the indexing notebook.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [None]:
# Directory that holds the previously persisted Chroma collection
# (relative to the notebook's working directory).
VECTOR_DB_DIR = Path("shl_vector_db")

# Fail loudly when the store is missing. NOTE(review): Chroma's persistent
# client is expected to create a new, empty store when the path does not
# exist, which would make the success message below misleading and every
# later similarity search return nothing -- confirm against the Chroma docs.
if not VECTOR_DB_DIR.is_dir():
    raise FileNotFoundError(
        f"Vector DB directory not found: {VECTOR_DB_DIR.resolve()}"
    )

vector_db = Chroma(
    persist_directory=str(VECTOR_DB_DIR),
    embedding_function=embeddings
)

print("Vector DB loaded successfully!")


Vector DB loaded successfully!


In [None]:
def keyword_boost(results, query, keywords=None):
    """Re-rank documents so those sharing keywords with the query come first.

    A document earns one point for every keyword that occurs, as a whole
    word, both in the query and in the document's text (its ``page_content``
    plus the ``name`` metadata field). Documents are then ordered by score,
    highest first; the sort is stable, so documents with equal scores keep
    the relative order they had in ``results``.

    Matching is case-insensitive and on word boundaries, so the keyword
    "java" does not match "javascript". A trailing plural "s" is tolerated
    ("developer" matches "developers").

    Args:
        results: Iterable of document-like objects exposing ``page_content``
            and a ``metadata`` dict.
        query: The user's free-text query.
        keywords: Optional iterable of keywords to score on. Defaults to the
            built-in list of skill / assessment-type terms.

    Returns:
        A new list containing every item of ``results``, re-ordered.
    """
    if keywords is None:
        keywords = [
            "java", "python", "sql", "javascript",
            "developer", "coding", "programming",
            "collaboration", "team", "personality",
            "cognitive", "behavioral", "aptitude"
        ]

    query_lower = query.lower()

    # Only keywords that appear in the query can ever score, so decide that
    # once here instead of re-checking it for every document.
    active_patterns = []
    for kw in keywords:
        # Lookarounds instead of \b so keywords ending in punctuation
        # (e.g. "c++") still get a proper boundary check.
        pattern = re.compile(r"(?<!\w)" + re.escape(kw.lower()) + r"s?(?!\w)")
        if pattern.search(query_lower):
            active_patterns.append(pattern)

    if not active_patterns:
        # Nothing to boost on: keep the incoming (similarity) order.
        return list(results)

    scored = []
    for doc in results:
        # `or ""` guards against a missing or None name / content.
        name = doc.metadata.get("name") or ""
        text = f"{doc.page_content or ''} {name}".lower()
        score = sum(1 for pattern in active_patterns if pattern.search(text))
        scored.append((score, doc))

    # Stable sort: ties preserve the original ranking.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in scored]


In [None]:
def smart_recommendations(results, query, final_k=10):
    """Return up to ``final_k`` keyword-boosted results with unique URLs.

    The results are first re-ranked with ``keyword_boost``; the re-ranked
    list is then walked in order, keeping the first document seen for each
    ``url`` metadata value until ``final_k`` documents have been collected.

    Args:
        results: Document-like objects with a ``metadata`` dict that
            contains a ``"url"`` entry.
        query: The user's free-text query, passed on to ``keyword_boost``.
        final_k: Maximum number of documents to return. Zero or a negative
            value yields an empty list.

    Returns:
        A list of at most ``final_k`` documents, best-ranked first.

    Raises:
        KeyError: If a document's metadata has no ``"url"`` entry.
    """
    # Guard up front: the limit used to be tested with `==` only after an
    # append, so final_k <= 0 was never hit and every result came back.
    if final_k <= 0:
        return []

    final = []
    seen_urls = set()

    for doc in keyword_boost(results, query):
        url = doc.metadata["url"]
        if url in seen_urls:
            continue

        seen_urls.add(url)
        final.append(doc)

        if len(final) >= final_k:
            break

    return final


In [None]:
def soft_balance(results, final_k=10):
    """Mix results across test types instead of returning a single type.

    Each result is placed in a bucket by its upper-cased ``test_type``
    metadata: "K" goes to the technical bucket, "P" to the behavioral
    bucket, and anything else (including a missing value) to the cognitive
    bucket. The output starts with up to 4 technical, 3 behavioral and 3
    cognitive results, in that order. If that is fewer than ``final_k``,
    it is topped up with the remaining results (technical, then behavioral,
    then cognitive) that are not already included.

    Args:
        results: Document-like objects with a ``metadata`` dict.
        final_k: Maximum number of results to return.

    Returns:
        A list of at most ``final_k`` results.
    """
    # `None` is the catch-all bucket for every type other than K / P.
    buckets = {"K": [], "P": [], None: []}
    for doc in results:
        code = doc.metadata.get("test_type", "").upper()
        bucket_key = code if code in ("K", "P") else None
        buckets[bucket_key].append(doc)

    tech = buckets["K"]
    behavioral = buckets["P"]
    cognitive = buckets[None]

    # Initial per-bucket quota: 4 + 3 + 3.
    output = tech[:4] + behavioral[:3] + cognitive[:3]

    if len(output) < final_k:
        # Fill the remaining slots from whatever the quotas left out.
        for doc in tech + behavioral + cognitive:
            if doc not in output:
                output.append(doc)
            if len(output) == final_k:
                break

    return output[:final_k]


In [None]:
query = "Java developer with strong collaboration skills"

# Fetch a wide candidate pool (50), then boost / de-duplicate down to 10.
# NOTE(review): soft_balance is defined in this notebook but is not applied
# here, so the list below is not balanced across test types -- confirm
# whether that is intended.
results = vector_db.similarity_search(query, k=50)
final_results = smart_recommendations(results, query, final_k=10)

for r in final_results:
    name, test_type = r.metadata["name"], r.metadata["test_type"]
    print(name, "|", test_type)


Java 2 Platform Enterprise Edition 1.4 Fundamental | K
JavaScript (New) | K
Java 8 (New) | K
Core Java (Advanced Level) (New) | K
Java Platform Enterprise Edition 7 (Java EE 7) | K
Core Java (Entry Level) (New) | K
Enterprise Java Beans (New) | K
Java Web Services (New) | K
Java Frameworks (New) | K
Java Design Patterns (New) | K


In [None]:
# One table row per recommended assessment, in ranked order.
output = [
    {
        "Assessment Name": r.metadata["name"],
        "URL": r.metadata["url"],
        "Test Type": r.metadata["test_type"],
    }
    for r in final_results
]

pd.DataFrame(output)


Unnamed: 0,Assessment Name,URL,Test Type
0,Java 2 Platform Enterprise Edition 1.4 Fundame...,https://www.shl.com/products/product-catalog/v...,K
1,JavaScript (New),https://www.shl.com/products/product-catalog/v...,K
2,Java 8 (New),https://www.shl.com/products/product-catalog/v...,K
3,Core Java (Advanced Level) (New),https://www.shl.com/products/product-catalog/v...,K
4,Java Platform Enterprise Edition 7 (Java EE 7),https://www.shl.com/products/product-catalog/v...,K
5,Core Java (Entry Level) (New),https://www.shl.com/products/product-catalog/v...,K
6,Enterprise Java Beans (New),https://www.shl.com/products/product-catalog/v...,K
7,Java Web Services (New),https://www.shl.com/products/product-catalog/v...,K
8,Java Frameworks (New),https://www.shl.com/products/product-catalog/v...,K
9,Java Design Patterns (New),https://www.shl.com/products/product-catalog/v...,K
