## Data Loading

In [2]:
import os
from langchain_community.document_loaders import CSVLoader

In [3]:
# Locate the project root, i.e. the directory that contains data/.
# On a fresh kernel the working directory is a sub-folder of the project, so
# the root is its parent. A later cell changes the working directory to the
# project root; if this cell is re-run after that, the working directory is
# already the root. Checking for the catalog file first makes this cell
# produce the same path in both situations.
current_dir = os.getcwd()
if os.path.isfile(os.path.join(current_dir, "data", "shl_catalog_clean.csv")):
    parent_dir = current_dir
else:
    parent_dir = os.path.dirname(current_dir)
csv_path = os.path.join(parent_dir, "data", "shl_catalog_clean.csv")

# Fail here, with the resolved path in the message, rather than later inside
# loader.load().
if not os.path.isfile(csv_path):
    raise FileNotFoundError(f"Catalog CSV not found at {csv_path!r} (cwd={current_dir!r})")

# The loader is only configured here; the file is read by loader.load().
loader = CSVLoader(file_path=csv_path,encoding='utf-8')

In [4]:
# Read the catalog; the outputs below show one Document per CSV row, with the
# row index kept in metadata["row"].
docs = loader.load()

In [5]:
# Preview the first two loaded rows. Slicing (instead of indexing 0 and 1)
# avoids an IndexError when the catalog has fewer than two rows.
for preview_doc in docs[:2]:
    print(preview_doc.page_content)

name: Global Skills Development Report
url: https://www.shl.com/products/product-catalog/view/global-skills-development-report/
remote_support: Yes
adaptive_support: No
test_type: A, E, B, C, D, P
description: This report is designed to be given to individuals who have completed the Global Skills Assessment (GSA). With coverage across the Great 8 Domains, this measure of self-reported behaviors offers a complete overview of their current skills. Participants receive actionable tips on leveraging their top skill strengths and how they might develop their growth skills.
duration: N/A
job_levels: Director, Entry-Level, Executive, General Population, Graduate, Manager, Mid-Professional, Front Line Manager, Supervisor,
name: .NET Framework 4.5
url: https://www.shl.com/products/product-catalog/view/net-framework-4-5/
remote_support: Yes
adaptive_support: Yes
test_type: K
description: The.NET Framework 4.5 test measures knowledge of .NET environment. Designed for experienced users, this test 

## Data Chunking

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [7]:
# Split every row document into chunks of at most 800 characters with a
# 10-character overlap.
# NOTE(review): rows longer than 800 characters are cut into several chunks,
# and the trailing chunks no longer contain the "name:" / "url:" fields (see
# "Result 1" in the retriever output, and the filter applied after it).
# Consider whether rows should be split at all - TODO confirm.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=10)
splitted_text = splitter.split_documents(docs)

# Show the first chunk as the cell output.
splitted_text[0]

Document(metadata={'source': 'c:\\Users\\Ankit\\Desktop\\SHL-GenAI-Assessment-Recommendation\\data\\shl_catalog_clean.csv', 'row': 0}, page_content='name: Global Skills Development Report\nurl: https://www.shl.com/products/product-catalog/view/global-skills-development-report/\nremote_support: Yes\nadaptive_support: No\ntest_type: A, E, B, C, D, P\ndescription: This report is designed to be given to individuals who have completed the Global Skills Assessment (GSA). With coverage across the Great 8 Domains, this measure of self-reported behaviors offers a complete overview of their current skills. Participants receive actionable tips on leveraging their top skill strengths and how they might develop their growth skills.\nduration: N/A\njob_levels: Director, Entry-Level, Executive, General Population, Graduate, Manager, Mid-Professional, Front Line Manager, Supervisor,')

## Embedding the chunked documents

In [8]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Sentence-transformers model used to embed each chunk.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Switch to the project root so that the relative "app/..." path used here and
# in the retriever cell resolves (presumably the later `app.` import relies on
# this too - verify). `parent_dir` is the absolute path computed in the
# data-loading cell; unlike os.chdir(".."), changing to it does not climb one
# more directory every time this cell is re-run.
os.chdir(parent_dir)
persist_dir = "app/chroma_shl_db"

if os.path.isdir(persist_dir) and os.listdir(persist_dir):
    # A store already exists on disk. Calling from_documents() again would add
    # the same chunks a second time, so retrieval would return duplicates.
    # Re-open the existing store instead.
    vectordb = Chroma(
        persist_directory=persist_dir,
        embedding_function=embedding_model
    )
    print(f"Existing ChromaDB loaded from {persist_dir!r}; delete that directory to rebuild it.")
else:
    # First run: embed all chunks and persist them.
    vectordb = Chroma.from_documents(
        documents=splitted_text,
        embedding=embedding_model,
        persist_directory=persist_dir
    )
    print("ChromaDB created and saved successfully!")


ChromaDB created and saved successfully!


## Retriever

In [15]:
from app.utils.get_retriever import get_retriever
# Sample job description used to smoke-test retrieval.
query = "ICICI Bank Assistant Admin, Experience required 0-2 years, test should be 30-40 mins long"

# get_retriever is a project helper; presumably it wraps the Chroma store
# persisted at this path - verify against app/utils/get_retriever.
retriever = get_retriever("app/chroma_shl_db")
results = retriever.invoke(query)

print(f"\n Query: {query}")
print(f"Top {len(results)} retrieved documents:\n")

# Print each hit with a 1-based rank, followed by a separator line.
separator = "-" * 80
for rank, doc in enumerate(results, start=1):
    print(f"Result {rank}:")
    print(doc.page_content)
    print(separator)



 Query: ICICI Bank Assistant Admin, Experience required 0-2 years, test should be 30-40 mins long
Top 10 retrieved documents:

Result 1:
Completion time is 36 minutes for the test itself, plus 10 minutes for instructions and practice.
duration: 36
job_levels: Graduate, Manager, Mid-Professional, Professional Individual Contributor,
--------------------------------------------------------------------------------
Result 2:
name: Financial and Banking Services (New)
url: https://www.shl.com/products/product-catalog/view/financial-and-banking-services-new/
remote_support: Yes
adaptive_support: No
test_type: K
description: Multi-choice test that measures the knowledge of investment products, banking products, taxation and principles of Macroeconomics.
duration: 9
job_levels: Graduate, Manager, Mid-Professional, Professional Individual Contributor, Supervisor,
--------------------------------------------------------------------------------
Result 3:
name: Automata Pro (New)
url: https://www

In [16]:
# Keep only the retrieved chunks whose text still contains both the "name:"
# and "url:" fields; chunks missing either one are dropped.
required_fields = ("name:", "url:")
valid_results = [
    doc
    for doc in results
    if all(field in doc.page_content for field in required_fields)
]

print(f"✅ {len(valid_results)} valid documents after filtering.")


✅ 5 valid documents after filtering.


In [17]:
# Show the filtered documents as the cell output (full list, metadata included).
valid_results

[Document(metadata={'source': 'c:\\Users\\Ankit\\Desktop\\SHL-GenAI-Assessment-Recommendation\\data\\shl_catalog_clean.csv', 'row': 113}, page_content='name: Financial and Banking Services (New)\nurl: https://www.shl.com/products/product-catalog/view/financial-and-banking-services-new/\nremote_support: Yes\nadaptive_support: No\ntest_type: K\ndescription: Multi-choice test that measures the knowledge of investment products, banking products, taxation and principles of Macroeconomics.\nduration: 9\njob_levels: Graduate, Manager, Mid-Professional, Professional Individual Contributor, Supervisor,'),
 Document(metadata={'row': 39, 'source': 'c:\\Users\\Ankit\\Desktop\\SHL-GenAI-Assessment-Recommendation\\data\\shl_catalog_clean.csv'}, page_content='name: Automata Pro (New)\nurl: https://www.shl.com/products/product-catalog/view/automata-pro-new/\nremote_support: Yes\nadaptive_support: No\ntest_type: S\ndescription: An AI-powered coding simulation assessment that evaluates candidate’s progr