In [None]:
import json
from langchain.schema import Document

# Load the assessment catalogue from disk.
with open("shl_recommended_assessments.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# The `data` fallback below suggests the file may also be a bare top-level
# list instead of {"recommended_assessments": [...]}. A list has no .get(),
# so only look the key up when the root is actually a dict.
if isinstance(data, dict):
    assessments = data.get("recommended_assessments", data)
else:
    assessments = data


def _stringify(value):
    """Render a field value as text; lists become comma-separated strings."""
    if isinstance(value, list):
        return ", ".join(str(v) for v in value)
    return str(value)


docs = []
for item in assessments:
    # Metadata: every field of the record, flattened to a string.
    metadata = {key: _stringify(value) for key, value in item.items()}

    # Page content: every field except "url", as "key: value" pairs joined
    # with " | ". The strings are reused from `metadata`, so both views of a
    # record always agree.
    page_content = " | ".join(
        f"{key}: {text}" for key, text in metadata.items() if key != "url"
    )
    docs.append(Document(page_content=page_content, metadata=metadata))


In [None]:
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
import os

from getpass import getpass

# Never commit credentials: take the Pinecone key from the environment and
# only prompt for it when it is missing. Any key that was previously
# hardcoded in this notebook should be treated as leaked and rotated.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("Pinecone API key: ")

# Embedding model used to vectorise the documents (all-mpnet-base-v2).
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Embed `docs` and upsert them into the Pinecone index in one step.
# NOTE(review): re-running this cell presumably upserts the same documents
# again - confirm whether duplicates accumulate in the index.
vectorstore = PineconeVectorStore.from_documents(
    docs,
    embedding=embedding,
    index_name="shl2",  # name of the target Pinecone index
)


In [36]:
# Expose the vector store as a retriever that requests k=20 results per query.
# (To reuse an already-populated index instead of re-upserting, see
# PineconeVectorStore.from_existing_index.)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=20))



In [97]:
from langchain_mistralai.chat_models import ChatMistralAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_cohere.rerank import CohereRerank
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever

import os
from getpass import getpass

# Never commit credentials: read the Mistral key from the environment and
# only prompt for it when it is missing. Any key that was previously
# hardcoded in this notebook should be treated as leaked and rotated.
if not os.environ.get("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass("Mistral API key: ")

# Chat model that writes the final JSON recommendation.
model = ChatMistralAI(mistral_api_key=os.environ["MISTRAL_API_KEY"])



# Prompt for the answer-generation step. The template has two variables,
# {context} and {input}; literal JSON braces are doubled ({{ }}) so the
# template engine does not treat them as variables.
# The sample JSON is deliberately free of comments: the model is asked for
# valid JSON, and a "// ..." line in the sample invites it to emit one.
prompt = ChatPromptTemplate.from_template("""
You are a helpful assistant for recommending SHL assessments.

Given the following SHL assessment documents, return a JSON object with the key "recommended_assessments", whose value is a list of up to 10 recommended assessments.
Each assessment should include all the fields present in the input (including lists), and the output should be pretty-printed and valid JSON, with this structure:

{{
    "recommended_assessments": [
        {{
            "url": "...",
            "adaptive_support": "...",
            "description": "...",
            "duration": ...,
            "remote_support": "...",
            "test_type": "...",
            "job_levels": [...],
            "languages": [...]
        }},
        {{
            "url": "...",
            "adaptive_support": "...",
            "description": "...",
            "duration": ...,
            "remote_support": "...",
            "test_type": "...",
            "job_levels": [...],
            "languages": [...]
        }}
    ]
}}

The structure above is only an illustration: replace every ... placeholder with the real value from the context, return at most 10 items, and do not put comments in the JSON.

<context>
{context}
</context>

Question: {input}

Only include information from the provided context. If there are no relevant assessments, return an empty list.
""")


In [None]:

from langchain_core.prompts import PromptTemplate

# By default the stuff chain renders each document as its page_content only,
# and page_content is built without the "url" field. The answer prompt asks
# for a "url" per assessment taken from the context, so surface it from the
# document metadata explicitly.
# Assumes every retrieved document carries a "url" metadata entry (document
# formatting fails otherwise) - TODO confirm against the catalogue.
document_prompt = PromptTemplate.from_template("url: {url} | {page_content}")

# Stuff the formatted documents into the prompt's context slot, then put the
# retriever in front so a single call retrieves and answers.
document_chain = create_stuff_documents_chain(
    model, prompt, document_prompt=document_prompt
)
retrieval_chain = create_retrieval_chain(retriever, document_chain)


In [99]:
# Sample question used to smoke-test the full retrieval + generation chain.
query = (
    "Recommend a test for mid-professional level candidates "
    "that evaluates programming skills"
)

# Left as the bare last expression so the notebook displays the chain result.
retrieval_chain.invoke(dict(input=query))

{'input': 'Recommend a test for mid-professional level candidates that evaluates programming skills',
 'context': [Document(id='12a87632-e766-412c-86b1-aa6c64348fd8', metadata={'adaptive_support': 'No', 'description': 'An AI-powered coding simulation assessment that evaluates candidate’s programming ability. Offers a familiar IDE environment available in over 40 different programming languages and tests candidates using real-world coding problems.Your use of this assessment product may be subject to New York City Law 144 (Regulation of the Use of Automated Employment Decision Tools) (dated July 5, 2023). Compliance with Law 144 is your responsibility.\r\nRead more on https://www.shl.com/legal/shl-us-regulatory-compliance/', 'duration': '0', 'job_levels': 'Mid-Professional, Professional Individual Contributor', 'languages': 'English (USA)', 'remote_support': 'Yes', 'test_type': 'Simulations', 'url': 'https://www.shl.com/solutions/products/product-catalog/view/automata-pro-new/'}, page_c