In [2]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Read GEMINI_API_KEY from the local .env file (or the existing process environment).
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

# Fail fast with a clear message instead of an opaque error further down.
if not api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; add it to your .env file or export it before running this cell."
    )

# Pass the key straight to the client. The previous code wrote
# os.environ[<key value>] = <key value>, i.e. it used the secret itself as the
# NAME of an environment variable, leaking it into the process environment.
genai.configure(api_key=api_key)
model_gemini = genai.GenerativeModel("gemini-1.5-pro")

In [10]:
import re, ast

def _parse_ranked_indices(text, n_items):
    """Turn the model's reply into unique, in-range, zero-based indices (order preserved)."""
    numbers = []

    # 1) The reply is exactly a Python literal such as "[3, 1, 2]" or "3, 1, 2".
    try:
        parsed = ast.literal_eval(text)
        if isinstance(parsed, (list, tuple)):
            # bool is a subclass of int; True/False are not ranks.
            numbers = [v for v in parsed if isinstance(v, int) and not isinstance(v, bool)]
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass

    # 2) The list is embedded in prose / a code fence: take the first bracketed
    #    list of integers, so stray numbers in the prose ("top 10", "40 minutes")
    #    are not mistaken for ranks.
    if not numbers:
        bracketed = re.search(r'\[\s*\d+(?:\s*,\s*\d+)*\s*,?\s*\]', text)
        if bracketed:
            numbers = [int(tok) for tok in re.findall(r'\d+', bracketed.group(0))]

    # 3) Last resort: every run of digits in the reply.
    if not numbers:
        numbers = [int(tok) for tok in re.findall(r'\d+', text)]

    # Convert 1-based ranks to 0-based positions, dropping out-of-range and repeated entries.
    seen = set()
    ordered = []
    for rank in numbers:
        pos = rank - 1
        if 0 <= pos < n_items and pos not in seen:
            seen.add(pos)
            ordered.append(pos)
    return ordered


def rerank_with_gemini(query, faiss_results, k=10):
    """
    Re-rank FAISS candidates with Gemini and return the best ones.

    query:                the user's natural-language query
    faiss_results:        list of dicts from FAISS search (each dict has keys
                          'name','test_type','duration','remote_testing','adaptive','description')
    k:                    maximum number of items to return; values below 1 are treated as 1

    Returns a list of at most k dicts taken from faiss_results, in the order
    chosen by Gemini, without duplicates. If the Gemini call fails or its reply
    contains no usable item numbers, the original FAISS ordering is used
    instead. An empty faiss_results yields [] without calling Gemini.

    Relies on the module-level `model_gemini` client.
    """
    if not faiss_results:
        return []
    k = max(1, k)

    # prompt
    prompt = f"""
You are an intelligent recruitment assistant.

Given the following user query:
"{query}"

And the list of assessments below, return the **top {k} most relevant** by their number,
in this format: [3, 1, 2, 5, 4]

Assessments:
"""
    for i, test in enumerate(faiss_results):
        prompt += f"""
#{i+1}
Name: {test['name']}
Type: {test['test_type']}
Duration: {test['duration']}
Remote: {test['remote_testing']}
Adaptive: {test['adaptive']}
Description: {test['description']}
"""

    prompt += f"\nOnly return the top {k} numbers in a Python-style list like: [3, 2, 1, 4, 5]"

    # Call Gemini; any failure (network, blocked response, missing client, ...)
    # is reported and handled by the FAISS-order fallback below.
    try:
        response = model_gemini.generate_content(prompt)
        text = response.text.strip()
        print("Gemini response:", text)
        sel = _parse_ranked_indices(text, len(faiss_results))
    except Exception as e:
        print("Gemini error:", e)
        sel = []

    # Nothing usable came back: keep the original FAISS ordering.
    if not sel:
        sel = list(range(len(faiss_results)))

    # Return the re-ranked items, capped at k
    return [faiss_results[i] for i in sel[:k]]


In [19]:
# %%writefile backend.py
import faiss
import numpy as np
from fastapi import FastAPI
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
import nest_asyncio
import uvicorn
import pandas as pd
# Allow FastAPI/uvicorn to run inside Jupyter's already-running event loop
nest_asyncio.apply()

# Initialize the FastAPI app; the route decorators below register on this object
app = FastAPI()

# Load the FAISS index and the embeddings that were created in another file.
# NOTE(review): `embeddings` is not referenced anywhere else in the visible code —
# confirm whether it is still needed.
index = faiss.read_index("shl_index.faiss")
embeddings = np.load("shl_embeddings.npy")

# Load the SentenceTransformer model all-mpnet-base-v2 used to embed incoming queries
model = SentenceTransformer('all-mpnet-base-v2')

# Assessment catalogue; the /recommend handler uses FAISS result ids as row positions
# into `df` (df.iloc[idx]), so the row order presumably matches the order in which
# vectors were added to the index — TODO confirm.
# NOTE(review): the file has an .xls extension but is parsed with read_csv —
# confirm it really contains CSV text.
df = pd.read_csv("test_data.xls")
# List-of-dicts view of the same table (not referenced elsewhere in the visible code)
assessment_data = df.to_dict(orient="records")
import re, ast

def _parse_ranked_indices(text, n_items):
    """Turn the model's reply into unique, in-range, zero-based indices (order preserved)."""
    numbers = []

    # 1) The reply is exactly a Python literal such as "[3, 1, 2]" or "3, 1, 2".
    try:
        parsed = ast.literal_eval(text)
        if isinstance(parsed, (list, tuple)):
            # bool is a subclass of int; True/False are not ranks.
            numbers = [v for v in parsed if isinstance(v, int) and not isinstance(v, bool)]
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass

    # 2) The list is embedded in prose / a code fence: take the first bracketed
    #    list of integers, so stray numbers in the prose ("top 10", "40 minutes")
    #    are not mistaken for ranks.
    if not numbers:
        bracketed = re.search(r'\[\s*\d+(?:\s*,\s*\d+)*\s*,?\s*\]', text)
        if bracketed:
            numbers = [int(tok) for tok in re.findall(r'\d+', bracketed.group(0))]

    # 3) Last resort: every run of digits in the reply.
    if not numbers:
        numbers = [int(tok) for tok in re.findall(r'\d+', text)]

    # Convert 1-based ranks to 0-based positions, dropping out-of-range and repeated entries.
    seen = set()
    ordered = []
    for rank in numbers:
        pos = rank - 1
        if 0 <= pos < n_items and pos not in seen:
            seen.add(pos)
            ordered.append(pos)
    return ordered


def rerank_with_gemini(query, faiss_results, k=10):
    """
    Re-rank FAISS candidates with Gemini and return the best ones.

    query:                the user's natural-language query
    faiss_results:        list of dicts from FAISS search (each dict has keys
                          'name','test_type','duration','remote_testing','adaptive','description')
    k:                    maximum number of items to return; values below 1 are treated as 1

    Returns a list of at most k dicts taken from faiss_results, in the order
    chosen by Gemini, without duplicates. If the Gemini call fails or its reply
    contains no usable item numbers, the original FAISS ordering is used
    instead. An empty faiss_results yields [] without calling Gemini.

    Relies on a module-level `model_gemini` client, which this cell does not
    define itself; if it is missing, the resulting error is reported and the
    FAISS ordering is returned.
    """
    if not faiss_results:
        return []
    k = max(1, k)

    # prompt
    prompt = f"""
You are an intelligent recruitment assistant.

Given the following user query:
"{query}"

And the list of assessments below, return the **top {k} most relevant** by their number,
in this format: [3, 1, 2, 5, 4]

Assessments:
"""
    for i, test in enumerate(faiss_results):
        prompt += f"""
#{i+1}
Name: {test['name']}
Type: {test['test_type']}
Duration: {test['duration']}
Remote: {test['remote_testing']}
Adaptive: {test['adaptive']}
Description: {test['description']}
"""

    prompt += f"\nOnly return the top {k} numbers in a Python-style list like: [3, 2, 1, 4, 5]"

    # Call Gemini; any failure (network, blocked response, missing client, ...)
    # is reported and handled by the FAISS-order fallback below.
    try:
        response = model_gemini.generate_content(prompt)
        text = response.text.strip()
        print("Gemini response:", text)
        sel = _parse_ranked_indices(text, len(faiss_results))
    except Exception as e:
        print("Gemini error:", e)
        sel = []

    # Nothing usable came back: keep the original FAISS ordering.
    if not sel:
        sel = list(range(len(faiss_results)))

    # Return the re-ranked items, capped at k
    return [faiss_results[i] for i in sel[:k]]

# request body structure for /recommend endpoint
class RecommendRequest(BaseModel):
    """Request body for the POST /recommend endpoint."""
    # Free-text query; the handler embeds it for the FAISS search and also
    # passes it to the Gemini re-ranker.
    query: str

# Define response model
class ProductRecommendation(BaseModel):
    """
    Shape of one recommended assessment; every field is declared as a string.

    NOTE(review): nothing in the visible code references this model (the
    /recommend route declares no response_model), and the dicts built by
    `recommend` also carry a "description" key that is not declared here —
    confirm whether this model should be wired in or extended.
    """
    name: str
    url: str
    remote_testing: str
    adaptive: str
    duration: str
    test_type: str

# /recommend endpoint
@app.post("/recommend")
async def recommend(request: RecommendRequest):
    """
    Recommend assessments for a free-text query.

    Embeds the query with the module-level SentenceTransformer `model`, looks up
    its 10 nearest neighbours in the FAISS `index`, maps every hit to the
    corresponding row of `df`, and lets `rerank_with_gemini` order the candidates.

    Returns:
        dict: {"recommendations": [...]} where each item has the keys
        name, url, remote_testing, adaptive, duration, description, test_type.

    NOTE(review): `model.encode` and the Gemini call are synchronous, so they
    block the event loop while this coroutine runs.
    """
    query = request.query

    # Generate the query embedding as a float32 array of shape (1, dim) for FAISS
    query_embedding = model.encode([query], convert_to_numpy=True).astype('float32')
    query_embedding = query_embedding.reshape(1, -1)

    # Search the FAISS index; the distances are not needed here
    _, neighbor_ids = index.search(query_embedding, 10)

    # Build the candidate list (up to 10 tests)
    recommendations = []
    for idx in neighbor_ids[0]:
        # FAISS fills missing results with -1 when the index holds fewer vectors
        # than requested. df.iloc[-1] would silently return the LAST row, which
        # would be a bogus recommendation, so skip those slots.
        if idx < 0:
            continue
        row = df.iloc[idx]
        recommendation = {
            "name": row["Test_Name"],
            "url": "https://www.shl.com"+row["Test_Link"],
            "remote_testing": row["Remote_Testing"],
            "adaptive": row["Adaptive_Testing"],
            "duration": row["Assessment_Length"],
            "description":row["Description"],
            "test_type": row["Test_Types"]
        }
        recommendations.append(recommendation)

    # Re-rank using Gemini
    reranked_recommendations = rerank_with_gemini(query, recommendations)
    return {"recommendations": reranked_recommendations}

@app.get("/health")
async def health():
    """Liveness probe: always reports the service status as OK."""
    payload = {"status": "OK"}
    return payload



Overwriting backend.py


In [15]:
import uvicorn

# Serve `app` on http://127.0.0.1:8000 from inside the notebook. The call keeps
# the cell busy until the server is shut down.
# Run FastAPI without 'reload=True' (presumably because an app object, not an
# import string, is passed here — confirm).
uvicorn.run(app, host="127.0.0.1", port=8000)


INFO:     Started server process [14440]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


Gemini response: [2, 4, 5, 1, 3, 7, 8, 10, 6]
INFO:     127.0.0.1:58115 - "POST /recommend HTTP/1.1" 200 OK
Gemini response: Here are the top 10 most relevant assessments based on the prompt, prioritized for Java development skills and time constraints (under 40 minutes), and presented in a Python list format.  Since adaptive testing isn't required, all suitable Java tests are included.  The business collaboration aspect can't be directly assessed by these technical tests, but would likely be evaluated in a subsequent interview.

```python
[2, 4, 5, 1, 3, 7, 8, 10, 6] 
```

**Reasoning:**

1. **Focus on Java:**  The primary requirement is Java development, so Java-focused assessments are prioritized.
2. **Advanced First:** #2 (Core Java - Advanced) is listed first to identify candidates with a stronger skillset upfront.  #4 (Java 8) and #5 (Java Frameworks) are also high priority for practical Java development skills.
3. **Core Concepts:** #1 (Core Java - Entry Level) and #3 (Java Web 

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [14440]
