In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

def load_and_prepare_data(url):
    """Loads data from the Google Sheet, prints column names, and cleans the data."""
    print("Loading and preparing data...")
    # Format the URL for direct CSV export
    csv_url = url.replace('/edit?usp=sharing', '/export?format=csv')
    df = pd.read_csv(csv_url)
    
    # --- DIAGNOSTIC STEP ---
    # Print the actual column names found in your file
    print(f"\nColumns found in file: {df.columns.tolist()}\n")
        
    # Fill any missing job titles with a neutral placeholder
    df['job_title'] = df['job_title'].fillna('No title specified')
    
    print(f"✅ Data loaded and prepared. Found {len(df)} candidates.\n")
    return df

def create_embeddings(texts, model):
    """Converts a list of texts into numerical vectors (embeddings)."""
    print("Creating vector embeddings for all job titles...")
    embeddings = model.encode(texts, show_progress_bar=True)
    print("✅ Embeddings created.\n")
    return embeddings

def rank_candidates(query_vector, candidate_vectors, candidate_data):
    """Ranks candidates based on cosine similarity to the query vector."""
    # Reshape query_vector to be a 2D array for the function
    similarities = cosine_similarity(query_vector.reshape(1, -1), candidate_vectors)[0]
    
    # Get the indices of the candidates sorted by similarity
    ranked_indices = np.argsort(similarities)[::-1]
    
    # Create a ranked DataFrame
    ranked_df = candidate_data.iloc[ranked_indices].copy()
    ranked_df['fit_score'] = similarities[ranked_indices]
    
    return ranked_df

# --- Main Execution ---

# 1. Data Setup
data_url = 'https://docs.google.com/spreadsheets/d/117X6i53dKiO7w6kuA1g1TpdTlv1173h_dPlJt5cNNMU/edit?usp=sharing'
search_query = "Aspiring human resources"
# Use a pre-trained NLP model for creating high-quality embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# 2. Load Data and Create Embeddings
candidates_df = load_and_prepare_data(data_url)
candidate_embeddings = create_embeddings(candidates_df['job_title'].tolist(), model)
query_embedding = model.encode(search_query)

# 3. Initial Ranking
print(f"--- Initial Ranking for query: '{search_query}' ---")
initial_ranking = rank_candidates(query_embedding, candidate_embeddings, candidates_df)
print(initial_ranking[['id', 'job_title', 'location', 'connection', 'fit_score']].head(10))
print("-" * 50, "\n")

# 4. Simulate User Feedback and Re-rank
# Let's assume the reviewer stars the 7th candidate in the list (index 6)
starred_candidate_index = initial_ranking.index[6]
starred_candidate_vector = candidate_embeddings[starred_candidate_index]
starred_candidate_details = candidates_df.loc[starred_candidate_index]

print(f"--- User starred a candidate: ---\n{starred_candidate_details}\n")
print("Re-ranking all candidates based on this ideal profile...\n")

# Create a new query vector by blending the original query with the starred candidate
new_query_embedding = (0.3 * query_embedding) + (0.7 * starred_candidate_vector)

# 5. Re-ranking
print("--- New, Re-ranked List ---")
reranked_list = rank_candidates(new_query_embedding, candidate_embeddings, candidates_df)
# Notice how candidates similar to the starred one are now at the top
print(reranked_list[['id', 'job_title', 'location', 'connection', 'fit_score']].head(10))
print("-" * 50)


Loading and preparing data...

Columns found in file: ['id', 'job_title', 'location', 'connection', 'fit']

✅ Data loaded and prepared. Found 104 candidates.

Creating vector embeddings for all job titles...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

✅ Embeddings created.

--- Initial Ranking for query: 'Aspiring human resources' ---
    id                              job_title  \
96  97  Aspiring Human Resources Professional   
32  33  Aspiring Human Resources Professional   
2    3  Aspiring Human Resources Professional   
20  21  Aspiring Human Resources Professional   
16  17  Aspiring Human Resources Professional   
57  58  Aspiring Human Resources Professional   
45  46  Aspiring Human Resources Professional   
48  49    Aspiring Human Resources Specialist   
59  60    Aspiring Human Resources Specialist   
5    6    Aspiring Human Resources Specialist   

                               location connection  fit_score  
96                 Kokomo, Indiana Area         71   0.949807  
32  Raleigh-Durham, North Carolina Area         44   0.949807  
2   Raleigh-Durham, North Carolina Area         44   0.949807  
20  Raleigh-Durham, North Carolina Area         44   0.949807  
16  Raleigh-Durham, North Carolina Area         44   0.