In [1]:
!pip install pandas faiss-cpu transformers torch


Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1

In [2]:
from google.colab import files
import pandas as pd

# Prompt for a file through the Colab upload widget; the returned mapping is
# keyed by the uploaded file names.
uploaded = files.upload()

# If nothing was uploaded (e.g. the dialog was dismissed) fail with a clear
# message instead of an IndexError from indexing an empty key list.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose an Excel file.")

# Only the first uploaded file is read; any additional files are ignored.
filename = next(iter(uploaded))
df = pd.read_excel(filename)

# Preview the first few rows (kept as the last expression so the notebook renders it).
df.head()


Saving RecruterPilot candidate sample input dataset.xlsx to RecruterPilot candidate sample input dataset.xlsx


Unnamed: 0,Name,Contact Details,Location,Job Skills,Experience,Projects,Comments
0,John Doe,john.doe@example.com,New York,Java; Big Data; Hadoop,5 years,Developed a big data processing system using H...,Strong problem-solving skills.
1,Jane Smith,jane.smith@example.com,San Francisco,JavaScript; React; Node.js,3 years,Built a real-time chat application using React...,Excellent in team collaboration.
2,Bob Johnson,bob.johnson@example.com,New York,Java; Spring; Microservices,6 years,Implemented a microservices architecture for a...,Proactive and detail-oriented.
3,Alice Brown,alice.brown@example.com,Chicago,Python; Django; Machine Learning,4 years,Created a machine learning model for predictiv...,Strong analytical skills.
4,Michael Green,michael.green@example.com,Boston,Ruby; Rails; PostgreSQL,7 years,Developed a scalable web application using Rub...,Great leadership qualities.


In [3]:
import numpy as np

In [4]:
import faiss
from transformers import AutoTokenizer, AutoModel
import torch

# Checkpoint used for both the tokenizer and the encoder; the two must come
# from the same checkpoint so token ids line up with the model's vocabulary.
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModel.from_pretrained(model_name),
)

# Function to embed text
def embed_text(text):
    """Embed text with the module-level `tokenizer` and `model`.

    Token embeddings from the model's last hidden state are averaged over the
    sequence dimension, weighted by the tokenizer's attention mask so that
    padding positions do not contribute. For a single string (no padding) this
    is the same as a plain mean; for a list of strings of different lengths it
    keeps the shorter texts from being skewed by pad tokens.

    Args:
        text: A string, or a list of strings, accepted by the tokenizer.
            Inputs longer than the tokenizer's limit are truncated.

    Returns:
        A NumPy array of pooled embeddings with all size-1 dimensions removed
        (`squeeze()`): 1-D for a single string, 2-D (one row per text) for a
        list of several strings.
    """
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():  # inference only; no gradients needed
        token_embeddings = model(**inputs).last_hidden_state

    # Broadcast the (batch, seq) mask over the hidden dimension.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp guards against division by zero if a row had no attended tokens.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    embeddings = summed / token_counts
    return embeddings.squeeze().numpy()

# Build one text blob per candidate by joining the selected columns with spaces.
# NOTE(review): names and contact details are part of the embedded text and may
# add noise to skill-based matching — confirm this is intended.
profile_columns = ['Name', 'Contact Details', 'Location', 'Job Skills', 'Experience', 'Projects', 'Comments']
candidate_profiles = df[profile_columns].astype(str).agg(' '.join, axis=1)

# Embed each profile individually and stack the vectors into a single 2-D array.
candidate_embeddings = np.array(list(map(embed_text, candidate_profiles)))

# Exact (brute-force) L2 index sized to the embedding dimensionality.
embedding_dim = candidate_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(candidate_embeddings)

print(f"Indexed {index.ntotal} candidate profiles.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Indexed 120 candidate profiles.


In [12]:
def find_matching_candidates(job_description, top_k=10):
    """Return the candidate rows whose embeddings are nearest to a job description.

    The description is embedded with `embed_text` and searched against the
    module-level FAISS `index`; the resulting positions are used to select
    rows of the module-level `df`, nearest first.

    Args:
        job_description: Free-text job description to match against.
        top_k: Maximum number of candidates to return. Values larger than the
            number of indexed candidates are clamped.

    Returns:
        A DataFrame slice of `df` with at most `top_k` rows. Empty when the
        index holds no vectors or `top_k` is not positive.
    """
    # FAISS pads results with -1 when asked for more neighbours than it holds;
    # `df.iloc[-1]` would then silently return the last row as a bogus match.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return df.iloc[0:0]

    # FAISS expects a 2-D (n_queries, dim) array.
    job_embedding = embed_text(job_description).reshape(1, -1)

    _, indices = index.search(job_embedding, k)

    # Defensive: drop any remaining "no result" markers before positional lookup.
    hits = indices[0]
    hits = hits[hits >= 0]
    return df.iloc[hits]

# Example job description (split across lines for readability; adjacent string
# literals are concatenated into the same single string).
job_description = (
    "Looking for skilled UI Developer to join our dynamic team. "
    "The ideal candidate will have a strong background in front-end development, "
    "with proficiency in HTML, CSS, JavaScript, and modern frameworks like React or Angular. "
    "Your primary responsibility will be to create visually appealing and user-friendly "
    "web interfaces that enhance user experience and align with our brand guidelines."
)

# Find matching candidates. The result is left as the cell's last expression so
# the notebook renders it as a table instead of wrapped plain text from print().
matching_candidates = find_matching_candidates(job_description)
matching_candidates


                  Name                Contact Details          Location  \
96    Tina Cohen-Chang   tina.cohen-chang@outlook.com   Los Angeles, CA   
119        Ian Malcolm        ian.malcolm@hotmail.com   Los Angeles, CA   
106  Yvonne Strahovski  yvonne.strahovski@outlook.com   Los Angeles, CA   
25        Rachel Green       rachel.green@outlook.com  Philadelphia, PA   
31        Rachel Green         rachel.green@yahoo.com   San Antonio, TX   
39          Nancy Drew         nancy.drew@hotmail.com     San Diego, CA   
72           Bob Smith          bob.smith@hotmail.com   Los Angeles, CA   
118      Daisy Johnson      daisy.johnson@outlook.com     San Diego, CA   
54         Ian Malcolm          ian.malcolm@yahoo.com       Houston, TX   
44          Kyle Reese         kyle.reese@hotmail.com        Dallas, TX   

                                            Job Skills Experience  \
96   PostgreSQL, Kubernetes, JavaScript, Machine Le...   11 years   
119     JavaScript, PostgreSQL, Node

In [15]:
# Interactive search: keep prompting for job descriptions until the user types 'exit'.
while True:
    # Strip surrounding whitespace so inputs such as "exit " still quit.
    job_description = input("Enter job description (or type 'exit' to quit): ").strip()
    if job_description.lower() == 'exit':
        break
    if not job_description:
        # Nothing to search for; prompt again rather than embedding an empty string.
        continue

    # Reuse the existing find_matching_candidates function for each query.
    matching_candidates = find_matching_candidates(job_description)
    print(matching_candidates)

Enter job description (or type 'exit' to quit): exit
