In [2]:
import os

# Read the Gemini API key from the environment so the real secret never has to
# be saved inside the notebook. The "xxx" placeholder is kept as a fallback so
# the name is always defined; replace it (or export GEMINI_API_KEY) before
# running the cells that call the API.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "xxx")

In [None]:
%pip install -qU google-generativeai langchain_chroma google-cloud-aiplatform langchain_community langchain_google_genai langchain_google_vertexai

In [8]:
import os
import json
import google.generativeai as genai
from langchain_chroma import Chroma
from langchain.schema import Document
from pydantic import BaseModel, Field
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Hand the key to the native Gemini SDK explicitly.
genai.configure(api_key=GEMINI_API_KEY)

# The LangChain Google clients created later in this notebook are constructed
# without an explicit key, so also export it through the environment.
os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY

## Initial Setup and Inputs

In [9]:
# Sample candidate descriptions
# Five free-text profiles used as demo input for the summarisation chain:
# three short student profiles, one detailed software/AI profile and one
# detailed music-industry profile (an off-domain case for the job below).
# The text is sent to the LLM verbatim, so it is intentionally left unedited.
candidate_description1 = "A first year Bachelors computer science student called candidate1."
candidate_description2 = "A second year Bachelors computer science student called candidate2. He has experience working as a software enginner at Apple."
candidate_description3 = "A third year Bachelors computer science student called candidate3. He has experience working as a AI enginner at OpenAI and Apple."
candidate_description4 = """ A highly skilled and results-driven professional, candidate4 holds a Master of Science in Computer Science and Engineering from NYU Tandon School of Engineering with a stellar GPA of 3.945. With over a year of professional experience, he specializes in software development, artificial intelligence, and data science. He is proficient in Python, C++, Java, and SQL, and adept at utilizing frameworks like PyTorch, TensorFlow, and LangChain. He has held pivotal roles such as AI Engineer Intern at Radical AI, Machine Learning Specialist at AI for Scientific Research, and Software Development Engineer at Adobe Inc., contributing to projects like Ethereum fraud detection and solar energy production prediction. His technical acumen, coupled with hands-on experience in scalable solutions, positions him as a valuable asset in the technology sector."""
candidate_description5 = """Candidate5, A passionate and talented music professional with a strong academic background in Music Performance and Production. With expertise spanning the music and entertainment industry, they specialize in music composition, sound design, live performance, and audio engineering. Proficient in tools such as Pro Tools, Logic Pro X, Ableton Live, and Adobe Audition, they bring a deep understanding of recording, mixing, and mastering.

They have gained valuable experience through roles such as Studio Engineer Intern at Universal Music Group, Music Production Assistant at Sony Music Entertainment, and Live Sound Technician for prominent venues. Their career highlights include collaborating on Grammy-nominated tracks, composing original scores for independent films, and managing audio for live events featuring renowned artists.

With a proven ability to blend creativity with technical precision, they are affiliated with prestigious organizations like Berklee College of Music and have worked alongside iconic brands such as Warner Music Group and Spotify. This candidate’s diverse skill set, industry knowledge, and innovative approach make them a sought-after professional in the ever-evolving music landscape."""

In [10]:
# Sample job description
# Raw posting text used as the single job input for this demo. It is passed
# verbatim into the job-summary prompt, so the wording is left untouched.
job_description = """ About the job
Who we are:

We are the two-man army of recovering consultants fighting the war on admin work in consulting and banking using AI as our not-so-secret weapon
Founded by alumni from Cornell University, University of Chicago, and a top 5 strategy consulting firm
Accepted into Antler’s NYC residency program (3% acceptance rate)
What we do:

We make it easier for investment professionals (e.g., those in private equity, hedge funds, consulting) to find experts with the right backgrounds for primary research
We are basically salesforce for expert sourcing
Our beachhead product is a dashboard that reads users' emails, saving them 10+ hours a week.
Location:

Union Square, NYC
Why You Should Join

You'll work directly with AI tools and not only see, but guide how they're used at some of the biggest firms in the world
Be part of our journey as we raise a seed round in the coming months which will mean compensation for yourself
You’ll have the chance to meet dozens of investors and other successful founders as we work out of an accelerator's office
Access to founders’ network
Upon successful completion of the internship, you’ll be considered for our founding engineer team, which would come with a large stake in the company
Any ideas you have will be heard directly by the CTO; our philosophy is, if you can build it, we'll test it
All we care about is delivery, so you can work whenever and wherever you want outside of our goal-setting meetings
What You Will Do

Build a RAG tool for our database of experts
Find and patch security vulnerabilities
Help out with tech debt on our beachhead product
Tell us when we're being dumb and where you think it could be done better
What You Need

2+ years of coursework in Computer Science, Mathematics, Statistics, or a related field.
Preferably, knowledge of all or any of the following:
Languages: Python, Golang, Dart.
Frameworks: Flutter.
Other: AWS, Firebase, Supabase.
A desire to work hard and figure things out
Need to be located in NYC
What We Expect From You
In-person meetings: 2-3 times a week in NYC, the three of us will align on expectations and cover progress.
"""

In [11]:
# Raw prompt strings that ask the LLM for a keyword-only profile: one for a
# single candidate, one for the ideal candidate implied by a job description.
prompt_candidate_description = "Candidate Description: {candidate_description}.\n Provide detailed profile of this candidates background, industry, skills, experience, and company affiliations. Give output as a list of keywords only for each category along with candidates name"
prompt_job_description = "Job Description: {job_description}.\n Provide detailed profile of an ideal candidate background, industry, skills, experience, and company affiliations for this job description. Give output as a list of keywords only for each category"

# Wrap each raw string in a PromptTemplate, declaring its single placeholder.
candidate_description_summary_prompt = PromptTemplate(
    input_variables=["candidate_description"],
    template=prompt_candidate_description,
)
job_description_summary_prompt = PromptTemplate(
    input_variables=["job_description"],
    template=prompt_job_description,
)

In [12]:
# Gemini 1.5 Pro LLM
# Shared by every chain in this notebook (candidate summary, job summary and
# candidate scoring). No key is passed here; presumably the client reads
# GOOGLE_API_KEY from the environment, which is set right after the imports.
# NOTE(review): confirm "gemini-1.5-pro" is still a served model id before re-running.
llm = GoogleGenerativeAI(model="gemini-1.5-pro")

## Process Candidates Descriptions

In [13]:
# Process candidate descriptions and generate keyword summaries

# LLM chain to generate a candidate description summary (prompt -> LLM)
candidate_description_summary_chain = candidate_description_summary_prompt | llm

# Invoke the chain once per candidate; the summaries keep the same order as
# the descriptions listed here. A comprehension is used so that no temporary
# named `input` is left behind shadowing the builtin in the notebook namespace.
candidate_description_keyword_summaries = [
    candidate_description_summary_chain.invoke({"candidate_description": description})
    for description in (
        candidate_description1,
        candidate_description2,
        candidate_description3,
        candidate_description4,
        candidate_description5,
    )
]

In [14]:
# Preview the first generated keyword summary to sanity-check the LLM output
print(candidate_description_keyword_summaries[0])

**Candidate Name:** candidate1

**Background:**

* First-Year
* Bachelor's Degree
* Computer Science

**Industry:** (Likely none, given first-year status)

* Entry-Level 
* Student


**Skills:** (Likely foundational, depending on coursework)

* Programming Fundamentals (Potentially specific languages like Python, Java, C++)
* Basic Data Structures and Algorithms
* Problem-Solving
* Logical Thinking
* (Possibly) Version Control (Git)


**Experience:** (Likely limited)

* Academic Projects
* (Possibly) Personal Projects 
* (Possibly) Internships (if any)


**Company Affiliations:** (Likely none)


**Important Note:**  This profile is based on the extremely limited information provided. A first-year student's actual skills and experience will vary considerably.  To create a more accurate profile,  significantly more detail is required.



## Create a vector store

In [15]:
# Store the candidate description summaries in the vector DB
vector_store = Chroma
embedding_model = GoogleGenerativeAIEmbeddings(model='models/embedding-001')

# Create one Document per summary string, preserving the summary order.
documents = [
    Document(page_content=summary)
    for summary in candidate_description_keyword_summaries
]

# Give every document a stable, position-based id. Without explicit ids each
# run inserts fresh entries, so re-executing this cell in the same session
# would store every candidate again and the later similarity search would
# return (and score) duplicates. With fixed ids a re-run overwrites instead.
# NOTE(review): relies on Chroma upserting on matching ids - confirm against
# the installed langchain_chroma version.
document_ids = [f"candidate-summary-{index}" for index, _ in enumerate(documents)]

# Embed and store the documents in the vector DB
candidate_summary_vectorstore = vector_store.from_documents(
    documents,
    embedding_model,
    ids=document_ids,
)

## Process Job Description

In [16]:
# Chain that turns a job description into a keyword summary of its
# requirements (prompt -> LLM)
job_description_summary_chain = job_description_summary_prompt | llm

# Template variables for the single job description being processed
job_input = dict(job_description=job_description)

# Invoke the chain to obtain the keyword summary of the requirements
job_description_summary = job_description_summary_chain.invoke(job_input)

In [17]:
# Display the generated keyword profile of the ideal candidate for the job
print(job_description_summary)

**Background:**

* Computer Science
* Mathematics
* Statistics
* Engineering

**Industry:**

* Software Development
* AI/ML
* Fintech
* Consulting
* Investment Banking

**Skills:**

* Python
* Golang
* Dart
* Flutter
* AWS
* Firebase
* Supabase
* RAG (Retrieval Augmented Generation)
* Security Vulnerability Detection
* Tech Debt Management
* Problem-Solving
* Communication

**Experience:**

* 2+ years coursework in relevant field
* Internship experience (preferred)
* Project experience with listed technologies

**Company Affiliations:**

* University/College (Computer Science, Mathematics, Statistics, or related programs)
* Open-source projects contributions
* Hackathons
* Tech clubs/communities



## Identify the relevant candidates and generate scores


In [18]:
# Output format
# Schema describing the JSON object the scoring LLM is asked to return for one
# candidate: a name, a score and a free-text description per category, an
# overall score and suggested next steps. The field descriptions are fed into
# the prompt via output_parser.get_format_instructions().
# NOTE(review): documented with comments rather than a class docstring because
# a docstring may be surfaced in the generated schema and change the prompt - confirm.
class CandidateScore(BaseModel):
    candidate_name: str = Field(description="The name of the candidate")

    # Per-category scores; the "out of 100" range is only stated in the
    # description text, it is not enforced by the field type.
    background_score: int = Field(description="A score out of 100 for the candidate's background")
    industry_score: int = Field(description="A score out of 100 for the candidate's industry")
    skills_score: int = Field(description="A score out of 100 for the candidate's skills")
    experience_score: int = Field(description="A score out of 100 for the candidate's experience")
    company_affiliations_score: int = Field(description="A score out of 100 for the candidate's company affiliations")
    # Overall score as judged by the LLM; the ranking code further down
    # computes its own weighted score and does not read this field.
    total_score: int = Field(description="A score out of 100 for the candidate's total score")

    # Free-text justification for each category score
    background_description: str = Field(description="A description of the candidate's background")
    industry_description: str = Field(description="A description of the candidate's industry")
    skills_description: str = Field(description="A description of the candidate's skills")
    experience_description: str = Field(description="A description of the candidate's experience")
    company_affiliations_description: str = Field(description="A description of the candidate's company affiliations")

    next_steps: str = Field(description="A description of the candidate's next steps")

# output parser
# Parses the LLM's JSON reply; downstream code indexes the parsed result by key.
output_parser = JsonOutputParser(pydantic_object=CandidateScore)

In [19]:
# Prompt for scoring each candidate against job description
# Placeholders: {jd} = job description keyword profile, {cd} = candidate
# keyword profile, {format_instructions} = JSON schema instructions.
prompt_candidate_score = """Job description profile in keywords: {jd}.\n I have a candidate with candidate profile in keywords as: {cd} \n. Tell me if this candidate is an ideal fit or not for this job. Evaluate this candidate on background, industry, skills, experice, company affliations of this candidate. Give me a score out of 100 for this candidate for this job.
You must respond as a JSON object:
{format_instructions}"""

# PromptTemplate for scoring each candidate against job description.
# input_variables must name the placeholders the template actually uses
# ("jd" and "cd"); the previous ["jds", "cds"] matched neither of them.
# format_instructions is pre-filled from the output parser, so callers only
# supply "jd" and "cd".
prompt = PromptTemplate(
    template=prompt_candidate_score,
    input_variables=["jd", "cd"],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)

In [20]:
# Retrieve up to 100 candidates by similarity search of the candidate
# description summaries against the job description summary
results = candidate_summary_vectorstore.similarity_search_with_score(job_description_summary, k=100)

# LLM chain to generate score: prompt -> LLM -> JSON parser
score_gen_chain = prompt | llm | output_parser

# Use the LLM to score each retrieved candidate against the job description.
# The retrieval score returned alongside each document is not used (ranking is
# based on the LLM's category scores), so it is bound to a throwaway name
# instead of being overwritten by the LLM result as before. Building the list
# with a comprehension also avoids leaving a global named `input` that shadows
# the builtin.
candidate_scores = [
    score_gen_chain.invoke({"cd": doc.page_content, "jd": job_description_summary})
    for doc, _retrieval_score in results
]



In [21]:
# Inspect the first parsed score record (list follows the similarity-search
# result order, not the final ranking)
print(candidate_scores[0])

{'candidate_name': 'Candidate4', 'background_score': 95, 'industry_score': 90, 'skills_score': 75, 'experience_score': 70, 'company_affiliations_score': 80, 'total_score': 82, 'background_description': "Candidate4 has a strong academic background with a Master of Science in Computer Science and Engineering from a reputable institution (NYU Tandon) and a high GPA. This aligns perfectly with the job description's requirements for a background in Computer Science, Mathematics, Statistics, or Engineering.", 'industry_description': "The candidate's experience in Technology, Software Development, Artificial Intelligence, Data Science, and Machine Learning is highly relevant to the target industries of Software Development, AI/ML, Fintech, Consulting, and Investment Banking.  The focus on AI/ML and Data Science is particularly valuable.", 'skills_description': 'Candidate4 possesses several desirable skills, including Python, which is explicitly mentioned in the job description.  While the can

## Rank the candidates for the job description

In [22]:
# Calculates a weighted score for a candidate based on multiple criteria.
def calculate_weighted_score(candidate_data, weights=None):
    """Return the weighted sum of a candidate's category scores.

    Args:
        candidate_data: Mapping holding the numeric category scores. With the
            default weights it must contain "experience_score",
            "skills_score", "background_score", "industry_score" and
            "company_affiliations_score". Any other keys are ignored.
        weights: Optional mapping of score key -> weight. When omitted, the
            default weighting is used (experience 0.30; skills, background
            and industry 0.20 each; company affiliations 0.10).

    Returns:
        The sum of ``candidate_data[key] * weight`` over all weighted keys.

    Raises:
        KeyError: If ``candidate_data`` lacks one of the weighted keys.
    """
    if weights is None:
        # Default weights for each category (they add up to 1.0).
        weights = {
            "experience_score": 0.30,
            "skills_score": 0.20,
            "background_score": 0.20,
            "industry_score": 0.20,
            "company_affiliations_score": 0.10,
        }

    # Accumulate term by term in mapping order.
    weighted_score = 0.0
    for score_key, weight in weights.items():
        weighted_score += candidate_data[score_key] * weight

    return weighted_score

In [23]:
# Order the scored candidates from highest to lowest weighted score
ranked_candidates = sorted(candidate_scores, key=calculate_weighted_score, reverse=True)

# (heading, score key, description key) for each category, in print order
report_sections = (
    ("Background", "background_score", "background_description"),
    ("Industry", "industry_score", "industry_description"),
    ("Skills", "skills_score", "skills_description"),
    ("Experience", "experience_score", "experience_description"),
    ("Company Affiliations", "company_affiliations_score", "company_affiliations_description"),
)

# Print a per-candidate report in rank order
for rank, candidate in enumerate(ranked_candidates, start=1):
    print(f"Rank {rank}:")
    print(f"Name: {candidate['candidate_name']}")
    for heading, score_key, description_key in report_sections:
        print(f"{heading}:")
        print(f"  Score: {candidate[score_key]}")
        print(f"  Description: {candidate[description_key]}\n")
    print(f"Next Steps: {candidate['next_steps']}\n")
    print("=========================================================================")

Rank 1:
Name: Candidate4
Background:
  Score: 95
  Description: Candidate4 has a strong academic background with a Master of Science in Computer Science and Engineering from a reputable institution (NYU Tandon) and a high GPA. This aligns perfectly with the job description's requirements for a background in Computer Science, Mathematics, Statistics, or Engineering.

Industry:
  Score: 90
  Description: The candidate's experience in Technology, Software Development, Artificial Intelligence, Data Science, and Machine Learning is highly relevant to the target industries of Software Development, AI/ML, Fintech, Consulting, and Investment Banking.  The focus on AI/ML and Data Science is particularly valuable.

Skills:
  Score: 75
  Description: Candidate4 possesses several desirable skills, including Python, which is explicitly mentioned in the job description.  While the candidate lacks experience with Golang, Dart, Flutter, AWS, Firebase, and Supabase, their experience with C++, Java, SQL