In [109]:
import json
import os
from typing import List
import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge
from scripts.similarity import get_similarity_score, find_path, read_config
from scripts.utils import get_filenames_from_dir
from scripts import ReadPdf, JobDescriptionProcessor, ResumeProcessor, KeytermsExtraction
import cohere
from scripts.KeytermsExtraction import KeytermExtractor
from scripts.similarity.get_similarity_score import get_similarity_score



In [110]:
import os

job_desc_directory = "Data/JobDescription/"
resumes_directory = "Data/Resumes/"

# Automatically get all job description and resume files
job_desc_files = [file for file in os.listdir(job_desc_directory) if file.endswith('.pdf')]
resume_files = [file for file in os.listdir(resumes_directory) if file.endswith('.pdf')]

job_desc_file = job_desc_files[0]  # This will select the first job description file


In [111]:
# Define paths
# Read raw job description
job_desc_text = ReadPdf.read_single_pdf("Data/JobDescription/" + job_desc_file)

# Process job description
job_desc_processor = JobDescriptionProcessor(job_desc_file)
job_desc_processed = job_desc_processor._read_job_desc()

# Read and process resumes
resumes_text = [ReadPdf.read_single_pdf("Data/Resumes/" + resume_file) for resume_file in resume_files]
resumes_processed = []
for resume_text in resumes_text:
    resume_processor = ResumeProcessor(resume_text)
    resume_data = resume_processor._read_resumes()
    resumes_processed.append(resume_data)




Error reading file 'Data/Resumes/JOHN DOE
123 Main St, Anywhere, USA — (123) 456-7890 — john.doe@email.com
LinkedIn: linkedin.com/in/johndoe — GitHub: github.com/johndoe
PROFESSIONAL SUMMARY
Highly skilled Full Stack Developer with over 5 years of experience in Java and Angular development,
specializing in designing, building, testing, and maintaining web applications. Proficient in an assortment
of technologies, including Java, Spring Boot, Angular, HTML5, CSS3, and SQL. Exceptional ability to
work in a team and self-direct. Committed to providing high-quality results with little supervision.
SKILLS
•Java and J2EE
•Spring Boot, Spring MVC, and Hibernate
•Angular (versions 2+)
•JavaScript, TypeScript, HTML5, CSS3, and Bootstrap
•RESTful APIs
•SQL and NoSQL databases (MySQL, MongoDB)
•Agile and Scrum
•Git and GitHub
•Junit and Mockito
•Docker
WORK EXPERIENCE
Full Stack Java Developer , ABC Company, Inc., Anywhere, USA, June 2018 - Present
•Developed scalable, robust, and maintainable en

In [112]:
def extract_candidate_name_from_filename(filename: str) -> str:
    name_parts = filename.split('_')[:-1]  # Exclude the last part which is the position
    return ' '.join(name_parts).title()

In [113]:

# Initialize the KeytermExtractor for the job description
job_desc_extractor = KeytermExtractor(job_desc_text)

# Extract key terms using the TextRank algorithm (or any other method you prefer)
job_desc_keywords = job_desc_extractor.get_keyterms_based_on_textrank()

# Extract key terms for each resume
resumes_keywords = []
for resume_text in resumes_text:
    resume_extractor = KeytermExtractor(resume_text)
    resume_keywords = resume_extractor.get_keyterms_based_on_textrank()
    resumes_keywords.append(resume_keywords)




In [114]:
# # Initialize Cohere client
cohere_api_key = "SfyA1J6aBy0xiunhxzGFnATHTorhmx0Qemkshpls"  # Replace with your API key
co = cohere.Client(cohere_api_key)


In [115]:

# Convert the extracted keywords into strings
job_desc_keywords_str = ' '.join([keyword[0] for keyword in job_desc_keywords])
resumes_keywords_str_list = [' '.join([keyword[0] for keyword in resume_keywords]) for resume_keywords in resumes_keywords]

# Calculate similarities between the job description and each resume using Qdrant and Cohere
similarities = []
for resume_keywords_str in resumes_keywords_str_list:
    similarity_result = get_similarity_score(resume_keywords_str, job_desc_keywords_str)
    # Extract the similarity score from the result (assuming the score is what you need)
    score = similarity_result[0]['score']  # Modify this line if the structure of similarity_result is different
    similarities.append(score)


2023-10-03 16:09:54,179 - scripts.similarity.get_similarity_score - INFO - Started getting similarity score


collection name=resume_collection_name


2023-10-03 16:09:56,132 - scripts.similarity.get_similarity_score - INFO - Finished getting similarity score
2023-10-03 16:09:56,132 - scripts.similarity.get_similarity_score - INFO - Started getting similarity score


collection name=resume_collection_name


2023-10-03 16:09:57,323 - scripts.similarity.get_similarity_score - INFO - Finished getting similarity score
2023-10-03 16:09:57,323 - scripts.similarity.get_similarity_score - INFO - Started getting similarity score


collection name=resume_collection_name


2023-10-03 16:09:58,983 - scripts.similarity.get_similarity_score - INFO - Finished getting similarity score
2023-10-03 16:09:58,983 - scripts.similarity.get_similarity_score - INFO - Started getting similarity score


collection name=resume_collection_name


2023-10-03 16:10:00,380 - scripts.similarity.get_similarity_score - INFO - Finished getting similarity score
2023-10-03 16:10:00,380 - scripts.similarity.get_similarity_score - INFO - Started getting similarity score


collection name=resume_collection_name


2023-10-03 16:10:01,055 - scripts.similarity.get_similarity_score - INFO - Finished getting similarity score


In [116]:
job_desc_str = ' '.join([keyword[0] for keyword in job_desc_keywords])
resumes_str_list = [' '.join([keyword[0] for keyword in resume_keywords]) for resume_keywords in resumes_keywords]

In [117]:
similarity_threshold = 0.3  # Example threshold;
filtered_resumes = [resumes_str_list[i] for i, score in enumerate(similarities) if score >= similarity_threshold]


In [118]:
filtered_resumes

['complete software development life cycle •Certified Angular developer agile development team Angular development web application Stack Java Developer party application level application application component Java SE quality software user experience Spring Boot MongoDB database software debugging team member •wrote unit test •Oracle Certified Professional work experience NoSQL database',
 'skilled MERN Stack developer Facebook web application 2020Full Stack developer scalable web application complex web application Gotham Event Planner modern mern stack architecture MERN stack natural language processing task PresentMERN Stack Developer Gotham St Gotham city Google Maps api development environment machine learning algorithm UC San Diego data science team user experience restful api share event',
 'personal expense tracker application seamless datum flow accessible web design end DeveloperGoogle HQ user interface modern web technology datum visualization datum storage user authenticati

In [119]:
rerank_results = co.rerank(query=job_desc_str, documents=filtered_resumes, top_n=len(filtered_resumes), model='rerank-english-v2.0')
ranked_indices = [result.index for result in rerank_results.results]
ranked_resumes = [filtered_resumes[i] for i in ranked_indices]

In [120]:
# Extract candidate names from the provided filenames
candidate_names = [extract_candidate_name_from_filename(resume_file) for resume_file in resume_files]

# Map the ranked indices to the candidate names
ranked_candidate_names = [candidate_names[i] for i in ranked_indices]

# Display the ranked candidate names
for idx, candidate_name in enumerate(ranked_candidate_names):
    print(f"Rank {idx + 1}: {candidate_name}")

Rank 1: Alfred Pennyworth
Rank 2: John
Rank 3: Barry Allen
Rank 4: Harvey Dent
Rank 5: Bruce Wayne
