#Resume - Skill Gap Analysis

.

In [None]:
!pip install openai
!pip install Langchain
!pip install tiktoken
!pip install docarray
!pip install pypdf
!pip install faiss-gpu
!pip install llama_index

.

#1. Import required libraries.

In [130]:
import os

import openai
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader, PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

.

#2. Functions to read flat files & pdf files.

In [131]:
def read_flat_file(file_name):
  """Read a text file and return its complete content as a single string.

  Args:
    file_name: Path of the file to open (text mode, platform default encoding).

  Returns:
    The full, unmodified text of the file.
  """
  with open(file_name) as source:
    return source.read()

def read_pdf_file(file_name, chunk_size=1500, chunk_overlap=150):
  """Load a PDF and break it into overlapping document chunks.

  Args:
    file_name: Path of the PDF file handed to PyPDFLoader.
    chunk_size: Value forwarded to RecursiveCharacterTextSplitter as chunk_size.
    chunk_overlap: Value forwarded to RecursiveCharacterTextSplitter as chunk_overlap.

  Returns:
    The result of splitting the loaded pages with the configured splitter.
  """
  pdf_pages = PyPDFLoader(file_path=file_name).load()
  chunker = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  return chunker.split_documents(pdf_pages)

.

#3. Load OpenAI keys, instantiate client objects & load embeddings.

In [132]:
# The key is read from a local file; strip surrounding whitespace because a
# trailing newline in the file would otherwise become part of the API key.
os.environ["OPENAI_API_KEY"] = read_flat_file("openai_key").strip()

# Chat-completion models are accessed through ChatOpenAI.
chatgpt3_5_turbo = ChatOpenAI(model="gpt-3.5-turbo")
# gpt-3.5-turbo-instruct is a completions model rather than a chat model, so it
# is wrapped with the OpenAI LLM class instead of ChatOpenAI.
chatgpt3_5_turbo_instruct = OpenAI(model="gpt-3.5-turbo-instruct")
chatgpt4 = ChatOpenAI(model="gpt-4")

# Shared embedding client used when building the vector store.
embedding = OpenAIEmbeddings()

.

#4. Function to create Vector Database from document splits.

In [133]:
def create_vectordb_from_document_splits(document_splits, embedding):
  """Build a FAISS vector store from the given document splits.

  Args:
    document_splits: Documents passed to FAISS.from_documents.
    embedding: Embedding object passed to FAISS.from_documents.

  Returns:
    The vector store returned by FAISS.from_documents.
  """
  vector_store = FAISS.from_documents(document_splits, embedding=embedding)
  return vector_store

.

#5. Function to get Output Parser.

In [134]:
def get_output_parser():
  """Create the structured output parser for the skill gap analysis.

  The parser is built from four response schemas, in this order:
  strengths, weaknesses, summary and is_shortlisted.

  Returns:
    A StructuredOutputParser built from those response schemas.
  """
  # (field name, description) pairs, in the order they are given to the parser.
  field_specs = [
    ("strengths", "Strengths of the candidate"),
    ("weaknesses", "Weaknesses of the candidate."),
    ("summary", "Short summary of the analysis mentioning eligibility of the candidate."),
    ("is_shortlisted", "Do candidate's skills match with required skills? (Yes/No)"),
  ]
  schemas = [ResponseSchema(name=field, description=text) for field, text in field_specs]
  return StructuredOutputParser.from_response_schemas(schemas)

.

#6. Function to get prompt template.

In [135]:
def get_prompt_template():
  """Build the PromptTemplate used for the skill gap analysis.

  The template expects five input variables: context, job_description,
  delimiter_skills, delimiter_job_description and instructions.

  Returns:
    A PromptTemplate wrapping the skill gap analysis prompt text.
  """
  template_variables = ["context", "job_description", "delimiter_skills", "delimiter_job_description", "instructions"]

  skill_gap_template = """
  You will be provided with a Job Description enclosed within {delimiter_job_description} delimiter.
  You'll also be provided skills of the candidate enclosed within {delimiter_skills} delimiter.

  Depending upon these two inputs, please provide the skill gap analysis for the employer to short-list resume.
  Please make sure that the analysis is concise and to the point.


  <Candidate's Skills>

  {delimiter_skills}{context}{delimiter_skills}


  <Job Description>

  {delimiter_job_description}{job_description}{delimiter_job_description}

  {instructions}

  """

  return PromptTemplate(input_variables=template_variables, template=skill_gap_template)

.

#7. Function to get QA chain.

In [136]:
def get_qa_chain(chat_client, prompt):
  """Create a "stuff" question-answering chain.

  Args:
    chat_client: Language model passed to load_qa_chain as llm.
    prompt: Prompt passed to load_qa_chain.

  Returns:
    The chain returned by load_qa_chain.
  """
  qa_chain = load_qa_chain(llm=chat_client, chain_type="stuff", prompt=prompt)
  return qa_chain

.

#8. Function to convert the output dictionary into a printable format.

In [137]:
def return_pretty_print(input_dictionary):
  """Format the parsed analysis dictionary as a human-readable string.

  Args:
    input_dictionary: Mapping with the keys "is_shortlisted", "strengths",
      "weaknesses" and "summary".

  Returns:
    A string with the short-listing verdict, strengths, weaknesses and the
    overall summary, where every ". " is replaced by ".\n" so each sentence
    starts on its own line.

  Raises:
    KeyError: If one of the expected keys is missing.
  """
  # The verdict comes from model output, so tolerate case differences,
  # surrounding whitespace and a trailing period (e.g. "yes", " Yes", "Yes.").
  verdict = str(input_dictionary["is_shortlisted"]).strip().rstrip(".").strip().lower()
  if verdict in ("yes", "true"):
    headline = "\nThe candidate is short-listed for interviews.\n"
  else:
    headline = "\nThe candidate is not short-listed for interviews.\n"

  pretty_string = (
    headline
    + f"\nStrengths: {input_dictionary['strengths']}\n"
    + f"\nWeaknesses: {input_dictionary['weaknesses']}\n"
    + f"\nOverall Summary: {input_dictionary['summary']}"
  )
  return pretty_string.replace('. ', '.\n')

.

#8. Function to run QA chain.

In [138]:
def run_qa_chain(resume, job_description, chat_client, k=2, chunk_size=1500, chunk_overlap=150):
  """Run the skill gap analysis for one resume against one job description.

  The resume PDF is split into chunks and indexed in a vector store; the
  chunks most similar to a fixed skills/education query are passed to the QA
  chain together with the job description text and the parser's format
  instructions.

  Args:
    resume: Path of the resume PDF file.
    job_description: Path of the text file holding the job description.
    chat_client: Language model used to build the QA chain.
    k: Number of resume chunks retrieved by the similarity search.
    chunk_size: Chunk size used when splitting the resume.
    chunk_overlap: Chunk overlap used when splitting the resume.

  Returns:
    The printable analysis string produced by return_pretty_print.
  """
  query = "What are the skills and educational qualifications of the candidate?"

  # Prompt and parser; the parser also supplies the format instructions.
  prompt = get_prompt_template()
  output_parser = get_output_parser()
  instructions = output_parser.get_format_instructions()

  # Inputs: job description text and the resume chunks relevant to the query.
  job_description_text = read_flat_file(job_description)
  document_splits = read_pdf_file(resume, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  # `embedding` is the module-level embedding client created in the setup cell.
  vectordb = create_vectordb_from_document_splits(document_splits, embedding)
  skills = vectordb.similarity_search(query, k=k)

  chain = get_qa_chain(chat_client, prompt)
  prompt_inputs = {
    "input_documents": skills,
    "job_description": job_description_text,
    "delimiter_skills": "###",
    "delimiter_job_description": "$$$",
    "instructions": instructions,
  }
  response = chain(prompt_inputs, return_only_outputs=True)
  response_dict = output_parser.parse(response["output_text"])
  return return_pretty_print(response_dict)

.

#9. Testing the application

#Using ML resume and ML JD with chatgpt 3.5

In [139]:
# Screen the machine learning resume against the machine learning job
# description using the gpt-3.5-turbo client.
analysis = run_qa_chain(
    resume="Sanket_Patole_Machine_Learning.pdf",
    job_description="Job Description Machine Learning.txt",
    chat_client=chatgpt3_5_turbo,
)
print(analysis)


The candidate is short-listed for interviews.

Strengths: The candidate has strong skills in programming, particularly in Python, UnixShell, Excel-VBA, Gitlab-CICD, and AWS-Cloudformation.
They also have experience with data science techniques such as supervised and unsupervised learning, computer vision, and scikit-learn.
In addition, the candidate has expertise in data engineering tools and technologies like SQL, Snowflake, Pyspark, and various AWS services.

Weaknesses: The candidate does not mention any weaknesses in their skills or experience.

Overall Summary: The candidate possesses a strong skill set in programming, data science, and data engineering.
They have experience working with various tools and technologies relevant to the job description.
Overall, the candidate appears to be eligible for the machine learning engineer position.


#Using the same ML resume and JD with chatgpt 4

In [140]:
# Same resume and job description as the previous run, this time with the
# gpt-4 client.
analysis = run_qa_chain(
    resume="Sanket_Patole_Machine_Learning.pdf",
    job_description="Job Description Machine Learning.txt",
    chat_client=chatgpt4,
)
print(analysis)


The candidate is not short-listed for interviews.

Strengths: The candidate, Sanket Patole, is proficient in Python, one of the required languages for the position.
He also has in-depth knowledge of data engineering, data science, and AWS Cloud, which may be beneficial in constructing optimized data pipelines for machine learning models.
He also has experience working with Keras, a demanded machine learning framework.
He also has a certification as an AWS Certified Cloud Practitioner, which could speak to his ability to work in cloud-based environments.

Weaknesses: The candidate does not list experience with Java, R, or PyTorch, which is desired for this role.
He also does not list any direct experience as a machine learning engineer, which is a preferred qualification.
While he has extensive skills and experience in related fields, it is not clear if he has specific experience with machine learning algorithms and tools, or building machine learning systems.

Overall Summary: Sanket 

#Using ML resume and Full stack Developer JD with chatgpt 3.5

In [141]:
# Screen the machine learning resume against the full stack developer job
# description using the gpt-3.5-turbo client.
analysis = run_qa_chain(
    resume="Sanket_Patole_Machine_Learning.pdf",
    job_description="Job Description Full Stack Developer.txt",
    chat_client=chatgpt3_5_turbo,
)
print(analysis)


The candidate is not short-listed for interviews.

Strengths: The candidate has strong skills in Python, Unix Shell, Excel-VBA, Gitlab-CICD, AWS-Cloudformation, RestAPIs, Supervised Learning, Unsupervised Learning, Computer Vision, Scikit-Learn, Keras, Statistics, EDA, Feature Engineering, SQL, Snowflake, Pyspark, AWS-S3, AWS-Lambda, AWS-State Machine, Docker, AWS-ECR/AWS-EKS, SnowPipe, AWS-Fargate, Abinitio ETLTool, Abinitio Metadata Hub, Jira, Scrum, Client Communication, and Technical Interviewing.
They also have a certification in AWS Certified Cloud Practitioner.

Weaknesses: The candidate does not have experience with Java, Spring, Springboot, Angular, and React, which are required skills for the Full Stack Developer role.
They also do not have experience with desktop and mobile application development and familiarity with common stacks.

Overall Summary: The candidate has strong skills in various programming languages, data science, and data engineering.
However, they lack the 

#Using ML resume and Full stack Developer JD with chatgpt 4

In [143]:
# Screen the machine learning resume against the full stack developer job
# description using the gpt-4 client.
analysis = run_qa_chain(
    resume="Sanket_Patole_Machine_Learning.pdf",
    job_description="Job Description Full Stack Developer.txt",
    chat_client=chatgpt4,
)
print(analysis)


The candidate is not short-listed for interviews.

Strengths: The candidate has strong experience in Python, SQL, AWS, Docker, and has proven experience in Data Science and Data Engineering.
His knowledge in AWS cloud platform, ETL applications and various databases like PostgreSQL and Snowflake, along with his certifications and projects also add to his strengths.
He has also shown his ability to work in a team and has good client communication skills.

Weaknesses: The candidate lacks experience in front-end languages and libraries (like HTML/CSS, JavaScript, XML, jQuery), back-end languages (like C#, Java), JavaScript frameworks (like Angular, React, Node.js), and UI/UX design.
The candidate also has no stated experience with mobile application development, and no stated knowledge of Spring and Springboot.

Overall Summary: Sanket Patole has a rich background in backend development, data science and data engineering, with a focus on Python, SQL, AWS and other related technologies.
H