In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Resume Generation

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/resume_generate.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fresume_generate.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/resume_generate.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/resume_generate.ipynb">
      <img width="32px" src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

| | | | |
|-|-|-|-|
|Author(s) | [Aniket Agrawal](https://github.com/aniketagrawal2012) | [Rupjit Chakraborty](https://github.com/lazyprgmr)

## Overview

This notebook demonstrates how to generate resumes using the Gemini 1.5 Flash 001 model and LLM prompting. It employs various prompts to guide the model in extracting information from existing resumes, generating professional summaries tailored to specific job descriptions, and arranging skills and coursework in order of relevance. The prompts emphasize maintaining truthfulness, using active voice, and adhering to a valid JSON format for the output. This approach allows for the creation of customized and targeted resumes that effectively highlight relevant qualifications and experiences.

## Get started

### Install Vertex AI SDK and other required packages


In [None]:
# Vertex AI SDK (provides the `vertexai` package imported later in this notebook).
%pip install --upgrade --user --quiet google-cloud-aiplatform
# Document parsers imported later in this notebook: docx (python-docx), pypdf and docx2txt.
%pip install python-docx pypdf docx2txt



### Restart runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.

The restart might take a minute or longer. After it's restarted, continue to the next step.

In [None]:
# Restart the current kernel so the packages installed above become importable.
import IPython

app = IPython.Application.instance()
# The argument True requests a restart rather than a plain shutdown.
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

If you're running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# Only authenticate when the google.colab module is loaded, i.e. when running on Colab.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Use the environment variable if the user doesn't provide Project ID.
import os

import vertexai

PROJECT_ID = (
    ""  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
)
if not PROJECT_ID:
    # Keep this as None (not the string "None") when the variable is unset, so
    # that vertexai.init() receives no project instead of a bogus project name.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# Region falls back to us-central1 when GOOGLE_CLOUD_REGION is not set.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

vertexai.init(project=PROJECT_ID, location=LOCATION)

### Import libraries

In [None]:
# Native Imports
import json
import os
from ast import literal_eval
from typing import Any, Dict, Iterator, Union

# Third-party Imports
import docx
import docx2txt
import pypdf
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import Table, _Cell
from docx.text.paragraph import Paragraph
from vertexai.generative_models import GenerativeModel

### Load model

In [None]:
# Gemini model shared by the helper functions below, which call
# model.generate_content().

MODEL_NAME = "gemini-1.5-flash-001"  # Choice of model
# NOTE(review): temperature 0 presumably aims for repeatable output - confirm.
generation_config = {"temperature": 0}

model = GenerativeModel(
    model_name=MODEL_NAME,
    generation_config=generation_config,
)

## Notebook Tutorial

### Resume Structure and Prompt Initialization

After installing necessary packages and loading the language model, we initialize predefined prompts designed for resume generation and the related structure. These prompts guide the model to extract relevant information from an existing resume and create a tailored output that aligns with the provided job description.

In [None]:
"""
------------------------
EXTRACTION
------------------------
Personal Information
Education
Languages
Extracurricular_Activities
References
Experience

------------------------
GENERATION
------------------------
Professional_Summary

--------------------------------------------------
ARRANGEMENT, TRUNCATION, GENERATION
--------------------------------------------------
Coursework
Skills
Certifications

"""

# Extraction prompt. Takes one %s placeholder: the parsed resume text.
# The JSON template below is kept strictly valid (no trailing comma) because the
# prompt itself asks the model for valid JSON.
PROMPT_EXTR = """
You are a seasoned career advising professional expert in crafting resumes and cover letters.

Consider the below details that have been extracted from a resume
%s

INSTRUCTIONS:
- Maintain truthfulness and objectivity.
- Prefer active voice over passive voice.
- The output should have the following valid JSON format
{
"Personal_Information": {"Name": "", "Address": "", "Mobile_No": "", "Email": ""},
"Education": "",
"Experience": "",
"Languages": "",
"Extracurricular_Activities": "",
"References": ""
}
- Output the Education, Experience, Languages, Extracurricular_Activities and References
sections as a simple numbered list.
"""

# Professional-summary prompt. Takes two %s placeholders, in order:
# the parsed resume text, then the job description.
PROMPT_GEN = """
You are a seasoned career advising professional expert in crafting resumes and cover letters.

Consider the below details that have been extracted from a resume
%s

Use the above details and craft a resume to satisfy the job description provided below
%s

INSTRUCTIONS:
- Maintain truthfulness and objectivity.
- Prefer active voice over passive voice.
- Generate a concise professional summary in 200 words based on the job description and the
extracted resume.
- Return the professional summary as a paragraph without any headers.
"""

# Arrangement prompt for Coursework, Skills and Certifications. Takes two %s
# placeholders, in order: the parsed resume text, then the job description.
# The voice instruction matches the other prompts (active voice), and the JSON
# template is kept strictly valid (no trailing comma).
PROMPT_ARR = """
You are a seasoned career advising professional expert in crafting resumes and cover letters.

Consider the below details that have been extracted from a resume
%s

Use the above details and craft a resume to satisfy the job description provided below
%s

INSTRUCTIONS:
- Maintain truthfulness and objectivity.
- Proofread and correct spelling and grammatical errors.
- Prefer active voice over passive voice.
- The output should have the following valid JSON format
{
"Coursework": "",
"Skills": "",
"Certifications": ""
}
- Output Coursework, Skills and Certifications as numbered lists.
- Arrange the Coursework such that the coursework that are most relevant to the job description are
at the top.
- Keep only the Coursework that are relevant to the job description.
- Arrange the Skills such that the skill that are most relevant to the job description are
at the top.
- Keep only the Skills that are relevant to the job description.
- Arrange the Certifications such that the certification that are most relevant to the job
description are at the top.
- Keep only the Certifications that are relevant to the job description.
"""

# Single-shot prompt that asks for the whole resume in one JSON object. Takes two
# %s placeholders, in order: the parsed resume text, then the job description.
# The JSON template is kept strictly valid (no trailing comma).
PROMPT_RESUME = """
You are a seasoned career advising professional expert in crafting resumes and cover letters.

Consider the below details that have been extracted from a resume
%s

Use the above details and craft a resume to satisfy the job description provided below
%s

Instructions:
- Maintain truthfulness and objectivity
- Proofread and correct spelling and grammatical errors
- Prefer active voice over passive voice
- The output resume should have the following valid JSON format
{
"Personal_Information": {"Name": "", "Address": "", "Mobile_No": "", "Email": ""},
"Professional_Summary": "",
"Education": "",
"Coursework": "",
"Skills": "",
"Experience": "",
"Certifications": "",
"Languages": "",
"Extracurricular_Activities": "",
"References": ""
}
- Generate a concise Professional_Summary based on the job description
- Output Education, Coursework, Skills, Experience, Certifications, Languages,
Extracurricular_Activities and References as numbered lists.
- Arrange the Coursework such that the coursework that are most relevant to the job description are
at the top.
- Arrange the Skills such that the skill that are most relevant to the job description are
at the top
- Arrange the Certifications such that the certification that are most relevant to the job
description are at the top
"""

In [None]:
# Resume structure: section names in the order structure_response() emits them.
# structure_response() looks each name up as a key of the generated resume dict.
resume_struc = [
    "Personal_Information",
    "Professional_Summary",
    "Education",
    "Coursework",
    "Skills",
    "Experience",
    "Certifications",
    "Languages",
    "Extracurricular_Activities",
    "References",
]

### Defining the core functionalities

This code defines a series of functions for processing resumes. It uses Gemini to extract key information like contact details and to generate tailored professional summaries. The code also includes functions for parsing resumes from different file formats (docx, pdf, txt), handling potential missing information, and structuring the final output for readability.

*   def check_info(resume: str) -> Dict[str, str]: Uses Gemini to extract name, address, mobile number, and email from a resume, returning 'Not Found' if information is missing.
    
*   def get_info(resume: str) -> str: Similar to check_info but returns the extracted information as a formatted string.

*   def iter_block_items(parent: Table) -> str:
    Iterates over the body of a docx document (or a table cell), yielding each paragraph and table in document order.

*   def convert_docx_to_text(file_path: str) -> str:
    Parses a docx file and converts it to text, handling tables and paragraphs.

*   def parse_resume(res_path: str) -> str:
    Parses a resume from various file types (docx, pdf, txt) with fallbacks to handle missing personal information.

*   def get_response(parsed_res: str, parsed_jd: str) -> str:
    Generates a resume using Gemini, incorporating extracted information, a professional summary, and arranged skills/coursework.

*   def structure_response(resume_generated: str) -> str:
    Structures the generated resume into a specific format for readability.

In [None]:
def check_info(resume: str) -> Dict[str, str]:
    """
    Check if the name, address, mobile number and email address is present in the resume

    Uses the module-level ``model`` and the Vertex AI project configured earlier
    in the notebook; it does not re-initialise the SDK.

    Args:
        resume: The parsed resume as str

    Returns:
        Details requested in the prompt as dict

    Raises:
        ValueError, SyntaxError: if the model reply cannot be parsed
    """
    prompt = f"""
    Extract the following candidate information from the given resume
    1. Name
    2. Address
    3. Mobile Number
    4. Email address
    Text : {resume}

    INSTRUCTIONS:
    - In case the information is not present, return 'Not Found'
    - The final output should be in valid JSON format
    """
    resp = model.generate_content(prompt).text
    # Keep only the content of the last ```json fenced block (or the whole reply
    # when there is no fence) and drop newlines before parsing.
    payload = resp.split("```json")[-1].split("```")[0].replace("\n", "")
    try:
        # The prompt asks for JSON, so parse it as JSON first (handles null/true/false).
        return json.loads(payload)
    except json.JSONDecodeError:
        # Fall back to Python-literal parsing for replies that are not strict JSON
        # (e.g. single quotes or trailing commas).
        return literal_eval(payload)

In [None]:
def read_pdf(file_path: str) -> str:
    """Read a PDF and return the text of all its pages concatenated.

    Args:
        file_path: path to the pdf file

    Returns:
        resume content as plain text, pages joined without any separator
    """
    with open(file_path, "rb") as pdf_handle:
        pdf_pages = pypdf.PdfReader(pdf_handle).pages
        # Extract while the file is still open; the reader reads from the handle.
        return "".join(pdf_page.extract_text() for pdf_page in pdf_pages)

In [None]:
def get_info(resume: str) -> str:
    """
    Get the name, address, mobile number and email address from the resume

    Args:
        resume: The parsed resume as str

    Returns:
        Details requested in the prompt as string, one "key: value" line per
        extracted field

    Raises:
        ValueError, SyntaxError: if the model reply cannot be parsed
    """
    prompt = f"""
    Extract the following candidate information from the given resume
    1. Name
    2. Address
    3. Mobile Number
    4. Email address
    Text : {resume}

    INSTRUCTIONS:
    - In case the information is not present, return 'Not Found'
    - The final output should be in valid JSON format
    """
    resp = model.generate_content(prompt).text
    # Keep only the content of the last ```json fenced block (or the whole reply
    # when there is no fence) and drop newlines before parsing.
    payload = resp.split("```json")[-1].split("```")[0].replace("\n", "")
    try:
        # The prompt asks for JSON, so parse it as JSON first (handles null/true/false).
        personal_details = json.loads(payload)
    except json.JSONDecodeError:
        # Fall back to Python-literal parsing for replies that are not strict JSON.
        personal_details = literal_eval(payload)
    # f-string formatting accepts non-string values (e.g. a numeric phone number),
    # which plain string concatenation would reject with a TypeError.
    return "".join(
        f"{k_detail}: {v_desc}\n" for k_detail, v_desc in personal_details.items()
    )

In [None]:
def iter_block_items(
    parent: Union[Document, _Cell]
) -> Iterator[Union[Paragraph, Table]]:
    """
    Iterate over the direct children of a docx document body or table cell.

    Args:
        parent: a docx Document or a table cell (_Cell)

    Yields:
        A Paragraph for each paragraph element and a Table for each table
        element, in the order the elements appear under the parent. Other
        child elements are skipped.

    Raises:
        ValueError: if parent is neither a Document nor a _Cell
    """
    if isinstance(parent, Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError("Error in reading docx file")

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)

In [None]:
def convert_docx_to_text(file_path: str) -> str:
    """
    Flatten a docx file into newline-terminated lines of text.

    Paragraphs contribute their text. Each table row contributes its non-empty
    cell texts joined with ", "; a row whose several non-empty cells all hold the
    same text contributes that text once.

    Args:
        file_path: the path of the docx file to be parsed

    Returns:
        Parsed resume as string
    """
    lines = []
    for block in iter_block_items(docx.Document(file_path)):
        if not isinstance(block, Table):
            lines.append(block.text)  # paragraph text
            continue
        for row in block.rows:
            # Empty cells are dropped before the row is rendered.
            cell_texts = [
                text for text in (cell.text for cell in row.cells) if text != ""
            ]
            if len(cell_texts) > 1 and len(set(cell_texts)) == 1:
                # Every non-empty cell repeats the same text: emit it once.
                lines.append(cell_texts[0])
            else:
                lines.append(", ".join(cell_texts))
    return "".join(f"{line}\n" for line in lines)

In [None]:
def parse_resume(res_path: str) -> str:
    """
    Driver method to parse resume with fallbacks.

    The file type is taken from the extension (case-insensitive). For docx files
    whose extracted text is missing personal details, the file is re-read with
    docx2txt and the details found there are prepended to the parsed text.

    Args:
        res_path: Path to the resume file

    Returns:
        The parsed resume as string

    Raises:
        ValueError: if the file extension is not docx, pdf or txt
    """
    file_type = res_path.rsplit(".", 1)[-1].lower()
    if file_type == "docx":
        parsed_resume = convert_docx_to_text(file_path=res_path).strip()
    elif file_type == "pdf":
        parsed_resume = read_pdf(res_path).strip()
    elif file_type == "txt":
        with open(res_path, "r") as f_hdl:
            parsed_resume = f_hdl.read().strip()
    else:
        raise ValueError("File is not of valid type")

    personal_details = check_info(parsed_resume)
    # docx2txt can only read docx files, so the fallback is limited to them;
    # running it on a pdf or txt file would fail instead of recovering details.
    if file_type == "docx" and "Not Found" in personal_details.values():
        parsed_resume_2 = docx2txt.process(res_path)
        personal_details_2 = get_info(parsed_resume_2)
        parsed_resume = personal_details_2 + "\n" + parsed_resume

    return parsed_resume

In [None]:
def get_response(
    parsed_res: str,
    parsed_jd: str,
) -> Dict[str, Any]:
    """
    Generate the resume using the chosen model.

    Sends three prompts to the module-level ``model`` (extraction, professional
    summary, arrangement) and merges the replies into one dictionary.

    Args:
        parsed_res: parsed resume as string
        parsed_jd: parsed job description as string

    Returns:
        Collated resume information as a dict: the keys of the extraction reply,
        plus "Professional_Summary" (the raw summary reply text), updated with
        the keys of the arrangement reply.

    Raises:
        ValueError, SyntaxError: if a structured model reply cannot be parsed
    """

    def _load_json_block(reply: str) -> Dict[str, Any]:
        # Keep only the content of the last ```json fenced block (or the whole
        # reply when there is no fence) and drop newlines before parsing.
        payload = reply.split("```json")[-1].split("```")[0].replace("\n", "")
        try:
            # The prompts ask for JSON, so parse as JSON first (handles null/true/false).
            return json.loads(payload)
        except json.JSONDecodeError:
            # Fall back to Python-literal parsing for replies that are not
            # strict JSON (e.g. trailing commas).
            return literal_eval(payload)

    model_response_extr = model.generate_content(
        PROMPT_EXTR % (parsed_res),
    ).text
    model_response_gen = model.generate_content(
        PROMPT_GEN % (parsed_res, parsed_jd),
    ).text
    model_response_arr = model.generate_content(
        PROMPT_ARR % (parsed_res, parsed_jd),
    ).text

    resume_generated = _load_json_block(model_response_extr)
    resume_generated.update({"Professional_Summary": model_response_gen})
    resume_generated.update(_load_json_block(model_response_arr))

    return resume_generated

In [None]:
def structure_response(resume_generated: Dict[str, Any]) -> str:
    """
    Structure the resume to align with a given structure.

    Sections are emitted in the order given by the module-level ``resume_struc``.
    A section missing from ``resume_generated`` is emitted with an empty body.

    Args:
        resume_generated: The resume generated using LLM, as a dict keyed by
            section name

    Returns:
        The structured resume as string
    """
    parts = []
    for section in resume_struc:
        content = resume_generated.get(section, "")
        parts.append(f"\n{section}\n")
        if section == "Personal_Information" and isinstance(content, dict):
            # One "title: value" entry per personal detail, each set off by
            # blank lines.
            details = "".join(
                f"\n{title}: {desc}\n" for title, desc in content.items()
            )
            parts.append(f"\n{details}\n")
        elif isinstance(content, list):
            # Render every item: dict items become "key:value" lines, anything
            # else is converted with str(), so no list entry is dropped.
            rendered = [
                "\n".join(f"{key}:{value}" for key, value in item.items())
                if isinstance(item, dict)
                else str(item)
                for item in content
            ]
            parts.append("\n".join(rendered) + "\n")
        else:
            parts.append(f"\n{content}\n")
    return "".join(parts)

### Testing the overall functionality

This code tests a resume generator. It parses a sample resume and job description, feeds them to an LLM, and structures the generated output.

In [None]:
# Test resume generation

# Resume file to parse; parse_resume() accepts docx, pdf and txt files.
res_path = "Sample_1.docx"

parsed_resume = parse_resume(res_path=res_path)

# Job description the generated resume is tailored to.
jd = """
PHP Developer
Job Description
    •Development of robust and user-friendly PHP-based web applications
    •Creation of site layout/user interface from provided design concepts
    •Ability to work independently on projects without detailed
    instructions; taking the initiative with this job is important.
    •Routine daily maintenance of existing sites and web applications.
Requirements
    •Knowledge of PHP/MySQL.
    •Experience developing in PHP, which requires integration with
    databases such as MySQL.
    •Experience programming in HTML, CSS, XML, JavaScript, jQuery, etc.
    •Experience in WordPress development is a plus.
    •Business requirements analysis ability is a plus.
    •Good oral and written communication skills.
    •Strong desire to develop new technical knowledge and professional
    skills on a continual basis.
    •Ability to work with only general supervision.
    •Ability to supporting sales on technical demonstration of system
    (if needed).
    •Ability to give system training to customer (if needed).
    •Ability to work well independently and with others.
"""

# Generate the resume sections with the model, then lay them out in the
# order defined by resume_struc.
generated_resume = get_response(parsed_resume, jd)
structured_resume = structure_response(generated_resume)

print("Generated Resume")
print(structured_resume)

Generated Resume

Personal_Information


Name: Atikah Wahyuni

Address: Blk 233 Sengkang West Drive, #05-709 S(540233)

Mobile_No: 91304475

Email: atikah_wahyuni3@connect.ite.edu.sg


Professional_Summary

A highly motivated and results-oriented IT graduate with a strong foundation in web development and a passion for creating user-friendly applications. I possess a comprehensive understanding of PHP, MySQL, HTML, CSS, XML, JavaScript, and jQuery, enabling me to develop robust and scalable web solutions. My experience with WordPress development and proficiency in business requirements analysis further enhance my ability to deliver high-quality projects. I am a quick learner, eager to expand my technical knowledge and skills, and thrive in collaborative environments. My internship at BLK.SG provided valuable experience in co-creating a digital marketing platform, demonstrating my ability to work independently and contribute to meaningful projects. I am confident in my ability to contri