In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Resume Generation

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/education/resume_generator.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fuse-cases%2Feducation%2Fresume_generator.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/use-cases/education/resume_generator.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/education/resume_generator.ipynb">
      <img width="32px" src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

| | | | |
|-|-|-|-|
|Author(s) | [Aniket Agrawal](https://github.com/aniketagrawal2012) | [Rupjit Chakraborty](https://github.com/lazyprgmr) |

## Overview

This notebook demonstrates how to generate resumes using the Gemini 1.5 Flash model and LLM prompting. It employs various prompts to guide the model in extracting information from existing resumes, generating professional summaries tailored to specific job descriptions, and arranging skills and coursework in order of relevance. The prompts emphasize maintaining truthfulness, using active voice, and adhering to a valid JSON format for the output. This approach allows for the creation of customized and targeted resumes that effectively highlight relevant qualifications and experiences.

## Get started

### Install Vertex AI SDK and other required packages


In [None]:
%pip install --upgrade --user --quiet google-cloud-aiplatform==1.68.0 python-docx==1.1.2 pypdf==5.0.0 docx2txt==0.8

### Restart runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.

The restart might take a minute or longer. After it's restarted, continue to the next step.

In [None]:
import IPython

# Shut the current kernel down with restart enabled so that the packages
# installed above are importable in the new kernel process.
IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

If you're running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# Only authenticate when the Colab runtime module is already loaded; in any
# other environment this cell does nothing.
if "google.colab" in sys.modules:
    # Imported inside the guard, so the import is never attempted elsewhere.
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Fall back to the GOOGLE_CLOUD_PROJECT environment variable when no project ID is provided.

# Native Imports
import os

# Third-party Imports
import vertexai
from vertexai.generative_models import GenerationConfig, GenerativeModel

PROJECT_ID = (
    ""  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
)
# Fall back to the environment when the form field is left empty. If the
# variable is unset as well, keep None rather than the string "None", so that
# vertexai.init() receives "no project given" instead of a bogus project ID.
if not PROJECT_ID:
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# Region used for all Vertex AI calls; defaults to us-central1.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

print(f"PROJECT_ID: {PROJECT_ID}")
print(f"LOCATION: {LOCATION}")

vertexai.init(project=PROJECT_ID, location=LOCATION)

### Import libraries

In [None]:
# Native Imports
from collections.abc import Iterator
import json
from typing import Optional, Union

# Third-party Imports
import docx
import docx2txt
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import Table, _Cell
from docx.text.paragraph import Paragraph
import pypdf

### Download a sample resume file

In [None]:
# Copy the sample resume from Cloud Storage into the current working directory.
! gsutil cp gs://github-repo/generative-ai/gemini/use-cases/education/resume-generation/Sample_1.docx .

### Load model

In [None]:
# Model identifier handed to GenerativeModel. Every generate_content() call in
# this notebook goes through the resulting `model` object.
MODEL_NAME = "gemini-1.5-flash"  # Choice of model

model = GenerativeModel(model_name=MODEL_NAME)

## Resume Generation

### Resume Structure and Prompt Initialization

After installing necessary packages and loading the language model, we initialize predefined prompts designed for resume generation and the related structure. These prompts guide the model to extract relevant information from an existing resume and create a tailored output that aligns with the provided job description.

In [None]:
# Resume structure: section keys in the order structure_response() emits them.
# NOTE: the response schemas index this list by position (resume_structure[0],
# resume_structure[2], ...), so reordering or inserting entries changes which
# section each schema property refers to.
resume_structure = [
    "personal_information",
    "professional_summary",
    "education",
    "coursework",
    "skills",
    "experience",
    "certifications",
    "languages",
    "extracurricular_activities",
    "references",
]

In [None]:
# Extraction prompt: {0} is replaced with the parsed resume text via
# str.format(); no job description is involved at this stage.
PROMPT_EXTRACTION = """
You are a seasoned career advising professional expert in crafting resumes and cover letters.

Consider the below details that have been extracted from a resume
{0}

INSTRUCTIONS:
- Maintain truthfulness and objectivity.
- Prefer active voice over passive voice.
- Output the Education, Experience, Languages, Extracurricular_Activities and References
sections as a simple numbered list.
"""

# Structured-output schema paired with PROMPT_EXTRACTION: an array of objects
# holding the contact details plus five free-text sections.
RESPONSE_SCHEMA_PROMPT_EXTRACTION = {
    "type": "ARRAY",
    "items": {
        "type": "OBJECT",
        "properties": {
            # personal_information: list of contact-detail objects
            resume_structure[0]: {
                "type": "ARRAY",
                "items": {
                    "type": "OBJECT",
                    "properties": {
                        field: {"type": "STRING"}
                        for field in ("name", "address", "mobile_no", "email")
                    },
                },
            },
            # education, experience, languages, extracurricular_activities,
            # references: each returned as a plain string
            **{
                resume_structure[index]: {"type": "STRING"}
                for index in (2, 5, 7, 8, 9)
            },
        },
    },
}

# Generation prompt: {0} receives the resume text and {1} the job description
# (both filled in with str.format()); asks for a tailored professional summary.
PROMPT_GENERATION = """
You are a seasoned career advising professional expert in crafting resumes and cover letters.

Consider the below details that have been extracted from a resume
{0}

Use the above details and craft a resume to satisfy the job description provided below
{1}

INSTRUCTIONS:
- Maintain truthfulness and objectivity.
- Prefer active voice over passive voice.
- Generate a concise professional summary in 200 words based on the job description and the
extracted resume.
- Output the professional summary as a paragraph without any headers
"""

# Structured-output schema paired with PROMPT_GENERATION: an array of objects
# whose only property is the professional_summary string.
RESPONSE_SCHEMA_PROMPT_GENERATION = dict(
    type="ARRAY",
    items=dict(
        type="OBJECT",
        properties={resume_structure[1]: dict(type="STRING")},
    ),
)

# Arrangement prompt: {0} receives the resume text and {1} the job description
# (both filled in with str.format()). It filters and orders coursework, skills
# and certifications by relevance to the job description.
# The voice instruction reads "active over passive" to agree with
# PROMPT_EXTRACTION and PROMPT_GENERATION; it previously asked for the opposite.
PROMPT_ARRANGE = """
You are a seasoned career advising professional expert in crafting resumes and cover letters.

Consider the below details that have been extracted from a resume
{0}

Use the above details and craft a resume to satisfy the job description provided below
{1}

INSTRUCTIONS:
- Maintain truthfulness and objectivity.
- Proofread and correct spelling and grammatical errors.
- Prefer active voice over passive voice.
- Output Coursework, Skills and Certifications as numbered lists.
- Arrange the Coursework such that the coursework that are most relevant to the job description are
at the top.
- Keep only the Coursework that are relevant to the job description.
- Arrange the Skills such that the skill that are most relevant to the job description are
at the top.
- Keep only the Skills that are relevant to the job description.
- Arrange the Certifications such that the certification that are most relevant to the job
description are at the top.
- Keep only the Certifications that are relevant to the job description.
"""

# Structured-output schema paired with PROMPT_ARRANGE: an array of objects with
# the coursework, skills and certifications sections, each as a string.
RESPONSE_SCHEMA_PROMPT_ARRANGE = {
    "type": "ARRAY",
    "items": {
        "type": "OBJECT",
        "properties": {
            resume_structure[3]: {
                "type": "STRING",
            },
            resume_structure[4]: {
                "type": "STRING",
            },
            resume_structure[6]: {
                "type": "STRING",
            },
        },
    },
}

# Prompt to extract basic contact details from a resume, used while parsing.
# This must be a plain string, NOT an f-string: with an `f` prefix the `{0}`
# placeholder is evaluated immediately to the literal "0", and the later
# PROMPT_EXTRACT_BASIC_DETAILS.format(resume) call has nothing left to fill in.
PROMPT_EXTRACT_BASIC_DETAILS = """
You are a seasoned career advising professional expert in crafting resumes and cover letters.

Consider the below resume
{0}

Extract the following information from the given resume
1. Name
2. Address
3. Mobile Number
4. Email address

INSTRUCTIONS:
- Do not make up any information. 
- If you cannot find some information then say 'Not Found'.
"""

# Structured-output schema paired with PROMPT_EXTRACT_BASIC_DETAILS: an array
# of objects with the four contact fields, each as a string.
RESPONSE_SCHEMA_PROMPT_EXTRACT_BASIC_DETAILS = {
    "type": "ARRAY",
    "items": {
        "type": "OBJECT",
        "properties": {
            "name": {
                "type": "STRING",
            },
            "address": {"type": "STRING"},
            "mobile_no": {"type": "STRING"},
            "email": {"type": "STRING"},
        },
    },
}

### Defining the core functionalities

This code defines a series of functions for processing resumes. It uses Gemini to extract key information like contact details and to generate tailored professional summaries. The code also includes functions for parsing resumes from different file formats (docx, pdf, txt), handling potential missing information, and structuring the final output for readability.

In [None]:
def basic_info_extraction(resume: str) -> dict[str, str]:
    """
    Ask the model for the contact details contained in a resume.

    Args:
        resume: The parsed resume as str

    Returns:
        The first object of the model's JSON reply, keyed as described by
        RESPONSE_SCHEMA_PROMPT_EXTRACT_BASIC_DETAILS
    """
    prompt = PROMPT_EXTRACT_BASIC_DETAILS.format(resume)
    json_config = GenerationConfig(
        response_mime_type="application/json",
        response_schema=RESPONSE_SCHEMA_PROMPT_EXTRACT_BASIC_DETAILS,
    )
    response = model.generate_content(prompt, generation_config=json_config)
    # The schema declares an array; only its first element is used.
    return json.loads(response.text)[0]

In [None]:
def read_pdf(file_path: str) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: path to the pdf file

    Returns:
        resume content as plain text
    """
    with open(file_path, "rb") as pdf_stream:
        # Extraction happens inside the `with` block, while the file is open.
        pdf_pages = pypdf.PdfReader(pdf_stream).pages
        return "".join(pdf_page.extract_text() for pdf_page in pdf_pages)

In [None]:
def iter_block_items(
    parent: Union[Document, _Cell],
) -> Iterator[Union[Paragraph, Table]]:
    """
    Yield the paragraphs and tables directly contained in a docx container.

    Items are produced in the order ``iterchildren()`` returns the underlying
    XML children. Children that are neither paragraphs nor tables are skipped.

    Args:
        parent: the container to walk; either a ``Document`` or a table
            ``_Cell``

    Yields:
        A ``Paragraph`` for each paragraph child and a ``Table`` for each
        table child

    Raises:
        ValueError: if ``parent`` is neither a ``Document`` nor a ``_Cell``.
            Because this is a generator, the error surfaces when iteration
            starts, not when the function is called.
    """
    if isinstance(parent, Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError(
            "Error in reading docx file: "
            f"unsupported container type {type(parent).__name__}"
        )

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)

In [None]:
def convert_docx_to_text(file_path: str) -> str:
    """
    Flatten a docx file into newline-terminated lines of text.

    Paragraphs contribute their text as one line. Each table row contributes
    one line made of its non-empty cell texts joined by ", "; a row whose
    non-empty cells all carry the same text is written once.

    Args:
        file_path: the path of the docx file to be parsed

    Returns:
        Parsed resume as string
    """
    lines = []
    document = docx.Document(file_path)
    for block in iter_block_items(document):
        if not isinstance(block, Table):
            lines.append(block.text)  # paragraph text
            continue
        for row in block.rows:
            cell_texts = [
                text for text in (cell.text for cell in row.cells) if text != ""
            ]
            all_cells_identical = len(cell_texts) > 1 and len(set(cell_texts)) == 1
            if all_cells_identical:
                lines.append(cell_texts[0])
            else:
                lines.append(", ".join(cell_texts))
    return "".join(f"{line}\n" for line in lines)

In [None]:
def parse_resume(res_path: str) -> str:
    """
    Driver method to parse resume with fallbacks.

    The file type is taken from the extension, compared case-insensitively.
    For docx files the text is first extracted block by block; if the model
    reports any contact detail as 'Not Found' in that text, the details are
    extracted again from the docx2txt rendering and prepended to the result.

    Args:
        res_path: Path to the resume file (.docx, .pdf or .txt)

    Returns:
        The parsed resume as string

    Raises:
        ValueError: if the file extension is not docx, pdf or txt
    """
    # Lower-case the extension so "Resume.DOCX" or "cv.Pdf" are accepted too.
    file_type = res_path.rsplit(".", 1)[-1].lower()
    if file_type == "docx":
        parsed_resume = convert_docx_to_text(file_path=res_path).strip()
        # check if name, address, email address and mobile no. has been extracted
        personal_details = basic_info_extraction(parsed_resume)
        if "Not Found" in personal_details.values():
            # Second attempt using docx2txt's rendering of the same file.
            fallback_text = docx2txt.process(res_path)
            fallback_details = basic_info_extraction(fallback_text)
            details_header = "".join(
                f"{key}: {value}\n" for key, value in fallback_details.items()
            )
            parsed_resume = details_header + "\n" + parsed_resume
    elif file_type == "pdf":
        parsed_resume = read_pdf(res_path).strip()
    elif file_type == "txt":
        with open(res_path) as resume_text:
            parsed_resume = resume_text.read().strip()
    else:
        # ValueError is a subclass of Exception, so existing handlers still match.
        raise ValueError(
            f"File is not of valid type: {res_path!r} (expected .docx, .pdf or .txt)"
        )
    return parsed_resume

In [None]:
def _format_section_entry(entry) -> str:
    """Render one list entry of a section: dicts as 'key:value' lines, anything else via str()."""
    if isinstance(entry, dict):
        return "\n".join(f"{key}:{value}" for key, value in entry.items())
    return str(entry)


def structure_response(
    resume_generated: dict,
    structure: Optional[list[str]] = None,
) -> str:
    """
    Structure the resume to align with a given structure.

    Sections are emitted in the order given by ``structure``. A section that
    is missing from ``resume_generated`` is reported on stdout and skipped.
    List-valued sections are written one entry per line (every dict entry is
    expanded to ``key:value`` lines); any other value is written after a
    single leading space.

    Args:
        resume_generated: The resume generated using LLM, as a mapping from
            section name to section content
        structure: Section names in output order; defaults to the
            module-level ``resume_structure``

    Returns:
        The structured resume as string
    """
    if structure is None:
        structure = resume_structure

    parts = []
    for section in structure:
        try:
            section_content = resume_generated[section]
        except KeyError:
            print(f"Could not find section {section}")
            continue
        parts.append(f"\n{section}\n")
        if isinstance(section_content, list):
            rendered = "\n".join(
                _format_section_entry(entry) for entry in section_content
            )
            parts.append(rendered + "\n")
        else:
            parts.append(f" {section_content}\n")
    return "".join(parts)

In [None]:
def _generate_json_object(prompt: str, response_schema: dict) -> dict:
    """Send one prompt with a JSON response schema and return the first object of the reply."""
    response_text = model.generate_content(
        prompt,
        generation_config=GenerationConfig(
            response_mime_type="application/json",
            response_schema=response_schema,
        ),
    ).text
    parsed = json.loads(response_text)
    # The schemas declare an array; an empty array is treated as "no sections"
    # instead of failing with IndexError.
    return parsed[0] if parsed else {}


def get_response(
    resume: str,
    job_description: str,
) -> dict:
    """
    Generate the resume using the chosen model.

    Three requests are made: one extracting the factual sections from the
    resume, one generating the professional summary, and one arranging
    coursework, skills and certifications for the job description. The three
    results are merged into one mapping; on a key collision the later request
    wins.

    Args:
        resume: parsed resume as string
        job_description: parsed job description as string

    Returns:
        Collated resume information as a dict mapping section name to content
    """
    resume_generated = _generate_json_object(
        PROMPT_EXTRACTION.format(resume),
        RESPONSE_SCHEMA_PROMPT_EXTRACTION,
    )
    resume_generated.update(
        _generate_json_object(
            PROMPT_GENERATION.format(resume, job_description),
            RESPONSE_SCHEMA_PROMPT_GENERATION,
        )
    )
    resume_generated.update(
        _generate_json_object(
            PROMPT_ARRANGE.format(resume, job_description),
            RESPONSE_SCHEMA_PROMPT_ARRANGE,
        )
    )
    return resume_generated

### Testing the overall functionality

This code tests a resume generator. It parses a sample resume and job description, feeds them to an LLM, and structures the generated output.

In [None]:
# Test resume generation


# Parse the sample resume that was downloaded earlier in the notebook.
resume_path = "Sample_1.docx"
parsed_resume = parse_resume(res_path=resume_path)

# Job description the generated resume is tailored to.
sample_job_description = """
PHP Developer
Job Description
    •Development of robust and user-friendly PHP-based web applications
    •Creation of site layout/user interface from provided design concepts
    •Ability to work independently on projects without detailed
    instructions; taking the initiative with this job is important.
    •Routine daily maintenance of existing sites and web applications.
Requirements
    •Knowledge of PHP/MySQL.
    •Experience developing in PHP, which requires integration with
    databases such as MySQL.
    •Experience programming in HTML, CSS, XML, JavaScript, jQuery, etc.
    •Experience in WordPress development is a plus.
    •Business requirements analysis ability is a plus.
    •Good oral and written communication skills.
    •Strong desire to develop new technical knowledge and professional
    skills on a continual basis.
    •Ability to work with only general supervision.
    •Ability to supporting sales on technical demonstration of system
    (if needed).
    •Ability to give system training to customer (if needed).
    •Ability to work well independently and with others.
"""

# Query the model for all sections, then flatten the merged result into
# printable text ordered by resume_structure.
generated_resume = get_response(parsed_resume, sample_job_description)
structured_resume = structure_response(generated_resume)
print(f"Generated Resume \n {structured_resume}")