In [1]:
# Cell 1: Imports and Setup
import os
import google.generativeai as genai
from dotenv import load_dotenv
import PyPDF2

# Load environment variables from a .env file so the API key is never
# hardcoded in the notebook itself.
load_dotenv()

# Configure the Google Generative AI client
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

# os.getenv() returns None when the variable is absent. Report that state
# honestly instead of always claiming success, so a missing key is visible
# in this cell's output.
if api_key:
    print("Libraries imported and API key configured.")
else:
    print("Warning: GOOGLE_API_KEY is not set. Add it to your .env file before calling the API.")

Libraries imported and API key configured.


In [2]:
# Cell 2: Function to Extract Text from PDF

def get_pdf_text(pdf_path):
    """
    Extracts text from a given PDF file.

    The text of each page is concatenated in page order, with no separator
    inserted between pages.

    Args:
        pdf_path (str): The file path to the PDF document.

    Returns:
        str: The extracted text from the PDF, or None if the file is missing
        or any other error occurs while reading it. A PDF that opens but
        yields no text returns an empty string.
    """
    try:
        with open(pdf_path, "rb") as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # `or ""` keeps one page without extractable text from raising a
            # TypeError (which the broad handler below would turn into the
            # loss of the whole document). A single join also avoids
            # rebuilding the string once per page.
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except FileNotFoundError:
        print(f"Error: The file '{pdf_path}' was not found.")
        return None
    except Exception as e:
        # Best-effort by design: any parsing failure is reported and mapped
        # to None so the calling cell can simply test the result's truthiness.
        print(f"An error occurred while reading the PDF: {e}")
        return None

In [3]:
# Cell 3: Try the extractor on a sample resume
# The PDF is expected inside a 'data' subfolder of the working directory
resume_path = "data/amazon-data-science-resume-example.pdf"
extracted_text = get_pdf_text(resume_path)

# Both None (read failure) and "" (no text) are falsy, so nothing is
# printed in either case.
if extracted_text:
    print(
        "--- Successfully Extracted Resume Text ---",
        extracted_text,
        "----------------------------------------",
        sep="\n",
    )

--- Successfully Extracted Resume Text ---
Emma Davis
Amazon Data Scientist
Dynamic data scientist with a strong foundation in machine
learning, data analysis, and problem-solving. Eager to join
Amazon's world-class data science team to leverage data-driven
insights that drive business growth.e.davis@email.com
(123) 456-7890
San Jose, CA
LinkedIn
Work Experience
Adobe-Data Scientist
2018 - current San Jose, CA
Led data analysis initiatives that resulted in a 37% increase in customer retention rates.
Developed predictive models using TensorFlow, reducing forecasting errors by 21%.
Implemented Apache Hadoop to analyze large-scale datasets, improving data processing speed by
33%.
Utilized Pandas and Python for data manipulation, resulting in a 2-hour reduction in data cleaning
time.
Cisco Systems-Junior Data Engineer
2015 - 2018 San Jose, CA
Collaborated with a cross-functional team to develop ETL pipelines, improving data processing
efﬁciency by 26%.
Leveraged Amazon Redshift to optimize

In [4]:
# Cell 4: Define the Prompt Template

# First, read the job description text from the file.
# The encoding is pinned to UTF-8 so the text decodes the same way on every
# platform instead of depending on the locale's default codec.
try:
    with open('job_description.txt', 'r', encoding='utf-8') as f:
        job_description_text = f.read()
except FileNotFoundError:
    print("Error: 'job_description.txt' not found. Please create this file in your project directory.")
    job_description_text = "" # Set to empty string to avoid further errors

# Prompt template for the resume-vs-job-description review. It has two named
# placeholders, {resume_text} and {job_description}, which are filled in with
# str.format() when the model is called.
# NOTE: str.format() treats every brace as a placeholder delimiter, so any
# literal brace added to this template (e.g. an inline JSON example) must be
# doubled.
input_prompt = """
You are a highly skilled and experienced technical HR manager with expertise in data science. 
Your task is to review the provided resume against a specific job description.

You must perform a detailed analysis and provide the following in a strict JSON format:
1.  **"JD_Match"**: A percentage match of the resume to the job description.
2.  **"Missing_Keywords"**: A list of key skills or technologies from the job description that are NOT found in the resume.
3.  **"Profile_Summary"**: A concise, professional summary (3-4 sentences) evaluating the candidate's strengths and weaknesses based on the provided texts.

Here is the resume text:
{resume_text}

Here is the job description:
{job_description}
"""

In [5]:
# Cell: list the models returned by the API
import google.generativeai as genai

print("Searching for available models...\n")

# Keep only the entries that advertise the 'generateContent' method
text_capable = (
    candidate
    for candidate in genai.list_models()
    if 'generateContent' in candidate.supported_generation_methods
)
for candidate in text_capable:
    print(f"Found model: {candidate.name}")

Searching for available models...

Found model: models/gemini-1.5-pro-latest
Found model: models/gemini-1.5-pro-002
Found model: models/gemini-1.5-pro
Found model: models/gemini-1.5-flash-latest
Found model: models/gemini-1.5-flash
Found model: models/gemini-1.5-flash-002
Found model: models/gemini-1.5-flash-8b
Found model: models/gemini-1.5-flash-8b-001
Found model: models/gemini-1.5-flash-8b-latest
Found model: models/gemini-2.5-pro-preview-03-25
Found model: models/gemini-2.5-flash-preview-05-20
Found model: models/gemini-2.5-flash
Found model: models/gemini-2.5-flash-lite-preview-06-17
Found model: models/gemini-2.5-pro-preview-05-06
Found model: models/gemini-2.5-pro-preview-06-05
Found model: models/gemini-2.5-pro
Found model: models/gemini-2.0-flash-exp
Found model: models/gemini-2.0-flash
Found model: models/gemini-2.0-flash-001
Found model: models/gemini-2.0-flash-exp-image-generation
Found model: models/gemini-2.0-flash-lite-001
Found model: models/gemini-2.0-flash-lite
Found

In [6]:
# Cell 5: Run the AI and Get the Response

# Inputs come from earlier cells: 'extracted_text' (the resume) and
# 'job_description_text' (the job description). Both must be non-empty.
if not (extracted_text and job_description_text):
    print("Could not run the model because either the resume text or job description text is missing.")
else:
    # Build the model handle
    model = genai.GenerativeModel('models/gemini-1.5-flash-latest')

    # Substitute the resume and the job description into the template
    formatted_prompt = input_prompt.format(
        job_description=job_description_text,
        resume_text=extracted_text,
    )

    # Send the prompt and show the text of the reply
    response = model.generate_content(formatted_prompt)
    print(response.text)

```json
{
  "JD_Match": 75,
  "Missing_Keywords": [
    "scikit-learn",
    "Keras",
    "Google Cloud Platform",
    "Java",
    "Matlab",
    "Mathematica",
    "JavaScript",
    "RNNs",
    "time series models",
    "Bayesian inference",
    "interactive dashboards",
    "real-time data",
    "supervised/unsupervised learning",
    "deep learning",
    "Financial Services"
  ],
  "Profile_Summary": "Emma Davis possesses a strong background in data science, demonstrated by her experience at Adobe, Cisco, and eBay, showcasing achievements in data analysis, model development (TensorFlow, Hadoop), and process automation.  However, her resume lacks key skills specified in the job description, such as experience with specific ML libraries (scikit-learn, Keras), cloud platforms (GCP), and advanced machine learning techniques (Bayesian inference, RNNs).  Her experience primarily focuses on on-premise solutions, which needs to be addressed in the interview."
}
```

