## Get The text from the PDF

In [1]:
%pip install pdfplumber pytesseract pdf2image

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pdfplumber
import pytesseract
from pdf2image import convert_from_path
from PIL import Image
import os


In [33]:
def extract_text_from_file(file_path):
    text = ""
    ext = os.path.splitext(file_path)[-1].lower()

    # ------------------------------
    # 1️⃣ For PDF files
    # ------------------------------
    if ext == ".pdf":
        try:
            # Try direct text extraction (for digital PDFs)
            with pdfplumber.open(file_path) as pdf:
                for page in pdf.pages:
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"

            if text.strip():
                print("✅ Extracted text using pdfplumber.")
                return text.strip()
        except Exception as e:
            print(f"⚠️ Direct text extraction failed: {e}")

        # Fallback: Use OCR (for scanned PDFs)
        print("⚙️ Falling back to OCR for image-based PDF...")
        try:
            images = convert_from_path(file_path)
            for i, image in enumerate(images):
                gray = image.convert("L")  # Convert to grayscale
                page_text = pytesseract.image_to_string(gray)
                text += f"\n\n--- Page {i+1} ---\n{page_text}"
            print("✅ Extracted text using OCR.")
        except Exception as e:
            print(f"❌ OCR failed: {e}")

    # ------------------------------
    # 2️⃣ For Image files (JPG/PNG)
    # ------------------------------
    elif ext in [".jpg", ".jpeg", ".png"]:
        try:
            image = Image.open(file_path)
            gray = image.convert("L")  # Convert to grayscale
            text = pytesseract.image_to_string(gray)
            print("✅ Extracted text from image using OCR.")
        except Exception as e:
            print(f"❌ Image OCR failed: {e}")
    else:
        print("❌ Unsupported file type. Please use PDF or image files.")

    return text.strip()

In [34]:
file_path = "Resume.pdf"  # ✅ Change this to your file name or image
extracted_text = extract_text_from_file(file_path)

print("\n📝 Extracted Text:")
print("-" * 50)
print(extracted_text if extracted_text else "No text could be extracted.")

⚠️ Direct text extraction failed: [Errno 2] No such file or directory: 'Resume.pdf'
⚙️ Falling back to OCR for image-based PDF...
❌ OCR failed: Unable to get page count. Is poppler installed and in PATH?

📝 Extracted Text:
--------------------------------------------------
No text could be extracted.


## Set Google GenerativeAI Api Key

In [5]:
%pip install google.generativeai python-dotenv

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [28]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
model = genai.GenerativeModel("models/gemini-2.5-pro")


In [29]:
response = model.generate_content("What is the capital of India?")


In [30]:
print(response)

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "The capital of India is **New Delhi**."
              }
            ],
            "role": "model"
          },
          "finish_reason": "STOP",
          "index": 0
        }
      ],
      "usage_metadata": {
        "prompt_token_count": 8,
        "candidates_token_count": 9,
        "total_token_count": 268
      },
      "model_version": "gemini-2.5-pro"
    }),
)


In [24]:
print(response.text)

The capital of India is **New Delhi**.


## Resume Analysis

In [26]:
def analyze_resume(resume_text, job_description=None):
    if not resume_text:
        return {"error": "Resume text is required for analysis."}

    # Create model inside the function
    model = genai.GenerativeModel("models/gemini-2.5-pro")

    base_prompt = f"""
    You are an experienced HR with Technical Experience in the field of any one job role from 
    Data Science, Data Analyst, DevOPS, Machine Learning Engineer, Prompt Engineer, AI Engineer, 
    Full Stack Web Development, Big Data Engineering, Marketing Analyst, Human Resource Manager, 
    or Software Developer. Your task is to review the provided resume.
    Please share your professional evaluation on whether the candidate's profile aligns with the role.
    Also mention skills they already have and suggest some skills to improve the resume, 
    as well as courses they might take to improve these skills. Highlight the strengths and weaknesses.

    Resume:
    {resume_text}
    """

    if job_description:
        base_prompt += f"""
        Additionally, compare this resume to the following job description:
        
        Job Description:
        {job_description}
        
        Highlight the strengths and weaknesses of the applicant in relation to the specified job requirements.
        """

    response = model.generate_content(base_prompt)
    analysis = response.text.strip()
    return analysis


In [27]:
print(analyze_resume(resume_text))


{'error': 'Resume text is required for analysis.'}
