In [8]:
import pdfplumber
import pytesseract
from pdf2image import convert_from_path

In [1]:
def extract_text_from_pdf(pdf_path):
    text = ""
    
    try:
        # Try direct text extraction
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text
    
        if text.strip():
            return text.strip()
    except Exception as e:
        print(f"Direct text extraction failed: {e}")
        
    #Fallback to OCR for image based PDFs
    print("Falling back to OCR for image-based PDF")
    try:
        images= convert_from_path(pdf_path)
        for image in images:
            page_text= pytesseract.image_to_string(image)
            text += page_text + "\n"
    except Exception as e:
        print(f"OCR extraction failed: {e}")
        
    return text.strip()

In [2]:
# Run the extraction on the sample resume and show the result under a label.
pdf_path = "resume.pdf"
resume_text = extract_text_from_pdf(pdf_path)
print("extract text from pdf", resume_text, sep="\n")

Direct text extraction failed: name 'pdfplumber' is not defined
Falling back to OCR for image-based PDF
OCR extraction failed: name 'convert_from_path' is not defined
extract text from pdf



In [3]:
import google.generativeai as genai 
import os
from dotenv import load_dotenv

# Configure the Gemini client with the key stored in the GENAI_API_KEY
# environment variable (load_dotenv() is called first; presumably it populates
# the environment from a local .env file — confirm against the project setup).
load_dotenv()
genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
model = genai.GenerativeModel("gemini-2.0-flash")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Smoke test: send a trivial prompt and print the full response object.
response = model.generate_content(
    "What is the capital of India?"
)
print(response)

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "The capital of India is **New Delhi**.\n"
              }
            ],
            "role": "model"
          },
          "finish_reason": "STOP",
          "avg_logprobs": -0.005013660341501236
        }
      ],
      "usage_metadata": {
        "prompt_token_count": 7,
        "candidates_token_count": 10,
        "total_token_count": 17
      },
      "model_version": "gemini-2.0-flash"
    }),
)


In [5]:
# Print only the text of the response instead of the whole response object.
print(response.text)

The capital of India is **New Delhi**.



In [6]:
def analyze_resume(resume_text, job_description=None, model_name="gemini-2.0-flash"):
    """Ask a Gemini model to review a resume, optionally against a job description.

    Args:
        resume_text: Plain text of the resume to evaluate.
        job_description: Optional job description. When given, the prompt
            additionally asks for a comparison of the resume against it.
        model_name: Name passed to ``genai.GenerativeModel``. Defaults to the
            model the notebook was written against.

    Returns:
        The model's answer as a stripped string, or the dict
        ``{"error": "Resume text is required for analysis."}`` when
        ``resume_text`` is empty, ``None`` or only whitespace.
    """
    # A whitespace-only resume carries no content either; reject it up front
    # instead of sending an empty resume to the model.
    if not resume_text or (isinstance(resume_text, str) and not resume_text.strip()):
        return {"error": "Resume text is required for analysis."}

    model = genai.GenerativeModel(model_name)

    base_prompt = f"""
    You are an experienced HR with Technical Experience in the field of any one job role from Data Science, Data Analyst, DevOPS, Machine Learning Engineer, Prompt Engineer, AI Engineer, Full Stack Web Development, Big Data Engineering, Marketing Analyst, Human Resource Manager, Software Developer your task is to review the provided resume.
    Please share your professional evaluation on whether the candidate's profile aligns with the role.ALso mention Skills he already have and siggest some skills to imorve his resume , alos suggest some course he might take to improve the skills.Highlight the strengths and weaknesses.
    
    Resume:
    {resume_text}
    """
    if job_description:
        # Extend the base prompt only when there is something to compare with.
        base_prompt += f"""
        Aditionally compare the resume with the following job description and provide a detailed analysis:
        
        job_description:
        {job_description}
        
        Highlight the strenghts and weaknesses of the applicant in relation to the job description.
        """

    response = model.generate_content(base_prompt)

    analysis = response.text.strip()
    return analysis

In [7]:
# Analyze the extracted resume text (no job description) and print the result.
print(analyze_resume(resume_text))

{'error': 'Resume text is required for analysis.'}
