In [80]:
import re
import json
import requests
import pdfplumber
import pandas as pd
import psycopg2
from psycopg2 import sql
from collections import Counter
from datetime import datetime
 
def extract_text_from_pdf(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        pdf_text = ''
        for page in pdf.pages:
            pdf_text += page.extract_text()
    return pdf_text
 
def parse_resume(ocr_text):
    lines = ocr_text.split('\n')
    name = ''
    email = ''
    phone = ''
    dob = ''
    experience = ''
    current_company = ''
    college = ''
    skills = []
    in_skills_section = False

    phone_pattern = r'[\+\(]?[1-9][0-9 .\-\(\)]{8,}[0-9]'
    dob_pattern = r'\d{1,2}-\d{1,2}-\d{4}'  
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
 
    for line in lines:
        line = line.strip()
        if not phone:
            match = re.search(phone_pattern, line)
            if match:
                phone = match.group(0)
        if not email:
            email_match = re.search(email_pattern, line)
            if email_match:
                email = email_match.group(0)
        if not dob and ('DOB' in line or 'Date of Birth' in line):
            dob_match = re.search(dob_pattern, line)
            if dob_match:
                dob_value = dob_match.group(0)
                try:
                    dob = datetime.strptime(dob_value, '%d-%m-%Y').strftime('%Y-%m-%d')  
                except ValueError:
                    dob = ''
        if 'Experience' in line or 'Years of Experience' in line:
            experience = line
        if 'Current Company' in line or 'Company' in line:
            current_company = line
        if 'College' in line or 'University' in line:
            college = line
        if 'Skills' in line or 'Technical Skills' in line:
            in_skills_section = True
            continue
        if line and not name:
            name = line

        if in_skills_section:

            if line:

                skills.append(line.strip())

    return name, email, phone, dob, experience, current_company, college, skills
 
def count_words(ocr_text):

    words = ocr_text.split()

    return len(words)
 
def most_common_words(ocr_text, num_common=5):

    words = re.findall(r'\b\w+\b', ocr_text.lower())

    common_words = Counter(words).most_common(num_common)

    return common_words
 
def send_request_to_gemini(prompt):

    gemini_url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key=AIzaSyBEkIe54ecBolm-EwKjMGN9zSqvolxCz1o'

    headers = {

        'Content-Type': 'application/json',

    }

    try:

        response = requests.post(gemini_url, headers=headers, json=prompt)

        if response.status_code == 200:

            parsed_data = response.json()

            if 'text' in parsed_data and parsed_data['text']:

                generated_text = parsed_data['text']

            else:

                generated_text = None  

            return generated_text

        else:

            print(f"Request failed with status code {response.status_code}")

            print(response.text)

            return None

    except requests.exceptions.RequestException as e:

        print(f"Error with API request: {e}")

        return None
 
def insert_into_postgres(df):

    host = '172.17.0.6'
    port = '5432'
    database = 'postgres'
    user = 'postgres'
    password = 'niranjan'
    conn = None

    try:

        conn = psycopg2.connect(
            host=host,
            port=port,
            database=database,
            user=user,
            password=password

        )

        cur = conn.cursor()

        for index, row in df.iterrows():

            insert_query = sql.SQL('''

                INSERT INTO resumes (name, email, phone, dob, skills, college)

                VALUES (%s, %s, %s, %s, %s, %s)

            ''')

            dob_value = row['DOB'] if row['DOB'] else None

            cur.execute(insert_query, (

                row['Name'],
                row['Email'],
                row['Phone'],
                dob_value,
                row['Skills'],
                row['College'],
            ))
            conn.commit()

        print("Data inserted successfully into PostgreSQL")
    except (Exception, psycopg2.DatabaseError) as error:
        print(f"Error inserting data into PostgreSQL: {error}")
    finally:

        if conn is not None:

            conn.close()
 
def main():

    pdf_file_path = 'Vg.shibu (CV..).pdf'

    ocr_text = extract_text_from_pdf(pdf_file_path)

    name, email, phone, dob, experience, current_company, college, skills = parse_resume(ocr_text)

    total_words = count_words(ocr_text)

    common_words = most_common_words(ocr_text)

    prompt = {

        "contents": [

            {

                "parts": [

                    {

                        "text": f"Given the resume, fetch the name: {name}, email: {email}, phone: {phone}, dob: {dob}, experience: {experience}, current company: {current_company}, college: {college}, top 5 skills: {', '.join(skills)}, vertica as one of Full stack, Data Engineering, Dev Ops, Manual Testing, Automation."

                    }

                ]

            }

        ]

    }

    prompt_json = json.dumps(prompt)

    generated_text = send_request_to_gemini(prompt)

    df = pd.DataFrame({

        'Name': [name],
        'Email': [email],
        'Phone': [phone],
        'DOB': [dob],
        'Experience': [experience],
        'Current Company': [current_company],
        'College': [college],
        'Skills': [skills],
        'Generated Text': [generated_text],
        'Total Words': [total_words],
        'Most Common Words': [common_words]
    })
    print("DataFrame created:")
    print(df)
    insert_into_postgres(df)
 
if __name__ == "__main__":

    main()

 

DataFrame created:
        Name                Email       Phone         DOB Experience  \
0  SHIBU V G  shibu8940@gmail.com  8940370437  1998-02-03              

                                     Current Company  \
0  Project Title: Website in Company Profile[sample]   

                          College Skills Generated Text  Total Words  \
0  Narayana Guru College with 65%     []           None          412   

                                   Most Common Words  
0  [(and, 24), (in, 11), (to, 10), (with, 9), (ex...  
Data inserted successfully into PostgreSQL
