In [1]:
import json
import re
from pdfminer.high_level import extract_text

def parse_cv(cv_text):
    """
    Parses the CV text and returns a dictionary with the extracted information.
    For simplicity, this example extracts name, email, phone, and experience.
    """
    cv_data = {}

    # Extract name (assuming the name is the first line of the CV)
    lines = cv_text.split('\n')
    cv_data['name'] = lines[0].strip()

    # Extract email
    email = re.search(r'[\w\.-]+@[\w\.-]+', cv_text)
    if email:
        cv_data['email'] = email.group(0)
    
    # Extract phone number (assuming a simple phone number format)
    phone = re.search(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', cv_text)
    if phone:
        cv_data['phone'] = phone.group(0)
    
    # Extract experience (assuming a section header "Experience" and listing each job on a new line)
    experience_section = re.search(r'Experience[\s\S]*', cv_text, re.IGNORECASE)
    if experience_section:
        experience_lines = experience_section.group(0).split('\n')[1:]  # Skip the "Experience" header
        experience = [line.strip() for line in experience_lines if line.strip()]
        cv_data['experience'] = experience

    return cv_data

def main():
    file_location = input("Enter the file location of the CV: ")

    try:
        # Extract text from the PDF file
        cv_text = extract_text(file_location)

        cv_data = parse_cv(cv_text)
        
        json_output = json.dumps(cv_data, indent=4)
        print(json_output)

    except FileNotFoundError:
        print(f"The file at {file_location} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()


Enter the file location of the CV: C:\Users\Lenovo\Downloads\splcv2ndformat.pdf
{
    "name": "Jayanth Reddy Udumula",
    "email": "jayanthreddy.me@gmail.com",
    "phone": "9390633876",
    "experience": [
        "\uf0b7  Collaborated with a cross-functional team to preprocess and analyze large datasets, improving model accuracy by",
        "percentage.",
        "\uf0b7  Tech stacks used:  Python,  Machine Learning",
        "Fifth Force",
        "Machine Learning Engineer",
        "June'23",
        "\uf0b7  Developed and implemented machine learning models, contributing to a project focused on the project goal or",
        "application.",
        "\uf0b7  Contributed to the documentation of the machine learning pipeline, ensuring knowledge transfer within the team.The",
        "application is made available for Android and Web platforms.",
        "\uf0b7      Tech stacks used:  Python,  Machine Learning",
        "PROJECTS",
        "E-mail Automation                        