In [2]:
import os
import pandas as pd
from PyPDF2 import PdfReader
import docx2txt
import nltk
from nltk import sent_tokenize, pos_tag, ne_chunk
from nltk import download, find


In [3]:
# Fetch the NLTK data packages this notebook relies on. When a package is
# already present locally NLTK just reports it as up-to-date.
download('punkt')  # presumably the tokenizer models behind sent_tokenize / word_tokenize
download('averaged_perceptron_tagger')  # presumably the model used by pos_tag
download('maxent_ne_chunker')  # presumably the model used by ne_chunk
download('words')  # presumably a word list the NE chunker depends on -- TODO confirm


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [5]:
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at ``pdf_path``.

    Args:
        pdf_path: Path to a PDF file; it is opened in binary mode.

    Returns:
        str: The text of all pages joined in page order, with no separator
        inserted between pages. A page whose ``extract_text()`` yields nothing
        (``None`` or ``''``) contributes an empty string instead of raising.
    """
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PdfReader(pdf_file)
        # Read the pages while the handle the reader was built from is still open.
        # ``or ''`` keeps a page that yields None from breaking the join.
        return ''.join(page.extract_text() or '' for page in pdf_reader.pages)

def extract_text_from_docx(docx_path):
    """Extract the text of a .docx document.

    Args:
        docx_path: Path of the Word document; passed straight to
            ``docx2txt.process``.

    Returns:
        Whatever ``docx2txt.process`` returns for that path.
    """
    document_text = docx2txt.process(docx_path)
    return document_text

def extract_names(txt):
    """Collect every entity the NLTK chunker labels ``PERSON`` in ``txt``.

    The text is split into sentences; each sentence is word-tokenized,
    POS-tagged and run through ``ne_chunk``.

    Args:
        txt: The text to scan.

    Returns:
        list[str]: One entry per PERSON chunk, in order of appearance, with the
        chunk's tokens joined by single spaces. Repeated names are kept.
    """
    def _person_entities(sentence):
        # Yield the space-joined tokens of each PERSON chunk in one sentence.
        tagged_tokens = pos_tag(nltk.word_tokenize(sentence))
        for node in ne_chunk(tagged_tokens):
            # Only nodes exposing ``label`` are candidate entities; others are skipped.
            if hasattr(node, 'label') and node.label() == 'PERSON':
                yield ' '.join(leaf[0] for leaf in node.leaves())

    return [
        name
        for sentence in sent_tokenize(txt)
        for name in _person_entities(sentence)
    ]

def parse_resume(file_path):
    """Extract the text and person names from a single resume file.

    Args:
        file_path (str): Path to a ``.pdf`` or ``.docx`` file. The extension is
            matched case-insensitively, so ``CV.PDF`` is accepted.

    Returns:
        dict: ``{'file_path': ..., 'text': ..., 'names': ...}`` where ``text``
        is the extracted document text and ``names`` is the result of
        ``extract_names(text)``.

    Raises:
        ValueError: If the file is neither a PDF nor a DOCX.
    """
    # Compare against a lower-cased copy; the original path is what gets opened.
    lowered_path = file_path.lower()
    if lowered_path.endswith('.pdf'):
        text = extract_text_from_pdf(file_path)
    elif lowered_path.endswith('.docx'):
        text = extract_text_from_docx(file_path)
    else:
        raise ValueError("Unsupported file format")

    names = extract_names(text)

    # Additional parsing logic (extract skills, experience, etc.) can be added here

    return {'file_path': file_path, 'text': text, 'names': names}

def parse_resumes_in_directory(directory):
    """Parse every PDF/DOCX resume found directly inside ``directory``.

    Args:
        directory (str): Folder to scan; sub-folders are not descended into.

    Returns:
        pandas.DataFrame: One row per resume, built from the dicts returned by
        ``parse_resume``. Rows are ordered by file name so that row positions
        stay the same from run to run. When no resume is found, an empty frame
        with columns ``file_path``, ``text`` and ``names`` is returned.
    """
    resumes_data = []
    # os.listdir returns entries in arbitrary order; sort so row positions are stable.
    for filename in sorted(os.listdir(directory)):
        # Same case-insensitive extension rule as parse_resume, so nothing that
        # passes this filter is rejected there.
        if filename.lower().endswith(('.pdf', '.docx')):
            file_path = os.path.join(directory, filename)
            resumes_data.append(parse_resume(file_path))

    if not resumes_data:
        # pd.DataFrame([]) has no columns at all; keep them so that column
        # lookups on the result still work.
        return pd.DataFrame(columns=['file_path', 'text', 'names'])
    return pd.DataFrame(resumes_data)


In [7]:
if __name__ == '__main__':
    # Interactive driver: parse every resume in the folder, dump the result to
    # CSV, then let the user look a resume up by 1-based row number or by a
    # keyword matched against its file path.
    # NOTE: this is a machine-specific absolute path; change it for your setup.
    resumes_directory = r"C:\Users\Dell\Desktop\RESUME\RESUMES FOLDER"  # Update with the directory containing your resumes
    resumes_df = parse_resumes_in_directory(resumes_directory)

    # Save the parsed resumes DataFrame to a CSV file
    resumes_df.to_csv('parsed_resumes.csv', index=False)

    # Prompt the user for input
    choice = input("Enter '1' to search by file number or '2' to search by keyword: ")

    if choice == '1':
        file_number = input("Enter the file number you want to access: ")
        try:
            row_position = int(file_number) - 1
            # iloc accepts negative positions and counts from the end, so 0 or a
            # negative file number would silently show the wrong resume.
            if row_position < 0:
                raise IndexError(file_number)
            file_info = resumes_df.iloc[row_position]
        except ValueError:
            # Non-numeric input.
            print("Invalid file number. Please enter a whole number.")
        except IndexError:
            print("File number not found.")
        else:
            print("File Path:", file_info['file_path'])
            print("Parsed Text:")
            print(file_info['text'])
            print("\nExtracted Names:")
            print(file_info['names'])

    elif choice == '2':
        keyword = input("Enter the keyword you want to search for: ")
        if resumes_df.empty:
            # Nothing was parsed; the frame may not even have a 'file_path' column.
            filtered_resumes = resumes_df
        else:
            # regex=False: match the keyword literally, so input such as "c++"
            # or "." is not interpreted as a regular expression.
            path_matches = resumes_df['file_path'].str.contains(keyword, case=False, regex=False)
            filtered_resumes = resumes_df[path_matches]

        if len(filtered_resumes) > 0:
            for index, row in filtered_resumes.iterrows():
                print(f"File Path: {row['file_path']}")
                print("Parsed Text:")
                print(row['text'])
                print("\nExtracted Names:")
                print(row['names'])
                print("\n----------------------------------\n")
        else:
            print("No resumes found matching the keyword.")

    else:
        print("Invalid choice. Please enter '1' or '2'.")


File Path: C:\Users\Dell\Desktop\RESUME\RESUMES FOLDER\11174187.pdf
Parsed Text:
RN / ASST. HEAD NURSE PRACTICE LEADER
Skills
care planning, Case Management, Home Health, Hospice, Infection control, injections, Nurse Manager, Oncology, scheduling, staff development,
Trauma, triage, tutoring, Urology
Experience
09/2010
 
to 
12/2011
Company Name
Hired as ADON (Assistant Director of Nursing) for an 85 to 90 bed long term care of Mentally and Physically disabled children from 2
years to upper 20's with a few older individuals.
My job there included staffing of all the nurses monthly, setting up transportation for all outside MD office visits of the residents, monitoring
the 3 nursing units of day to day care and documentation of the residents, taking call as needed and twice a month being MOD for the entire
building ,I conducted in-services for the nursing staff and answered all pages during the day with questions or to oversee any critical changes
of residents and helped with decisions t