# Resume Classification

Business objective :- The document classification solution should significantly reduce the manual human effort in the HRM. It should achieve a higher level of accuracy and automation with minimal human intervention

In [3]:
import os #Used for path
import re #Regex
import csv #text into csv file
from docx import Document #.Docx Convert into text
from PyPDF2 import PdfReader #pdf Convert into text
import win32com.client as win32 #used for open Word file Component Object Model (COM)
from win32com.client import constants
import pandas as pd
import glob

- Dataset are the form of .pdf,.Docx,.Doc format so,First we convert into text 

In [2]:
def extract_text_from_pdf(file_path): #This function is used for extract text from pdf
    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text

def extract_text_from_docx(file_path): #This function is used for extract text from Docx
    doc = Document(file_path)
    paragraphs = [p.text for p in doc.paragraphs]
    return '\n'.join(paragraphs)

def save_as_docx(file_path): #If Doc is not readed that this function convert Doc into Docx.by open Docx file and saveAS into Doc
    # Opening MS Word
    word = win32.gencache.EnsureDispatch('Word.Application')
    doc = word.Documents.Open(file_path)
    doc.Activate()

    # Rename path with .docx
    new_file_abs = os.path.abspath(file_path)
    new_file_abs = os.path.splitext(file_path)[0] + '.docx'

    # Save and Close
    word.ActiveDocument.SaveAs(
        new_file_abs, FileFormat=constants.wdFormatXMLDocument
    )
    doc.Close(False)

def clean_text(text):
    cleaned_text = re.sub(r'\s+', ' ', text)  # Remove extra space
    return cleaned_text

In [5]:
# Database path:
input_directory = r'C:\Users\abc\Desktop\Resume Classification\Resume1'
output_csv = 'Resume_text.csv'

with open(output_csv, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['Text',"Resume"])  # Write header row
    for file_path in glob.glob(os.path.join(input_directory, '**', '*'), recursive=True):
        if not os.path.isfile(file_path):
            continue

        resume = []
        file_name = os.path.basename(file_path)
        folder_path = os.path.dirname(file_path)
        if folder_path.lower().endswith("\\peoplesoft"):
            resume.append('peoplesoft') 
        elif folder_path.lower().endswith("\\workday"):
            resume.append('workday')
        elif folder_path.lower().endswith("\\sql developer"):
            resume.append('sql developer') 
        elif folder_path.lower().endswith("\\react developer"):                                  
            resume.append('react developer')
        #file_path = os.path.join(root, file_name)
        #file_path = os.path.join(input_directory, file_name)
        if file_name.lower().endswith('.pdf'):
            text = extract_text_from_pdf(file_path)
        elif file_name.lower().endswith('.docx'):
            text = extract_text_from_docx(file_path)
        elif file_name.lower().endswith('.doc'):
            save_as_docx(file_path)
            text = extract_text_from_docx(file_path + 'x')
            os.remove(file_path + 'x')  # Remove the temporary .docx file
        else:
            print(f"Skipping file: {file_name}. Unsupported file format.")
            continue
        
        cleaned_text = clean_text(text)
        writer.writerow([cleaned_text]+resume)

print("Extraction and cleaning complete. Saved to Resume_text.csv.")

Extraction and cleaning complete. Saved to Resume_text.csv.


# Spacy Library
- For NLP tasks we use Spacy library. Using of spacy we can perform Part-of-Speech Tagging, Dependency Parsing and Named Entity Recognition.

In [1]:
import spacy 
nlp = spacy.load('en_core_web_md') #loading spacy pre built model
from spacy.matcher import Matcher 

In [4]:
df = pd.read_csv('Resume_text.csv')  # uploading csv file

In [5]:
df

Unnamed: 0,Text,Resume
0,Anubhav Kumar Singh To work in a globally com...,peoplesoft
1,Profile Summary: 7+ years of experience in im...,peoplesoft
2,PeopleSoft Database Administrator Gangareddy P...,peoplesoft
3,Murali Experience Summary I have 6 years of ex...,peoplesoft
4,"Priyanka Ramadoss 61/46, MountPleasant, Coonoo...",peoplesoft
...,...,...
74,Workday Integration Consultant Name : Sri Kri...,workday
75,Seeking suitable positions in Workday HCM as T...,workday
76,WORKDAY | HCM | FCM Name : Kumar S.S Role : W...,workday
77,Venkateswarlu.B Workday Consultant Having 5.3 ...,workday


In [9]:
df["Text"][5]

" PROFILE SUMMARY I have overall 6.8 years’ experience as PeopleSoft Administrator. Installed PeopleTools 8.55 from the scratch including its products. Experience in Peopletools 8.51, 8.54.08, 8.55.07&Application 9.0 and 9.2 (HRMS/FSCM). Deterministic approach towards problem solving & troubleshooting. Proficient in Integration Broker. Upgraded FSCM and HCM applications to PeopleTools 8.55.07 from PeopleTools 8.54.08. EDUCATION Graduated B-Tech in Electronics and Communication Engg. from M.V.G.R College of Engineering, Vizianagaram(JNTUK) with an aggregate of 68.93%. Achieved 90% marks in 12th standard. Scored 86% in 10th standard. ACHIEVEMENTS Awarded Bravo in 2015 Q3, 2016 Q1 and Pat on Back in Q2, 2016, Q1, 2017 in Techahindra. Awarded Associate of the month award and Innovator of the month (1 time). Awarded spot and pat on back in Capgemini WORK EXPERIENCE CAPGEMINI (MAY’19 – TILL NOW) Production support for 5 finance environment and their respective non production environments. Pr

# Extracting skills

In [6]:
def extract_skills(resume_text):
    nlp_text = nlp(resume_text)

    # removing stop words and implementing word tokenization
    tokens = [token.text for token in nlp_text if not token.is_stop]
    # Set of skills if skills match with this text than they append in skills column
    skills = ["c","my sql","nodejs","node.js","reactjs","javascript","html","css","javascript","angular js",
              "js","fcsm","sql developer","peopleSoft","mysql","sql","plsql","nosql","rdbms","ddl" ,"dml","dcl"
              ,"sql developer","core hcm","xml", "xslt", "eib", "core connectors","workday",'hcm',"peoplesoft admin",
               "PeopleSoft Admin","dba","" ]
    
    skillset = []
    
    # check for one-grams (example: python)
    for token in tokens:
        if token.lower() in skills:
            skillset.append(token)
    
    #check for bi-grams and tri-grams (example: machine learning)
    for token in nlp_text.noun_chunks:
        token = token.text.lower().strip()
        if token in skills:
            skillset.append(token)
    
  
    return ', '.join(set([i.capitalize() for i in skillset]))

In [7]:
df["skills"] = df['Text'].apply(extract_skills)

In [8]:
df.head()

Unnamed: 0,Text,Resume,skills
0,Anubhav Kumar Singh To work in a globally com...,peoplesoft,Hcm
1,Profile Summary: 7+ years of experience in im...,peoplesoft,"Dba, Hcm"
2,PeopleSoft Database Administrator Gangareddy P...,peoplesoft,"Dba, Hcm, Sql"
3,Murali Experience Summary I have 6 years of ex...,peoplesoft,Hcm
4,"Priyanka Ramadoss 61/46, MountPleasant, Coonoo...",peoplesoft,Peoplesoft admin


# Name Extraction

In [10]:
# In name extraction we use POS tagging. 
matcher = Matcher(nlp.vocab) 

def extract_name(resume_text):
    nlp_text = nlp(resume_text)
    
    # First name and Last name are always Proper Nouns
    pattern = [{'POS': 'PROPN'}, {'POS': 'PROPN'}]
    
    matcher.add('NAME', [pattern], on_match = None)
    
    matches = matcher(nlp_text)
    
    for match_id, start, end in matches:
        span = nlp_text[start:end]
        return span.text


In [11]:
df["Name"] = df['Text'].apply(extract_name)

In [13]:
df.sample(10)

Unnamed: 0,Text,Resume,skills,Name
9,PeopleSoft Admin/PeopleSoft DBA Ganesh Alladi...,peoplesoft,"Dba, Hcm, Peoplesoft admin",PeopleSoft Admin
47,KAMBALLA PRADEEP SYNOPSIS Looking forward to a...,sql developer,"Rdbms, Ddl, Dml, Dcl, Sql",KAMBALLA PRADEEP
26,PROFILE Searching for the opportunity to brin...,react developer,"Html, Css, Javascript, Mysql, Node.js",CORE SKILLS
66,Madeeswar A PROFILE SUMMARY: Having around 6 ...,workday,"Xml, Hcm, Workday, Eib, Xslt, Core connectors",Madeeswar A
31,Ui-Developer/ React JS Developer NAME: KRISHN...,react developer,"Html, Js, Css, Xml, Javascript, Node.js",Developer/ React
61,Harikrishna Akula Summary: 5.2 years of IT exp...,workday,"Xml, Hcm, Workday, Eib, Sql, Xslt, Core connec...",Harikrishna Akula
5,PROFILE SUMMARY I have overall 6.8 years’ exp...,peoplesoft,"Hcm, Sql",PeopleSoft Administrator
45,Aradhana Tripathi Current Location: Gachibowl...,sql developer,"Mysql, Dml, Ddl, Sql",Aradhana Tripathi
6,PEOPLESOFT Administrator SRINIVAS.K Experience...,peoplesoft,"Dba, Hcm, Sql",PEOPLESOFT Administrator
32,Ui-Developer/ React JS Developer NAME: KRISHN...,react developer,"Html, Js, Css, Xml, Javascript, Node.js",Developer/ React


# Previous company Experiences

In [14]:
nlp = spacy.load('en_core_web_lg')

def extract_experiences(text): #Extract Experience from resume
    doc = nlp(text)

    experiences = []
    for sent in doc.sents:
        if 'experience' in sent.text.lower():
            experiences.append(sent.text)

    return ', '.join(set(i for i in experiences))

In [15]:
df["experience"] = df["Text"].apply(extract_experiences)

In [16]:
df

Unnamed: 0,Text,Resume,skills,Name,experience
0,Anubhav Kumar Singh To work in a globally com...,peoplesoft,Hcm,Anubhav Kumar,Experience in Installing Oracle Policy Automat...
1,Profile Summary: 7+ years of experience in im...,peoplesoft,"Dba, Hcm",Human Capital,Skilled with the capability to analyse & inter...
2,PeopleSoft Database Administrator Gangareddy P...,peoplesoft,"Dba, Hcm, Sql",PeopleSoft Database,"Experience in monitoring and scheduling Jobs.,..."
3,Murali Experience Summary I have 6 years of ex...,peoplesoft,Hcm,PeopleSoft Administration,Experience in Configuration and Setup the REN ...
4,"Priyanka Ramadoss 61/46, MountPleasant, Coonoo...",peoplesoft,Peoplesoft admin,Priyanka Ramadoss,Experienced in setting up batch profile setup ...
...,...,...,...,...,...
74,Workday Integration Consultant Name : Sri Kri...,workday,"Xml, Hcm, Workday, Eib, Xslt, Core connectors",Workday Integration,Hands-on experience In Migrating the XSLT Code...
75,Seeking suitable positions in Workday HCM as T...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Workday HCM,Having good experience in building Studio inbo...
76,WORKDAY | HCM | FCM Name : Kumar S.S Role : W...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Kumar S.S,Hands-on experience In Migrating the XSLT Code...
77,Venkateswarlu.B Workday Consultant Having 5.3 ...,workday,"Xml, Hcm, Workday, Eib, Xslt",B Workday,Strong experience in building Workday Studio i...


# Education

In [23]:
def ed(text):  #Extract Education from Resume
    doc = nlp(text)

    edu = []
    for sent in doc.sents:
        if ("educational qualification" and 'education') in sent.text.lower():
            edu.append(sent.text)
    return ', '.join(set(i for i in edu))

In [24]:
df["Education"] = df["Text"].apply(ed)

In [25]:
df

Unnamed: 0,Text,Resume,skills,Name,experience,Education
0,Anubhav Kumar Singh To work in a globally com...,peoplesoft,Hcm,Anubhav Kumar,Experience in Installing Oracle Policy Automat...,
1,Profile Summary: 7+ years of experience in im...,peoplesoft,"Dba, Hcm",Human Capital,Skilled with the capability to analyse & inter...,ǁ Education: Bachelors in Computer Science Ani...
2,PeopleSoft Database Administrator Gangareddy P...,peoplesoft,"Dba, Hcm, Sql",PeopleSoft Database,"Experience in monitoring and scheduling Jobs.,...",
3,Murali Experience Summary I have 6 years of ex...,peoplesoft,Hcm,PeopleSoft Administration,Experience in Configuration and Setup the REN ...,
4,"Priyanka Ramadoss 61/46, MountPleasant, Coonoo...",peoplesoft,Peoplesoft admin,Priyanka Ramadoss,Experienced in setting up batch profile setup ...,Present Education Personal Profile
...,...,...,...,...,...,...
74,Workday Integration Consultant Name : Sri Kri...,workday,"Xml, Hcm, Workday, Eib, Xslt, Core connectors",Workday Integration,Hands-on experience In Migrating the XSLT Code...,Education details: Bachelors in Electronics & ...
75,Seeking suitable positions in Workday HCM as T...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Workday HCM,Having good experience in building Studio inbo...,
76,WORKDAY | HCM | FCM Name : Kumar S.S Role : W...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Kumar S.S,Hands-on experience In Migrating the XSLT Code...,Educational Summary: M.Tech in computer scienc...
77,Venkateswarlu.B Workday Consultant Having 5.3 ...,workday,"Xml, Hcm, Workday, Eib, Xslt",B Workday,Strong experience in building Workday Studio i...,


In [26]:
df.head(50)

Unnamed: 0,Text,Resume,skills,Name,experience,Education
0,Anubhav Kumar Singh To work in a globally com...,peoplesoft,Hcm,Anubhav Kumar,Experience in Installing Oracle Policy Automat...,
1,Profile Summary: 7+ years of experience in im...,peoplesoft,"Dba, Hcm",Human Capital,Skilled with the capability to analyse & inter...,ǁ Education: Bachelors in Computer Science Ani...
2,PeopleSoft Database Administrator Gangareddy P...,peoplesoft,"Dba, Hcm, Sql",PeopleSoft Database,"Experience in monitoring and scheduling Jobs.,...",
3,Murali Experience Summary I have 6 years of ex...,peoplesoft,Hcm,PeopleSoft Administration,Experience in Configuration and Setup the REN ...,
4,"Priyanka Ramadoss 61/46, MountPleasant, Coonoo...",peoplesoft,Peoplesoft admin,Priyanka Ramadoss,Experienced in setting up batch profile setup ...,Present Education Personal Profile
5,PROFILE SUMMARY I have overall 6.8 years’ exp...,peoplesoft,"Hcm, Sql",PeopleSoft Administrator,"Experience in Peopletools 8.51, 8.54.08, 8.55....",EDUCATION Graduated B-Tech in Electronics and ...
6,PEOPLESOFT Administrator SRINIVAS.K Experience...,peoplesoft,"Dba, Hcm, Sql",PEOPLESOFT Administrator,Experience in online and offline cloning for t...,
7,PeopleSoft Admin VARKALA VIKAS Career Objecti...,peoplesoft,"Dba, Hcm, Peoplesoft admin",PeopleSoft Admin,Experience in Installing COBOL Software and co...,Good team player and a proven individual contr...
8,Vinod Akkala PeopleSoft DBA Admin. Professiona...,peoplesoft,"Dba, Hcm",Vinod Akkala,"Experience on People Tools Upgrade., Experienc...",KEY SKILS EDUCATIONAL QUALIFICATION M.ca.(Mast...
9,PeopleSoft Admin/PeopleSoft DBA Ganesh Alladi...,peoplesoft,"Dba, Hcm, Peoplesoft admin",PeopleSoft Admin,"Experience in standalone RAC., Experience in I...",Good team player and a proven individual contr...


# Extract Links

In [27]:
import re

def extract_website_links(text):
    # Regular expression pattern to match website links
    pattern = r'(https?://\S+)'

    # Find all matches using the pattern
    matches = re.findall(pattern, text)

    return ", ".join(matches)

In [28]:
text = df["Text"][24]

In [29]:
extract_website_links(text)

'https://www.linkedin.com/in/kamalakar-reddy-777682196/, http://demo.fortunapix.com/kkel/'

In [30]:
df["Link"] = df["Text"].apply(extract_website_links) 

In [31]:
df

Unnamed: 0,Text,Resume,skills,Name,experience,Education,Link
0,Anubhav Kumar Singh To work in a globally com...,peoplesoft,Hcm,Anubhav Kumar,Experience in Installing Oracle Policy Automat...,,
1,Profile Summary: 7+ years of experience in im...,peoplesoft,"Dba, Hcm",Human Capital,Skilled with the capability to analyse & inter...,ǁ Education: Bachelors in Computer Science Ani...,
2,PeopleSoft Database Administrator Gangareddy P...,peoplesoft,"Dba, Hcm, Sql",PeopleSoft Database,"Experience in monitoring and scheduling Jobs.,...",,
3,Murali Experience Summary I have 6 years of ex...,peoplesoft,Hcm,PeopleSoft Administration,Experience in Configuration and Setup the REN ...,,
4,"Priyanka Ramadoss 61/46, MountPleasant, Coonoo...",peoplesoft,Peoplesoft admin,Priyanka Ramadoss,Experienced in setting up batch profile setup ...,Present Education Personal Profile,
...,...,...,...,...,...,...,...
74,Workday Integration Consultant Name : Sri Kri...,workday,"Xml, Hcm, Workday, Eib, Xslt, Core connectors",Workday Integration,Hands-on experience In Migrating the XSLT Code...,Education details: Bachelors in Electronics & ...,
75,Seeking suitable positions in Workday HCM as T...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Workday HCM,Having good experience in building Studio inbo...,,
76,WORKDAY | HCM | FCM Name : Kumar S.S Role : W...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Kumar S.S,Hands-on experience In Migrating the XSLT Code...,Educational Summary: M.Tech in computer scienc...,
77,Venkateswarlu.B Workday Consultant Having 5.3 ...,workday,"Xml, Hcm, Workday, Eib, Xslt",B Workday,Strong experience in building Workday Studio i...,,


# Extract phone number

In [32]:
def extract_phone_numbers(text):
    # Regular expression pattern to match phone numbers
    pattern = r'(?<!\d)(?:(?:\d{2}[-\s]?\d{8})|(?:\d{4}[-\s]?\d{6}))(?!\d)'

    # Find all matches using the pattern
    matches = re.findall(pattern, text)

    return ", ".join(matches)

In [35]:
#Example
number = "Phone number : 91 1234567890"
extract_phone_numbers(number)

'1234567890'

In [33]:
df["Phone_Number"] = df["Text"].apply(extract_phone_numbers)

In [34]:
df

Unnamed: 0,Text,Resume,skills,Name,experience,Education,Link,Phone_Number
0,Anubhav Kumar Singh To work in a globally com...,peoplesoft,Hcm,Anubhav Kumar,Experience in Installing Oracle Policy Automat...,,,
1,Profile Summary: 7+ years of experience in im...,peoplesoft,"Dba, Hcm",Human Capital,Skilled with the capability to analyse & inter...,ǁ Education: Bachelors in Computer Science Ani...,,
2,PeopleSoft Database Administrator Gangareddy P...,peoplesoft,"Dba, Hcm, Sql",PeopleSoft Database,"Experience in monitoring and scheduling Jobs.,...",,,
3,Murali Experience Summary I have 6 years of ex...,peoplesoft,Hcm,PeopleSoft Administration,Experience in Configuration and Setup the REN ...,,,
4,"Priyanka Ramadoss 61/46, MountPleasant, Coonoo...",peoplesoft,Peoplesoft admin,Priyanka Ramadoss,Experienced in setting up batch profile setup ...,Present Education Personal Profile,,
...,...,...,...,...,...,...,...,...
74,Workday Integration Consultant Name : Sri Kri...,workday,"Xml, Hcm, Workday, Eib, Xslt, Core connectors",Workday Integration,Hands-on experience In Migrating the XSLT Code...,Education details: Bachelors in Electronics & ...,,
75,Seeking suitable positions in Workday HCM as T...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Workday HCM,Having good experience in building Studio inbo...,,,
76,WORKDAY | HCM | FCM Name : Kumar S.S Role : W...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Kumar S.S,Hands-on experience In Migrating the XSLT Code...,Educational Summary: M.Tech in computer scienc...,,
77,Venkateswarlu.B Workday Consultant Having 5.3 ...,workday,"Xml, Hcm, Workday, Eib, Xslt",B Workday,Strong experience in building Workday Studio i...,,,


# Experience in Year

In [36]:
def expDetails(Text):  #Extract experience in year from Resumes
    global sent
   
    Text = Text.split()
   
    for i in range(len(Text)-2):
        Text[i].lower()
        
        if Text[i] ==  'years':
            sent =  Text[i-2] + ' ' + Text[i-1] +' ' + Text[i] +' '+ Text[i+1] +' ' + Text[i+2]
            l = re.findall('\d*\.?\d+',sent)
            for i in l:
                a = float(i)
            return(a)
            return (sent)

In [37]:
df["YearExp"] = df["Text"].apply(expDetails)

In [38]:
df

Unnamed: 0,Text,Resume,skills,Name,experience,Education,Link,Phone_Number,YearExp
0,Anubhav Kumar Singh To work in a globally com...,peoplesoft,Hcm,Anubhav Kumar,Experience in Installing Oracle Policy Automat...,,,,
1,Profile Summary: 7+ years of experience in im...,peoplesoft,"Dba, Hcm",Human Capital,Skilled with the capability to analyse & inter...,ǁ Education: Bachelors in Computer Science Ani...,,,7.0
2,PeopleSoft Database Administrator Gangareddy P...,peoplesoft,"Dba, Hcm, Sql",PeopleSoft Database,"Experience in monitoring and scheduling Jobs.,...",,,,4.0
3,Murali Experience Summary I have 6 years of ex...,peoplesoft,Hcm,PeopleSoft Administration,Experience in Configuration and Setup the REN ...,,,,6.0
4,"Priyanka Ramadoss 61/46, MountPleasant, Coonoo...",peoplesoft,Peoplesoft admin,Priyanka Ramadoss,Experienced in setting up batch profile setup ...,Present Education Personal Profile,,,2016.0
...,...,...,...,...,...,...,...,...,...
74,Workday Integration Consultant Name : Sri Kri...,workday,"Xml, Hcm, Workday, Eib, Xslt, Core connectors",Workday Integration,Hands-on experience In Migrating the XSLT Code...,Education details: Bachelors in Electronics & ...,,,4.0
75,Seeking suitable positions in Workday HCM as T...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Workday HCM,Having good experience in building Studio inbo...,,,,4.0
76,WORKDAY | HCM | FCM Name : Kumar S.S Role : W...,workday,"Xml, Hcm, Core connectors, Workday, Eib, Xslt,...",Kumar S.S,Hands-on experience In Migrating the XSLT Code...,Educational Summary: M.Tech in computer scienc...,,,6.0
77,Venkateswarlu.B Workday Consultant Having 5.3 ...,workday,"Xml, Hcm, Workday, Eib, Xslt",B Workday,Strong experience in building Workday Studio i...,,,,5.3


- there same null value in columns like Education, link , phone So,drop that columns

NameError: name 'df' is not defined