# Resume Parser

In [13]:
#Importing required libraries
import pandas as pd
import numpy as np
import os
from io import StringIO
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser
import docx
import textract
import spacy
import logging
import re

In [14]:
#Function to convert resume contents into text
def _flatten_lines(text):
    """Return the printed list of *text*'s lines with every "'" replaced by a space."""
    return str(text.split('\n')).replace("'", " ")


def resume_extract(file):
    """Convert the contents of a resume file into one flattened text string.

    Only paths ending in ``.pdf`` (read with pdfminer) and ``.docx`` (read
    with textract) are handled.

    Args:
        file: Path of the resume file.

    Returns:
        The string representation of the list of text lines, with every
        single quote replaced by a space, or ``None`` when the path ends in
        neither ``.pdf`` nor ``.docx``.
    """
    if file.endswith(".pdf"):
        output_string = StringIO()
        with open(file, 'rb') as in_file:
            parser = PDFParser(in_file)
            doc = PDFDocument(parser)
            rsrcmgr = PDFResourceManager()
            device = TextConverter(rsrcmgr, output_string, laparams=LAParams())
            interpreter = PDFPageInterpreter(rsrcmgr, device)
            for page in PDFPage.create_pages(doc):
                interpreter.process_page(page)
        # Flatten once, after every page has been rendered. Doing it inside
        # the loop left the result unbound for a PDF without pages and
        # rebuilt the whole string for each page.
        return _flatten_lines(output_string.getvalue())

    if file.endswith(".docx"):
        # textract opens the path itself, so no file handle is needed here.
        raw = textract.process(file)
        return _flatten_lines(raw.decode('utf-8').strip('\t'))

    # Unsupported extension: callers receive None, as before.
    return None

In [15]:
#Generating list of extensions and dictionary for common skills
# File extensions a candidate file must have to be kept for processing.
AVAILABLE_EXTENSIONS = {
    '.csv', '.doc', '.docx', '.eml', '.epub', '.gif',
    '.htm', '.html', '.jpeg', '.jpg', '.json', '.log',
    '.mp3', '.msg', '.odt', '.ogg', '.pdf', '.png',
    '.pptx', '.ps', '.psv', '.rtf', '.tff', '.tif',
    '.tiff', '.tsv', '.txt', '.wav', '.xls', '.xlsx',
}

# Output column name -> terms searched for in the resume text.
extractors = {
    "experience": [
        'Teacher',
        'teaching',
        'tutor',
        'developer',
        'software developer',
        'software engineer',
        'hardware engineer',
        'project director',
        'internship',
        'intern',
        'research assistant',
    ],
    "platform": ['linux', 'windows', 'mac'],
    "database": ['SQL', 'NoSQL', 'Oracle'],
    "programming": ['python', 'java', 'c', 'c++', '.net', 'matlab', 'vhdl', 'php'],
}

In [16]:
#defining functions for regular expression (re) matching
def term_match(string_to_search, term):
    """Return the first case-insensitive match of *term*, or None.

    *term* is compiled as a regular expression. Any failure while compiling
    or searching (for example a non-string input) also yields None.
    """
    try:
        hits = re.compile(term, re.IGNORECASE).findall(string_to_search)
    except Exception:
        return None
    return hits[0] if hits else None
def term_count(string_to_search, term):
    """Count the case-insensitive matches of the regex *term* in the text.

    Any failure while compiling or searching (for example a non-string
    input or an invalid pattern) is reported as a count of 0.
    """
    try:
        pattern = re.compile(term, re.IGNORECASE)
        return len(pattern.findall(string_to_search))
    except Exception:
        return 0

In [17]:
#Creating regular expression (regex) for Phone Number and EmailID
# Email pattern: local part, "@", domain, a dot and a 2-4 letter suffix.
# The character classes list upper-case letters only, so the pattern relies
# on being compiled with re.IGNORECASE (term_match does this).
EMAIL_REGEX = r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}"
# Phone pattern: either 3 digits, 3 digits and 4 digits with an optional
# single separator character between the groups, or "(ddd)" followed by
# optional whitespace, 3 digits, an optional separator and 4 digits.
# The single outer group makes re.findall return the whole number.
PHONE_REGEX = r'(\d{3}[-\.\s\).\·]??\d{3}[-\.\s\).\·]??\d{4}|\(\d{3}\)\s*\d{3}[-\.\s]??\d{4})'

In [18]:
#Define function for extracting the Candidate Name
# Loaded spaCy pipelines, keyed by model name, so each is loaded only once.
_SPACY_MODEL_CACHE = {}


def _load_spacy_model(name='en_core_web_lg'):
    """Load the spaCy pipeline *name* on first use and reuse it afterwards."""
    if name not in _SPACY_MODEL_CACHE:
        _SPACY_MODEL_CACHE[name] = spacy.load(name)
    return _SPACY_MODEL_CACHE[name]


def candidate_name_extractor(input_string, nlp=None):
    """Return the first PERSON entity found in the resume text.

    Args:
        input_string: Resume text to analyse.
        nlp: Optional, already loaded spaCy pipeline. When omitted, the
            'en_core_web_lg' model is loaded once and reused on later
            calls instead of being reloaded for every resume.

    Returns:
        The text of the first entity labelled ``PERSON``, or ``None`` when
        there is none or when *input_string* is not a non-blank string.
    """
    # Nothing to analyse; this also avoids loading the model needlessly and
    # avoids crashing on a missing (None) text.
    if not isinstance(input_string, str) or not input_string.strip():
        return None

    if nlp is None:
        nlp = _load_spacy_model()

    doc = nlp(input_string)
    for ent in doc.ents:
        if ent.label_ == 'PERSON':
            return ent.text
    return None

In [19]:
#Define function for extracting the basic skills
def extract_fields(df):
    """Add one column of matched skills per entry of ``extractors``.

    For each extractor name, the ``text`` column of *df* is passed through
    ``extract_skills`` and the result is stored in a column of that name.
    The frame is modified in place and also returned.
    """
    for field_name in extractors:
        wanted_terms = extractors[field_name]

        def _skills_for(resume_text, _field=field_name, _terms=wanted_terms):
            return extract_skills(resume_text, _field, _terms)

        df[field_name] = df['text'].apply(_skills_for)
    return df


def _skill_alias_regex(skill_alias):
    """Compile a case-insensitive, whole-term pattern for a literal alias."""
    escaped = re.escape(skill_alias)
    # Only guard an edge that is a word character: a term such as '.net'
    # or 'c++' starts/ends with punctuation and needs no boundary there.
    lead = r'(?<!\w)' if re.match(r'\w', skill_alias) else ''
    # '+' and '#' are excluded too, so that 'c' is not found inside 'C++'.
    trail = r'(?![\w+#])' if re.search(r'\w\Z', skill_alias) else ''
    return re.compile(lead + escaped + trail, re.IGNORECASE)


def extract_skills(resume_text, extractor, items_of_interest):
    """Return the set of skills from *items_of_interest* found in the text.

    Skill terms are treated as literal text (regex metacharacters such as
    the '.' in '.net' or the '+' in 'c++' are escaped) and must appear as a
    whole term, so 'c' does not match every letter c and 'java' does not
    match 'javascript'. Matching is case-insensitive.

    Args:
        resume_text: Text of one resume. A non-string value yields an
            empty set.
        extractor: Name of the extractor being run. Not used by this
            function; kept for interface compatibility.
        items_of_interest: Iterable whose items are either a string (the
            skill) or a non-empty list of strings (aliases; the first
            element is the reported skill name). Other items are skipped
            with a warning.

    Returns:
        Set of skill names for which at least one alias was found.
    """
    potential_skills_dict = {}

    # TODO This skill input formatting could happen once per run, instead of once per observation.
    for skill_input in items_of_interest:
        # Format list inputs
        if isinstance(skill_input, list) and len(skill_input) >= 1:
            potential_skills_dict[skill_input[0]] = skill_input
        # Format string inputs
        elif isinstance(skill_input, str):
            potential_skills_dict[skill_input] = [skill_input]
        else:
            logging.warning('Unknown skill listing type: {}. Please format as either a single string or a list of strings'
                            ''.format(skill_input))

    matched_skills = set()
    # Missing text (e.g. None from an unsupported file type) matches nothing.
    if not isinstance(resume_text, str):
        return matched_skills

    for skill_name, skill_alias_list in potential_skills_dict.items():
        # A skill counts as present as soon as any of its aliases is found.
        if any(
            isinstance(skill_alias, str) and _skill_alias_regex(skill_alias).search(resume_text)
            for skill_alias in skill_alias_list
        ):
            matched_skills.add(skill_name)

    return matched_skills

In [20]:
#Define the main function that runs the extract, transform and load steps
def main():
    """Run the pipeline: extract resumes, transform them, load the result.

    Sets the root logger to INFO, loads the 'en_core_web_lg' spaCy model
    and passes it through ``transform`` and ``load``.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    resumes = extract()
    language_model = spacy.load('en_core_web_lg')
    resumes, language_model = transform(resumes, language_model)
    load(resumes, language_model)

In [21]:
#Define a function to extract all details from resumes using the functions defined above
def extract(resume_dir=r'D:\Mukul\Amity\Project\ResumeParser\Resume'):
    """Collect the resume files under *resume_dir* and extract their text.

    Args:
        resume_dir: Directory that is walked recursively for candidate
            files. Defaults to the original hard-coded location.

    Returns:
        A DataFrame with the columns ``file_path``, ``extension`` and
        ``text`` (the result of ``resume_extract``), restricted to files
        whose extension is in ``AVAILABLE_EXTENSIONS``.
    """
    logging.info('Begin extract')

    # Create list of candidate files
    candidate_file_agg = [
        os.path.join(root, file_name)
        for root, _subdirs, files in os.walk(resume_dir)
        for file_name in files
    ]

    # Convert list to a pandas DataFrame
    observations = pd.DataFrame(data=candidate_file_agg, columns=['file_path'])
    logging.info('Found {} candidate files'.format(len(observations.index)))

    # Subset candidate files to supported extensions
    observations['extension'] = observations['file_path'].apply(lambda x: os.path.splitext(x)[1])
    # .copy() so the column added below is written to an independent frame
    # rather than to a slice of the unfiltered one (SettingWithCopyWarning).
    observations = observations[observations['extension'].isin(AVAILABLE_EXTENSIONS)].copy()

    # Attempt to extract text from files
    observations['text'] = observations['file_path'].apply(resume_extract)
    logging.info('End extract')
    return observations

In [22]:
#Define a function to extract the details from text
def transform(observations, nlp):
    """Derive name, contact and skill columns from the ``text`` column.

    Adds ``candidate_name``, ``email`` and ``phone`` columns, then one
    column per extractor via ``extract_fields``. *nlp* is not used here;
    it is returned unchanged next to the frame.
    """
    logging.info('Begin transform')

    def _first_email(resume_text):
        return term_match(resume_text, EMAIL_REGEX)

    def _first_phone(resume_text):
        return term_match(resume_text, PHONE_REGEX)

    def _name(resume_text):
        return candidate_name_extractor(resume_text)

    # Candidate name
    observations['candidate_name'] = observations['text'].apply(_name)

    # Contact fields
    observations['email'] = observations['text'].apply(_first_email)
    observations['phone'] = observations['text'].apply(_first_phone)

    # Skills
    observations = extract_fields(observations)

    logging.info('End transform')
    return observations, nlp

In [23]:
#Define a function to write all the extracted data to the output location
def load(observations, nlp, output_dir=r'D:\Mukul\Amity\Project\ResumeParser'):
    """Write the extracted observations to ``resume_summary.csv``.

    Args:
        observations: DataFrame to write; its index is stored in a column
            labelled ``index``.
        nlp: Not used by this function; kept for interface compatibility.
        output_dir: Directory that receives ``resume_summary.csv``.
            Defaults to the original hard-coded location.
    """
    logging.info('Begin load')
    output_path = os.path.join(output_dir, 'resume_summary.csv')

    logging.info('Results being output to {}'.format(output_path))
    print('Results output to {}'.format(output_path))

    observations.to_csv(path_or_buf=output_path, index_label='index')
    logging.info('End load')

In [24]:
# Main section
# Run the pipeline only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

INFO:root:Begin extract
INFO:root:Found 22 candidate files
INFO:pdfminer.pdfdocument:xref found: pos=b'116131'
INFO:pdfminer.pdfdocument:read_xref_from: start=116131, token=/b'xref'
INFO:pdfminer.pdfdocument:xref objects: {1: (None, 113580, 0), 2: (None, 19, 0), 3: (None, 5115, 0), 4: (None, 115493, 0), 5: (None, 5136, 0), 6: (None, 10498, 0), 7: (None, 10519, 0), 8: (None, 10712, 0), 9: (None, 11003, 0), 10: (None, 11168, 0), 11: (None, 13941, 0), 12: (None, 13963, 0), 13: (None, 14150, 0), 14: (None, 14441, 0), 15: (None, 14598, 0), 16: (None, 49136, 0), 17: (None, 49159, 0), 18: (None, 49359, 0), 19: (None, 49932, 0), 20: (None, 50357, 0), 21: (None, 71272, 0), 22: (None, 71295, 0), 23: (None, 71505, 0), 24: (None, 71995, 0), 25: (None, 72343, 0), 26: (None, 99758, 0), 27: (None, 99781, 0), 28: (None, 99986, 0), 29: (None, 100550, 0), 30: (None, 100972, 0), 31: (None, 112604, 0), 32: (None, 112627, 0), 33: (None, 112841, 0), 34: (None, 113203, 0), 35: (None, 113443, 0), 36: (None, 1

INFO:pdfminer.pdfinterp:get_font: create: objid=32, spec={'Type': /'Font', 'Subtype': /'TrueType', 'BaseFont': /'EAAAAA+OpenSymbol', 'FirstChar': 0, 'LastChar': 1, 'Widths': [365, 355], 'FontDescriptor': <PDFObjRef:30>, 'ToUnicode': <PDFObjRef:31>}
INFO:pdfminer.pdfpage:Page: {'Type': /'Page', 'Parent': <PDFObjRef:12>, 'Resources': <PDFObjRef:34>, 'MediaBox': [0, 0, 612, 792], 'Annots': [<PDFObjRef:8>, <PDFObjRef:9>], 'Group': {'S': /'Transparency', 'CS': /'DeviceRGB', 'I': True}, 'Contents': <PDFObjRef:5>}
INFO:pdfminer.pdfinterp:Processing page: <PDFPage: Resources={'Font': <PDFObjRef:33>, 'ProcSet': [/'PDF', /'Text']}, MediaBox=[0, 0, 612, 792]>
INFO:pdfminer.pdfinterp:render_contents: resources={'Font': <PDFObjRef:33>, 'ProcSet': [/'PDF', /'Text']}, streams=[<PDFStream(5): raw=1786, {'Length': <PDFObjRef:6>, 'Filter': /'FlateDecode'}>], ctm=(1, 0, 0, 1, 0, 0)
INFO:pdfminer.pdfdocument:xref found: pos=b'187770'
INFO:pdfminer.pdfdocument:read_xref_from: start=187770, token=/b'xref'
I

INFO:pdfminer.pdfinterp:Processing page: <PDFPage: Resources={'Font': {'F2': <PDFObjRef:9>, 'F8': <PDFObjRef:30>, 'F5': <PDFObjRef:18>, 'F6': <PDFObjRef:20>, 'F1': <PDFObjRef:5>, 'F4': <PDFObjRef:13>, 'F7': <PDFObjRef:25>}, 'ExtGState': {'GS7': <PDFObjRef:7>, 'GS8': <PDFObjRef:8>}, 'ProcSet': [/'PDF', /'Text', /'ImageB', /'ImageC', /'ImageI']}, MediaBox=[0, 0, 612, 792]>
INFO:pdfminer.pdfinterp:render_contents: resources={'Font': {'F2': <PDFObjRef:9>, 'F8': <PDFObjRef:30>, 'F5': <PDFObjRef:18>, 'F6': <PDFObjRef:20>, 'F1': <PDFObjRef:5>, 'F4': <PDFObjRef:13>, 'F7': <PDFObjRef:25>}, 'ExtGState': {'GS7': <PDFObjRef:7>, 'GS8': <PDFObjRef:8>}, 'ProcSet': [/'PDF', /'Text', /'ImageB', /'ImageC', /'ImageI']}, streams=[<PDFStream(37): raw=2935, {'Filter': /'FlateDecode', 'Length': 2933}>], ctm=(1, 0, 0, 1, 0, 0)
INFO:pdfminer.pdfdocument:xref found: pos=b'116'
INFO:pdfminer.pdfdocument:read_xref_from: start=116, token=/b'xref'
INFO:pdfminer.pdfdocument:xref objects: {75: (None, 16, 0), 76: (Non

INFO:pdfminer.pdfdocument:trailer: {'Size': 33, 'Root': <PDFObjRef:31>, 'Info': <PDFObjRef:32>, 'ID': [b'\xcc/6\xd5\xe0\xc26\x85\xbe\xa4g\x8fA\xb5\x06!', b'\xcc/6\xd5\xe0\xc26\x85\xbe\xa4g\x8fA\xb5\x06!'], 'DocChecksum': /'E0387D3A27B9D79E095D9B7302B85596'}
INFO:pdfminer.pdfdocument:trailer: {'Size': 33, 'Root': <PDFObjRef:31>, 'Info': <PDFObjRef:32>, 'ID': [b'\xcc/6\xd5\xe0\xc26\x85\xbe\xa4g\x8fA\xb5\x06!', b'\xcc/6\xd5\xe0\xc26\x85\xbe\xa4g\x8fA\xb5\x06!'], 'DocChecksum': /'E0387D3A27B9D79E095D9B7302B85596'}
INFO:pdfminer.pdfpage:Pages: Kids=[<PDFObjRef:1>, <PDFObjRef:4>]
INFO:pdfminer.pdfpage:Page: {'Type': /'Page', 'Parent': <PDFObjRef:8>, 'Resources': <PDFObjRef:30>, 'MediaBox': [0, 0, 609.95905511811, 946.714960629921], 'Group': {'S': /'Transparency', 'CS': /'DeviceRGB', 'I': True}, 'Contents': <PDFObjRef:2>}
INFO:pdfminer.pdfinterp:Processing page: <PDFPage: Resources={'Font': <PDFObjRef:29>, 'ProcSet': [/'PDF', /'Text']}, MediaBox=[0, 0, 609.95905511811, 946.714960629921]>
INFO

INFO:pdfminer.pdfinterp:Processing page: <PDFPage: Resources={'ExtGState': {'GS8': <PDFObjRef:8>}, 'Font': {'FT14': <PDFObjRef:14>, 'FT19': <PDFObjRef:19>, 'FT9': <PDFObjRef:9>}}, MediaBox=[0, 0, 612, 792]>
INFO:pdfminer.pdfinterp:render_contents: resources={'ExtGState': {'GS8': <PDFObjRef:8>}, 'Font': {'FT14': <PDFObjRef:14>, 'FT19': <PDFObjRef:19>, 'FT9': <PDFObjRef:9>}}, streams=[<PDFObjRef:49>, <PDFObjRef:7>, <PDFObjRef:50>], ctm=(1, 0, 0, 1, 0, 0)
INFO:pdfminer.pdfinterp:get_font: create: objid=14, spec={'BaseFont': /'AUWCNV+TimesNewRomanPSMT', 'DescendantFonts': [<PDFObjRef:16>], 'Encoding': /'Identity-H', 'Subtype': /'Type0', 'ToUnicode': <PDFObjRef:15>, 'Type': /'Font'}
INFO:pdfminer.pdfinterp:get_font: create: objid=None, spec={'BaseFont': /'AUWCNV+TimesNewRomanPSMT', 'CIDSystemInfo': <PDFObjRef:48>, 'CIDToGIDMap': /'Identity', 'DW': 777, 'FontDescriptor': <PDFObjRef:17>, 'Subtype': /'CIDFontType2', 'Type': /'Font', 'W': [5, [408], 8, [833, 777], 11, 12, 333, 15, [250, 333], 1

INFO:pdfminer.pdfinterp:get_font: create: objid=None, spec={'Type': /'Font', 'Subtype': /'CIDFontType2', 'BaseFont': /'Ubuntu-Regular', 'CIDSystemInfo': {'Registry': b'Adobe', 'Ordering': b'Identity', 'Supplement': 0}, 'FontDescriptor': <PDFObjRef:23>, 'CIDToGIDMap': /'Identity', 'W': [0, [496, 528, 518, 854, 383, 399, 566, 229, 251, 570, 707, 658, 560, 615, 267, 566, 722, 864, 244, 555, 461, 638, 271, 574, 584, 585, 383, 570, 584, 493, 442, 515, 244, 603, 498, 584, 296, 771, 559, 559, 943, 559, 559, 559, 559, 559, 559, 559, 518, 381, 251, 322, 322, 626, 651, 667, 559, 922, 533, 467, 624, 496, 624, 658, 772, 584, 699, 638, 682, 770, 559, 507, 244]], 'Encoding': /'Identity-H', 'ToUnicode': <PDFStream(26): raw=869, {'Length': 868}>}
INFO:pdfminer.pdfinterp:get_font: create: objid=7, spec={'Type': /'Font', 'Subtype': /'Type0', 'BaseFont': /'Ubuntu-Bold', 'Encoding': /'Identity-H', 'DescendantFonts': [<PDFObjRef:20>], 'ToUnicode': <PDFObjRef:21>}
INFO:pdfminer.pdfinterp:get_font: create: o

INFO:pdfminer.pdfdocument:xref objects: {1: (None, 17, 0), 2: (None, 168, 0), 3: (None, 224, 0), 4: (None, 639, 0), 5: (None, 12316, 0), 6: (None, 12485, 0), 7: (None, 12725, 0), 8: (None, 12778, 0), 9: (None, 12831, 0), 10: (None, 13006, 0), 11: (None, 13252, 0), 12: (None, 13438, 0), 13: (None, 13576, 0), 14: (None, 13606, 0), 15: (None, 13772, 0), 16: (None, 13846, 0), 17: (None, 14092, 0), 18: (None, 14235, 0), 19: (None, 14265, 0), 20: (None, 14436, 0), 21: (None, 14510, 0), 22: (None, 14756, 0), 23: (None, 14919, 0), 24: (None, 15146, 0), 25: (None, 15279, 0), 26: (None, 15309, 0), 27: (None, 15470, 0), 28: (None, 15544, 0), 29: (None, 15785, 0), 30: (None, 16205, 0), 31: (None, 16510, 0), 32: (None, 16938, 0), 33: (None, 17257, 0), 34: (None, 17480, 0), 35: (None, 18021, 0), 36: (None, 18401, 0), 37: (None, 18756, 0), 132: (None, 22383, 0), 167: (None, 22434, 0), 215: (None, 22485, 0), 220: (None, 22536, 0), 225: (None, 22587, 0), 231: (None, 22638, 0), 236: (None, 22689, 0), 24

INFO:pdfminer.pdfinterp:render_contents: resources={'Font': {'F17': <PDFObjRef:10>, 'F8': <PDFObjRef:11>, 'F14': <PDFObjRef:12>, 'F18': <PDFObjRef:13>, 'F19': <PDFObjRef:14>, 'F7': <PDFObjRef:15>, 'F21': <PDFObjRef:16>, 'F20': <PDFObjRef:18>}, 'ProcSet': [/'PDF', /'Text']}, streams=[<PDFStream(7): raw=2981, {'Length': 2980, 'Filter': /'FlateDecode'}>], ctm=(1, 0, 0, 1, 0, 0)
INFO:pdfminer.pdfinterp:get_font: create: objid=10, spec={'Type': /'Font', 'Subtype': /'Type1', 'BaseFont': /'VXXGEA+CMBX12', 'FontDescriptor': <PDFObjRef:35>, 'FirstChar': 82, 'LastChar': 117, 'Widths': <PDFObjRef:31>}
INFO:pdfminer.pdfinterp:get_font: create: objid=11, spec={'Type': /'Font', 'Subtype': /'Type1', 'BaseFont': /'CMIFXJ+CMR10', 'FontDescriptor': <PDFObjRef:37>, 'FirstChar': 11, 'LastChar': 123, 'Widths': <PDFObjRef:30>}
INFO:pdfminer.pdfinterp:get_font: create: objid=12, spec={'Type': /'Font', 'Subtype': /'Type1', 'BaseFont': /'PLHDPU+CMSY10', 'FontDescriptor': <PDFObjRef:45>, 'FirstChar': 15, 'LastC

INFO:pdfminer.pdfinterp:get_font: create: objid=55, spec={'BaseFont': /'QTUNTQ+Cambria,Bold', 'FontDescriptor': <PDFObjRef:76>, 'ToUnicode': <PDFObjRef:77>, 'Type': /'Font', 'FirstChar': 32, 'LastChar': 122, 'Widths': [220, 0, 0, 0, 0, 0, 740, 0, 408, 408, 0, 592, 232, 337, 232, 0, 592, 592, 592, 592, 0, 0, 0, 0, 0, 0, 280, 0, 0, 0, 0, 0, 0, 652, 651, 573, 705, 578, 551, 646, 722, 350, 341, 682, 551, 846, 679, 695, 614, 695, 662, 513, 639, 676, 634, 961, 619, 0, 0, 0, 0, 0, 0, 371, 0, 535, 591, 469, 597, 531, 326, 520, 597, 314, 302, 592, 308, 890, 604, 569, 597, 591, 461, 459, 365, 597, 531, 798, 525, 531, 479], 'Subtype': /'TrueType'}
INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(49): raw=192, {'Subtype': /'Image', 'ColorSpace': /'DeviceRGB', 'Width': 10, 'Height': 10, 'BitsPerComponent': 8, 'Filter': /'FlateDecode', 'DecodeParms': {'Predictor': 15, 'Columns': 10, 'Colors': 3}, 'SMask': <PDFObjRef:50>, 'Length': 191}>
INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(48): raw

INFO:pdfminer.pdfinterp:get_font: create: objid=64, spec={'BaseFont': /'LBNUGT+Calibri,Bold', 'FontDescriptor': <PDFObjRef:80>, 'ToUnicode': <PDFObjRef:81>, 'Type': /'Font', 'FirstChar': 32, 'LastChar': 32, 'Widths': [226], 'Subtype': /'TrueType'}
INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(62): raw=192, {'Subtype': /'Image', 'ColorSpace': /'DeviceRGB', 'Width': 10, 'Height': 10, 'BitsPerComponent': 8, 'Filter': /'FlateDecode', 'DecodeParms': {'Predictor': 15, 'Columns': 10, 'Colors': 3}, 'SMask': <PDFObjRef:50>, 'Length': 191}>
INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(61): raw=192, {'Subtype': /'Image', 'ColorSpace': /'DeviceRGB', 'Width': 10, 'Height': 10, 'BitsPerComponent': 8, 'Filter': /'FlateDecode', 'DecodeParms': {'Predictor': 15, 'Columns': 10, 'Colors': 3}, 'SMask': <PDFObjRef:50>, 'Length': 191}>
INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(60): raw=192, {'Subtype': /'Image', 'ColorSpace': /'DeviceRGB', 'Width': 10, 'Height': 10, 'BitsPerComponent':

INFO:pdfminer.pdfinterp:get_font: create: objid=10, spec={'Type': /'Font', 'Subtype': /'Type1', 'BaseFont': /'PAKZSA+MinionPro-Regular', 'FontDescriptor': <PDFObjRef:41>, 'Encoding': {'Type': /'Encoding', 'Differences': [33, /'f_t', /'f_f_i', /'f_i', /'f_l', /'f_f']}, 'ToUnicode': <PDFObjRef:42>, 'FirstChar': 33, 'LastChar': 37, 'Widths': [582, 798, 535, 533, 561]}
INFO:pdfminer.pdfinterp:get_font: create: objid=12, spec={'Type': /'Font', 'Subtype': /'Type1', 'BaseFont': /'SPOOYB+MinionPro-It', 'FontDescriptor': <PDFObjRef:32>, 'Encoding': /'MacRomanEncoding', 'FirstChar': 32, 'LastChar': 121, 'Widths': [229, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 334, 0, 0, 477, 477, 477, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 527, 681, 0, 0, 0, 0, 0, 876, 0, 0, 554, 0, 0, 468, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 490, 486, 0, 508, 400, 0, 420, 0, 273, 0, 0, 249, 787, 532, 0, 486, 0, 366, 337, 307, 533, 0, 0, 0, 442]}
INFO:pdfminer.pdfinterp:get_font: create: objid=11, spec={'Type': /

INFO:pdfminer.pdfinterp:get_font: create: objid=25, spec={'Type': /'Font', 'Subtype': /'Type0', 'BaseFont': /'SymbolMT', 'Encoding': /'Identity-H', 'DescendantFonts': <PDFObjRef:26>, 'ToUnicode': <PDFObjRef:302>}
INFO:pdfminer.pdfinterp:get_font: create: objid=None, spec={'BaseFont': /'SymbolMT', 'Subtype': /'CIDFontType2', 'Type': /'Font', 'CIDToGIDMap': /'Identity', 'DW': 1000, 'CIDSystemInfo': <PDFObjRef:28>, 'FontDescriptor': <PDFObjRef:29>, 'W': <PDFObjRef:304>, 'Encoding': /'Identity-H', 'ToUnicode': <PDFStream(302): raw=227, {'Filter': /'FlateDecode', 'Length': 225}>}
INFO:pdfminer.pdfinterp:get_font: create: objid=30, spec={'Type': /'Font', 'Subtype': /'TrueType', 'Name': /'F8', 'BaseFont': /'ArialMT', 'Encoding': /'WinAnsiEncoding', 'FontDescriptor': <PDFObjRef:31>, 'FirstChar': 32, 'LastChar': 32, 'Widths': <PDFObjRef:305>}
INFO:pdfminer.pdfpage:Page: {'Type': /'Page', 'Parent': <PDFObjRef:2>, 'Resources': {'Font': {'F4': <PDFObjRef:16>, 'F5': <PDFObjRef:18>, 'F6': <PDFObjRef

INFO:pdfminer.pdfinterp:render_contents: resources={'ColorSpace': {'CSp': /'DeviceRGB', 'CSpg': /'DeviceGray', 'PCSp': <PDFObjRef:12>}, 'ExtGState': {'GSa': <PDFObjRef:13>, 'GState23': <PDFObjRef:14>, 'GState9': <PDFObjRef:15>}, 'Font': {'F22': <PDFObjRef:16>, 'F6': <PDFObjRef:21>, 'F7': <PDFObjRef:26>, 'F8': <PDFObjRef:31>}, 'Pattern': {}, 'XObject': {'Im12': <PDFObjRef:36>, 'Im16': <PDFObjRef:38>, 'Im20': <PDFObjRef:40>, 'Im26': <PDFObjRef:42>, 'Im30': <PDFObjRef:44>, 'Im34': <PDFObjRef:46>, 'Im38': <PDFObjRef:48>, 'Im42': <PDFObjRef:50>, 'Im46': <PDFObjRef:52>, 'Im50': <PDFObjRef:54>, 'Im54': <PDFObjRef:56>, 'Im58': <PDFObjRef:58>, 'Im62': <PDFObjRef:60>}}, streams=[<PDFObjRef:10>], ctm=(1, 0, 0, 1, 0, 0)
INFO:pdfminer.pdfinterp:get_font: create: objid=16, spec={'BaseFont': /'OpenSans-SemiBold', 'DescendantFonts': [<PDFObjRef:17>], 'Encoding': /'Identity-H', 'Subtype': /'Type0', 'ToUnicode': <PDFObjRef:20>, 'Type': /'Font'}
INFO:pdfminer.pdfinterp:get_font: create: objid=None, spec=

INFO:pdfminer.pdfinterp:get_font: create: objid=66, spec={'BaseFont': /'DejaVuSans', 'DescendantFonts': [<PDFObjRef:67>], 'Encoding': /'Identity-H', 'Subtype': /'Type0', 'ToUnicode': <PDFObjRef:70>, 'Type': /'Font'}
INFO:pdfminer.pdfinterp:get_font: create: objid=None, spec={'BaseFont': /'DejaVuSans', 'CIDSystemInfo': {'Ordering': b'Identity', 'Registry': b'Adobe', 'Supplement': 0}, 'CIDToGIDMap': /'Identity', 'FontDescriptor': <PDFObjRef:68>, 'Subtype': /'CIDFontType2', 'Type': /'Font', 'W': [0, [595, 514]], 'Encoding': /'Identity-H', 'ToUnicode': <PDFStream(70): raw=227, {'Filter': /'FlateDecode', 'Length': 225}>}
INFO:pdfminer.pdfdocument:xref found: pos=b'108892'
INFO:pdfminer.pdfdocument:read_xref_from: start=108892, token=/b'xref'
INFO:pdfminer.pdfdocument:xref objects: {1: (None, 9, 0), 2: (None, 108794, 0), 3: (None, 157, 0), 4: (None, 252, 0), 5: (None, 49771, 0), 6: (None, 289, 0), 7: (None, 344, 0), 8: (None, 1644, 0), 9: (None, 1664, 0), 10: (None, 3285, 0), 11: (None, 3306

INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(33): raw=1137, {'Type': /'XObject', 'Subtype': /'Image', 'Width': 146, 'Height': 275, 'BitsPerComponent': 8, 'ColorSpace': /'DeviceRGB', 'SMask': <PDFObjRef:31>, 'Length': <PDFObjRef:34>, 'Filter': /'FlateDecode'}>
INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(37): raw=5668, {'Type': /'XObject', 'Subtype': /'Image', 'Width': 799, 'Height': 274, 'BitsPerComponent': 8, 'ColorSpace': /'DeviceRGB', 'SMask': <PDFObjRef:35>, 'Length': <PDFObjRef:38>, 'Filter': /'FlateDecode'}>
INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(41): raw=230, {'Type': /'XObject', 'Subtype': /'Image', 'Width': 19, 'Height': 49, 'BitsPerComponent': 8, 'ColorSpace': /'DeviceRGB', 'SMask': <PDFObjRef:39>, 'Length': <PDFObjRef:42>, 'Filter': /'FlateDecode'}>
INFO:pdfminer.pdfinterp:Processing xobj: <PDFStream(45): raw=229, {'Type': /'XObject', 'Subtype': /'Image', 'Width': 19, 'Height': 49, 'BitsPerComponent': 8, 'ColorSpace': /'DeviceRGB', 'SMask': <PDFObj

Results output to D:\Mukul\Amity\Project\ResumeParser\resume_summary.csv
