In [581]:
from pdfminer.converter import TextConverter
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
import io

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, 'rb') as fh:
        # iterate over all pages of PDF document
        for page in PDFPage.get_pages(fh, caching=True, check_extractable=True,):
            # creating a resoure manager
            resource_manager = PDFResourceManager()
            
            # create a file handle
            fake_file_handle = io.StringIO()
            
            # creating a text converter object
            converter = TextConverter(
                                resource_manager, 
                                fake_file_handle, 
                                codec='utf-8', 
                                laparams=LAParams()
                        )

            # creating a page interpreter
            page_interpreter = PDFPageInterpreter(
                                resource_manager, 
                                converter
                            )

            # process current page
            page_interpreter.process_page(page)
            
            # extract text
            text = fake_file_handle.getvalue()
            yield text

            # close open handles
            converter.close()
            fake_file_handle.close()



In [582]:
text = ""
for page in extract_text_from_pdf("Curriculum Vitae -Shehan Krishan.pdf"):
    text += ' ' + page


In [583]:
text

' Shehan Krishan\nData Science Graduate\n\nContact:\n\n0767158801\n\nshehankrishan6@gmail.com\n\n34/25 Kelaninadee rd,Mulleriyawa \n\nhttps://www.linkedin.com/in/shehan-\nkrishan/\n\nTECHNICAL SKILLS:\n\nProgramming Languages\n\n●\n●\n●\n\nPython\nDart\nJavascript\n\nProfessional Profile:\n\nA motivated and results-oriented Ml Engineer with one \n\nyear of experience in AI /ML ,Web Development, API \n\nDevelopment,Mobile App Development . Possesses a \n\nstrong a strong Problem Solving,Time Management \n\nand Analytical Thinking.Adept at AI, Machine Learning, \n\nFlutter, Python, and API development.I am a mature \n\nteam  worker and adaptable to all challenging \n\nsituations. I can work well  both in a team environment \n\nas well as using my own initiative.Seeking to leverage \n\nmy skills and experience to contribute effectively to \n\nyour company where my skills can shine. Moreover,I \n\nam looking for an opportunity that offers me to \n\ndevelop  new skills while strengthening t

In [224]:
import re

def extract_mobile_number(text):
    phone = re.findall(re.compile(r'(?:(?:\+?([1-9]|[0-9][0-9]|[0-9][0-9][0-9])\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([0-9][1-9]|[0-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?'), text)
    
    if phone:
        number = ''.join(phone[0])
        if len(number) > 10:
            return '+' + number
        else:
            return number

In [225]:
extract_mobile_number(text)

'076715880'

In [599]:
from spacy.matcher import Matcher
import spacy
nlp = spacy.load('en_core_web_sm')
matcher = Matcher(nlp.vocab)

def extract_name(resume_text):
    nlp_text = nlp(resume_text)
    
    # First name and Last name are always Proper Nouns
    pattern = [{'POS': 'PROPN'}, {'POS': 'PROPN'}]
    
    matcher.add('NAME', None, pattern)
    
    matches = matcher(nlp_text)
    
    for match_id, start, end in matches:
        span = nlp_text[start:end]
        return span.text

In [600]:
extract_name(text)

'Shehan Krishan'

In [226]:
def extract_email(email):
    email = re.findall("([^@|\s]+@[^@]+\.[^@|\s]+)", email)
    if email:
        try:
            return email[0].split()[0].strip(';')
        except IndexError:
            return None

In [227]:
extract_email(text)

'shehankrishan6@gmail.com'

In [228]:
import spacy
import pandas as pd

nlp = spacy.load('en_core_web_sm')

def extract_skills(resume_text):
    nlp_text = nlp(resume_text)

    # removing stop words and implementing word tokenization
    tokens = [token.text for token in nlp_text if not token.is_stop]
    
    # reading the csv file
    data = pd.read_csv("skills.csv") 
    
    # extract values
    skills = list(data.columns.values)
    
    skillset = []
    
    # check for one-grams (example: python)
    for token in tokens:
        if token.lower() in skills:
            skillset.append(token)
    
    # check for bi-grams and tri-grams (example: machine learning)
    for chunk in nlp_text.noun_chunks:
        chunk_text = chunk.text.lower().strip()
        if chunk_text in skills:
            skillset.append(chunk_text)
    
    return list(set([i.capitalize() for i in skillset]))

In [601]:
skills = extract_skills(text)
print(skills) 

['Github', 'Technical skills', 'Analysis', 'Automation', 'Startup', 'Ai', 'Database', 'Java', 'Oracle', 'Css', 'Technical', 'Machine learning', 'Flask', 'Programming', 'Research', 'Tableau', 'Sql', 'Javascript', 'Pandas', 'Flutter', 'Apis', 'Mysql', 'Python', 'Html', 'Api', 'Mobile', 'Data analysis', 'Analytical', 'Etl', 'Aws', 'International', 'Time management']


In [236]:
new_text = re.sub(r'●\n', ' ', text)

In [237]:
new_text = re.sub(r'\n', ' ', new_text)

In [238]:
new_text = re.sub(r':', ' - ', new_text)

In [239]:
new_text

' Shehan Krishan Data Science Graduate  Contact -   0767158801  shehankrishan6@gmail.com  34/25 Kelaninadee rd,Mulleriyawa   https - //www.linkedin.com/in/shehan- krishan/  TECHNICAL SKILLS -   Programming Languages      Python Dart Javascript  Professional Profile -   A motivated and results-oriented Ml Engineer with one   year of experience in AI /ML ,Web Development, API   Development,Mobile App Development . Possesses a   strong a strong Problem Solving,Time Management   and Analytical Thinking.Adept at AI, Machine Learning,   Flutter, Python, and API development.I am a mature   team  worker and adaptable to all challenging   situations. I can work well  both in a team environment   as well as using my own initiative.Seeking to leverage   my skills and experience to contribute effectively to   your company where my skills can shine. Moreover,I   am looking for an opportunity that offers me to   develop  new skills while strengthening those I already   Mobile Application Development

In [280]:
EDUCATION = ['BSC', 'B.Tech', 'B.E.', 'M.Sc.', 'M.Tech', 'M.E.', 'Ph.D.', 'DIPLOMA','GCE']
STOPWORDS = ['and', 'in', 'of', 'at']


def extract_education(resume_text):
    nlp_text = nlp(resume_text)

    # Sentence Tokenizer
    nlp_text = [sent.text.strip() for sent in nlp_text.sents]
    # nlp_text = [sent.string.strip() for sent in nlp_text.sents]
    print("nlp_text",nlp_text)
    
    edu = {}
    # Extract education degree
    for index, text in enumerate(nlp_text[:-1]):
        # print(text)
        text = re.sub(r'[?|$|.|!|,]', r'', text)
        # print(text)

        # next_text = nlp_text[index + 1]
        # print("next_text",next_text)
        # next_text = re.sub(r'[?|$|.|!|,]', r'', next_text)
        # print("next_text",next_text)
        for tex in text.split():
            # print("tex",tex)
            if tex.upper() in EDUCATION and tex not in STOPWORDS:
                next_text = nlp_text[index + 1]
                next_text = re.sub(r'[?|$|.|!|,]', r'', next_text)
                cleaned_text = re.sub(r'●\n', '', next_text)
                print(cleaned_text)
                # for x in range(len(nlp_text) - index):
                #     print(x)
                #     next_text = nlp_text[index + x]
                #     print("next_text",next_text)
                #     years = re.findall(r'\b\d{4}\b', next_text)
                #     if years:
                #         edu["years"] = years
                #         break
                edu[tex] = cleaned_text
                # edu["gg"] = cleaned_text
    print(edu)

    # Extract year
    education = []
    # for key in edu.keys():
    #     year = re.search(r'(\b(?:20|19)\d{2}\b)', edu[key])
    #     if year:
    #         education.append((key, year.group(0)))
    #     else:
    #         education.append(key)
    return education,edu

In [603]:
education,edu = extract_education(new_text)
print(education)

nlp_text [' Shehan Krishan Data Science Graduate  Contact -   0767158801  shehankrishan6@gmail.com  34/25 Kelaninadee rd,Mulleriyawa   https - //www.linkedin.com/in/shehan- krishan/  TECHNICAL SKILLS -   Programming Languages      Python Dart Javascript  Professional Profile -   A motivated and results-oriented Ml Engineer with one   year of experience in AI /ML ,Web Development, API   Development,Mobile App Development .', 'Possesses a   strong a strong Problem Solving,Time Management   and Analytical Thinking.Adept at AI, Machine Learning,   Flutter, Python, and API development.I am a mature   team  worker and adaptable to all challenging   situations.', 'I can work well  both in a team environment   as well as using my own initiative.Seeking to leverage   my skills and experience to contribute effectively to   your company where my skills can shine.', 'Moreover,I   am looking for an opportunity that offers me to   develop  new skills while strengthening those I already   Mobile Appl

In [604]:
EDUCATION = ['BSC', 'B.Tech', 'B.E.', 'M.Sc.', 'M.Tech', 'M.E.', 'Ph.D.']
STOPWORDS = ['and', 'in', 'of', 'at']


def extract_education(resume_text):
    # nlp_text = nlp(resume_text)

    # Sentence Tokenizer
    # nlp_text = [sent.text.strip() for sent in nlp_text.sents]
    nlp_text = nltk.sent_tokenize(resume_text)
    # nlp_text = [sent.string.strip() for sent in nlp_text.sents]
    print("nlp_text",nlp_text)
    
    edu = {}
    # Extract education degree
    for index, text in enumerate(nlp_text[:-1]):
        # print(text)
        text = re.sub(r'[?|$|.|!|,]', r'', text)
        # print(text)

        # next_text = nlp_text[index + 1]
        # print("next_text",next_text)
        # next_text = re.sub(r'[?|$|.|!|,]', r'', next_text)
        # print("next_text",next_text)
        for tex in text.split():
            # print("tex",tex)
            if tex.upper() in EDUCATION and tex not in STOPWORDS:
                next_text = nlp_text[index + 1]
                next_text = re.sub(r'[?|$|.|!|,]', r'', next_text)
                cleaned_text = re.sub(r'●\n', '', next_text)
                print(cleaned_text)
                # next_text = nlp_text[index + 2]
                # print("next_text 2:",next_text)
                # years = re.findall(r'\b\d{4}\b', next_text)
                # print(years)
                for x in range(len(nlp_text) - index + 1):
                    next_text_c = nlp_text[index + x +1]
                    years = re.findall(r'\b\d{4}\b', next_text_c)
                    if years:
                        edu["years"] = years
                        # print(years)
                        break
                edu[tex] = cleaned_text
                # edu["gg"] = cleaned_text
    print(edu)
    return edu

In [607]:
edu = extract_education(new_text)
print(education)

nlp_text [' Shehan Krishan Data Science Graduate  Contact -   0767158801  shehankrishan6@gmail.com  34/25 Kelaninadee rd,Mulleriyawa   https - //www.linkedin.com/in/shehan- krishan/  TECHNICAL SKILLS -   Programming Languages      Python Dart Javascript  Professional Profile -   A motivated and results-oriented Ml Engineer with one   year of experience in AI /ML ,Web Development, API   Development,Mobile App Development .', 'Possesses a   strong a strong Problem Solving,Time Management   and Analytical Thinking.Adept at AI, Machine Learning,   Flutter, Python, and API development.I am a mature   team  worker and adaptable to all challenging   situations.', 'I can work well  both in a team environment   as well as using my own initiative.Seeking to leverage   my skills and experience to contribute effectively to   your company where my skills can shine.', 'Moreover,I   am looking for an opportunity that offers me to   develop  new skills while strengthening those I already   Mobile Appl

In [608]:
edu

{'years': ['2019', '2023'],
 'BSc': 'Special (Hons) – Information   Technology (Specialization – Data Science)   at Sri Lanka Institute of Information   Technology (SLIIT) Graduate'}

In [609]:
new_text

' Shehan Krishan Data Science Graduate  Contact -   0767158801  shehankrishan6@gmail.com  34/25 Kelaninadee rd,Mulleriyawa   https - //www.linkedin.com/in/shehan- krishan/  TECHNICAL SKILLS -   Programming Languages      Python Dart Javascript  Professional Profile -   A motivated and results-oriented Ml Engineer with one   year of experience in AI /ML ,Web Development, API   Development,Mobile App Development . Possesses a   strong a strong Problem Solving,Time Management   and Analytical Thinking.Adept at AI, Machine Learning,   Flutter, Python, and API development.I am a mature   team  worker and adaptable to all challenging   situations. I can work well  both in a team environment   as well as using my own initiative.Seeking to leverage   my skills and experience to contribute effectively to   your company where my skills can shine. Moreover,I   am looking for an opportunity that offers me to   develop  new skills while strengthening those I already   Mobile Application Development

In [376]:
# Using regular expressions to extract work experience
experience = re.findall(r'(?s)(?<=Experience:)(.*?)(?=\n\n)', text)

for exp in experience:
    print(exp.strip())
    print("-" * 50)


--------------------------------------------------
AI /Ml Engineer - ICT Options PVT
--------------------------------------------------

--------------------------------------------------
Research
--------------------------------------------------


In [375]:
text

' Shehan Krishan\nData Science Graduate\n\nContact:\n\n0767158801\n\nshehankrishan6@gmail.com\n\n34/25 Kelaninadee rd,Mulleriyawa \n\nhttps://www.linkedin.com/in/shehan-\nkrishan/\n\nTECHNICAL SKILLS:\n\nProgramming Languages\n\n●\n●\n●\n\nPython\nDart\nJavascript\n\nProfessional Profile:\n\nA motivated and results-oriented Ml Engineer with one \n\nyear of experience in AI /ML ,Web Development, API \n\nDevelopment,Mobile App Development . Possesses a \n\nstrong a strong Problem Solving,Time Management \n\nand Analytical Thinking.Adept at AI, Machine Learning, \n\nFlutter, Python, and API development.I am a mature \n\nteam  worker and adaptable to all challenging \n\nsituations. I can work well  both in a team environment \n\nas well as using my own initiative.Seeking to leverage \n\nmy skills and experience to contribute effectively to \n\nyour company where my skills can shine. Moreover,I \n\nam looking for an opportunity that offers me to \n\ndevelop  new skills while strengthening t

In [377]:
# Using regular expressions to extract job roles and corresponding sentences
job_roles = re.findall(r'([A-Z][a-z]+ [A-Z][a-z]+) - ([\w\s,]+)\n\s*:\s*([\w\s,/.()-]+)\n', text)

for role, company, responsibilities in job_roles:
    print("Job Role:", role)
    print("Company:", company)
    print("Responsibilities:", responsibilities.strip())
    print("-" * 50)

Job Role: Ml Engineer
Company: ICT Options PVT

Responsibilities: 02/2023 - 04/2024
--------------------------------------------------
Job Role: Data Analyst
Company: Dialog Axiata PLC

Responsibilities: 12/2021 - 09/2022
--------------------------------------------------


In [430]:
text = ""
for page in extract_text_from_pdf("Curriculum Vitae -Shehan Krishan.pdf"):
    text += ' ' + page

In [413]:
# job_roles = re.findall(r'([A-Z][a-z]+ [A-Z][a-z]+) - ([\w\s,]+)\n\s*:\s*([\w\s,/.()-]+)\n', text)
job_roles = re.findall(r'([A-Z][a-z]+ [A-Z][a-z]+) - ([\w\s,()]+)\n\s*:\s*([\w\s,/.()-]+)\n', text)

for role, company, responsibilities in job_roles:
    print("Job Role:", role)
    print("Company:", company)
    print("Responsibilities:", responsibilities.strip())
    print("-" * 50)

In [431]:
text

' Shehan Krishan\nData Science Graduate\n\nContact:\n\n0767158801\n\nshehankrishan6@gmail.com\n\n34/25 Kelaninadee rd,Mulleriyawa \n\nhttps://www.linkedin.com/in/shehan-\nkrishan/\n\nTECHNICAL SKILLS:\n\nProgramming Languages\n\n●\n●\n●\n\nPython\nDart\nJavascript\n\nProfessional Profile:\n\nA motivated and results-oriented Ml Engineer with one \n\nyear of experience in AI /ML ,Web Development, API \n\nDevelopment,Mobile App Development . Possesses a \n\nstrong a strong Problem Solving,Time Management \n\nand Analytical Thinking.Adept at AI, Machine Learning, \n\nFlutter, Python, and API development.I am a mature \n\nteam  worker and adaptable to all challenging \n\nsituations. I can work well  both in a team environment \n\nas well as using my own initiative.Seeking to leverage \n\nmy skills and experience to contribute effectively to \n\nyour company where my skills can shine. Moreover,I \n\nam looking for an opportunity that offers me to \n\ndevelop  new skills while strengthening t

In [432]:
lines = text.split('\n')
lines

[' Shehan Krishan',
 'Data Science Graduate',
 '',
 'Contact:',
 '',
 '0767158801',
 '',
 'shehankrishan6@gmail.com',
 '',
 '34/25 Kelaninadee rd,Mulleriyawa ',
 '',
 'https://www.linkedin.com/in/shehan-',
 'krishan/',
 '',
 'TECHNICAL SKILLS:',
 '',
 'Programming Languages',
 '',
 '●',
 '●',
 '●',
 '',
 'Python',
 'Dart',
 'Javascript',
 '',
 'Professional Profile:',
 '',
 'A motivated and results-oriented Ml Engineer with one ',
 '',
 'year of experience in AI /ML ,Web Development, API ',
 '',
 'Development,Mobile App Development . Possesses a ',
 '',
 'strong a strong Problem Solving,Time Management ',
 '',
 'and Analytical Thinking.Adept at AI, Machine Learning, ',
 '',
 'Flutter, Python, and API development.I am a mature ',
 '',
 'team  worker and adaptable to all challenging ',
 '',
 'situations. I can work well  both in a team environment ',
 '',
 'as well as using my own initiative.Seeking to leverage ',
 '',
 'my skills and experience to contribute effectively to ',
 '',
 'you

In [436]:
# Experience:
experience_index = lines.index('Experience:')
experience_index

112

In [466]:
def get_number_of_pages(file_name):
    try:
        if isinstance(file_name, io.BytesIO):
            # for remote pdf file
            count = 0
            for page in PDFPage.get_pages(
                        file_name,
                        caching=True,
                        check_extractable=True
            ):
                count += 1
            return count
        else:
            # for local pdf file
            if file_name.endswith('.pdf'):
                count = 0
                with open(file_name, 'rb') as fh:
                    for page in PDFPage.get_pages(
                            fh,
                            caching=True,
                            check_extractable=True
                    ):
                        count += 1
                return count
            else:
                return None
    except PDFSyntaxError:
        return None

In [467]:
get_number_of_pages("Shehan_Krishan_CV (1).pdf")

2

In [475]:
def extract_text_from_pdf(pdf_path,mxpage):
    with open(pdf_path, 'rb') as fh:
        # iterate over all pages of PDF document
        for page in PDFPage.get_pages(fh, caching=True, check_extractable=True,maxpages=mxpage):
            # creating a resoure manager
            resource_manager = PDFResourceManager()
            
            # create a file handle
            fake_file_handle = io.StringIO()
            
            # creating a text converter object
            converter = TextConverter(
                                resource_manager, 
                                fake_file_handle, 
                                codec='utf-8', 
                                laparams=LAParams()
                        )

            # creating a page interpreter
            page_interpreter = PDFPageInterpreter(
                                resource_manager, 
                                converter
                            )

            # process current page
            page_interpreter.process_page(page)
            
            # extract text
            text = fake_file_handle.getvalue()
            yield text

            # close open handles
            converter.close()
            fake_file_handle.close()

In [476]:
def extract_text(file_path, extension,mxpage):
    text_pdf = ''
    if extension == '.pdf':
        for page in extract_text_from_pdf(file_path,mxpage):
            text_pdf += ' ' + page
    return text_pdf

In [499]:
text_pdf = extract_text("Curriculum Vitae -Shehan Krishan.pdf",'.pdf',1)

In [493]:
lines = text_pdf.split('\n')

In [494]:
len(lines)

1

In [495]:
lines

[' \x0c']

In [491]:
# Splitting the text by new lines
lines = text.split('\n')

# Finding the index of 'Experience' in the text
experience_index = lines.index('Experience:')

# Iterating over the lines after 'Experience' to extract job roles, companies, and responsibilities
job_roles = []
# for line in lines[experience_index + 1:]:
for idx, line in enumerate(lines[experience_index + 1:]):
    if 180 < experience_index + idx :
        print(experience_index+idx)
        break
    # print(idx)
    # Checking if the line contains a job role (Engineer, Manager, Intern, etc.)
    if any(role in line.lower() for role in ['engineer', 'manager', 'intern']):
        for _,y in enumerate(lines[experience_index + idx + 2:]):
            # next_text_c = nlp_text[y]
            # print(next_text_c)
            years = re.findall(r'\b\d{4}\b', y)
            if years:
                print(y)
                break
            
        # Appending the job role, company, and responsibilities to the list
        job_roles.append(line.strip())
        job_roles.append((line.strip(), y))
    
# Printing the extracted job roles, companies, and responsibilities
for job in job_roles:
    print(job)

 : 02/2023 - 04/2024 
 : 03/2023 - 09/2023
181
AI /Ml Engineer - ICT Options PVT
('AI /Ml Engineer - ICT Options PVT', ' : 02/2023 - 04/2024 ')
Junior Ml Engineer - Emuq Tech (Miami, Florida US)
('Junior Ml Engineer - Emuq Tech (Miami, Florida US)', ' : 03/2023 - 09/2023')


In [438]:
# Splitting the text by new lines
lines = text.split('\n')

# Finding the index of 'Experience' in the text
experience_index = lines.index('Experience:')

# Iterating over the lines after 'Experience' to extract job roles, companies, responsibilities, and dates
job_roles = []
current_job = {}
for line in lines[experience_index + 1:]:
    if 'Experience' in line:
        # If a new job is encountered, add the current one to the list and initialize a new one
        if current_job:
            job_roles.append(current_job)
            current_job = {}
    elif any(role in line.lower() for role in ['engineer', 'manager', 'intern']):
        # Extracting job role and company
        current_job['role']= [part.strip() for part in line.split('-')]
    elif '–' in line:
        # Extracting job dates
        current_job['dates'] = line.strip()

    elif line.strip() and not line.strip().startswith('•'):
        # If a line is not starting with a bullet point, consider it as responsibility
        if 'responsibilities' in current_job:
            current_job['responsibilities'] += ' ' + line.strip()
        else:
            current_job['responsibilities'] = line.strip()

# Adding the last job to the list
if current_job:
    job_roles.append(current_job)

# Printing the extracted job roles, companies, responsibilities, and dates
for job in job_roles:
    print("Job Role:", job.get('role'))
    print("Company:", job.get('company'))
    print("Dates:", job.get('dates'))
    print("Responsibilities:", job.get('responsibilities'))
    print("-" * 50)

Job Role: ['Junior Ml Engineer', 'Emuq Tech (Miami, Florida US)']
Company: None
Dates: None
Responsibilities: : 02/2023 - 04/2024 ● ● ● ● ● ● ● Developed  Computer Vision projects using Python,Flutter,NextJS Developed ML and Custom Algorithm projects using  Python,Flutter,NextJS Developed Web Applications using Flutter,NextJS Developed Mobile Applications using Flutter Developed APIs using python FastAPI/Flask frameworks Data Analysis and Data Preprocessing and Data Migrations Developed Chat/Voice Bots using Python,Flutter ,rasa : 03/2023 - 09/2023 ● ● ● ● Developed Generative Ml models using Python Train Ml Models using Python Developed APIs using python FastAPI/Flask frameworks
--------------------------------------------------
Job Role: ['Intern Data Analyst', 'Dialog Axiata PLC']
Company: None
Dates: Technology (Specialization – Data Science)
Responsibilities: ● ● Selected for top 10 teams in Datarush 1.0 organized by DSSC of SLIIT. Selected for the Top 10 teams of Intellihack 2.0 

In [400]:
print(text)

  Prabhashwara 
Karunarathne 

prabhashwarabandara665@gmaill.com 

+94716201971 

52,Viduhalpathana, Bandarawela 

linkedin.com/in/prabhashwara-
karunarathne 
     N7365984 

Achievements 

•  Gold Star Award winner in 2008 
• 
Chess Champion of India and Sri 
Lanka in 2011 

Hobbies 

• 
• 
• 

Photography 
Travel 
Listening to music 

Skill Highlights 

• 

• 

• 
• 
• 

Customer-focused and solutions 
driven. 
Strong decision maker 

Complex analytical person 
Innovative 
Excellent interpersonal and 
communication skills 

Summary 
I am a resolute, Organized, and Methodical individual. I am seeking a challenge 
offering  growth  and  potential  in  a  progressive  organization.  I  am  a  motivated, 
highly enthusiastic, hardworking individual who consistently strives to enhance 
my learning and personal development whenever possible. 

Experience 

Executive- Recovery Services  
HNB Finance PLC 
(2023 October – Present) 

•  Manage  the  entire  process  of  letter  correspondence,

In [572]:
text_service_Letter = """
TO WHOM IT MAY CONCERN
This is to certify that Mr T N R Peiris S/No 16132 was employed at SriLankan Airlines Limited as Customer Service Agent (Ticketing) since 05.07.2004 to 04.07.2005 on contract basis. He was absorbed in to the permanent cadre since 05.07.2005 and left the Company Service with effect from 05.07.2009.
We thank Mr Peiris for the services rendered to the Company and wish him every success in his future endeavours.
R.Mewhas
RUKMANI MANOHAR
HUMAN RESOURCES SERVICES MANAGER
10.11.2010
mc
"""

In [563]:
text_service_Letter = """
SADISA MANAGEMENT & TECHNOLOGIES
SADISA M&T
B.R. No. 1998/M3 No. 5/61 138, Thalgaspedesa, Kirimatimulla, Thelijavila. 
077-8603555/071-8081809 
sadisamanagment@gmail.com
22nd AUGUST 2023

TO WHOM IT MAY CONCERN.

This is to certify that DON KODITHUWAKKU KARUNARATHNE RAVEEN SHENOL of 28/A/1, WALANA, WELIGAMA, bearer of NIC 200118200862 is employed at our organization as a 'Production Associator 'on sub-contract basis from 20th July 2021 to 30th March 2022.

During this period he has been assigned to the production department of Midigama Air Tire division at Michelin Lanka (pvt) Ltd.

During this period, we found him to be honest, loyal & hard working. He performed all the duties entrusted to him to the satisfaction of his superiors.

He bears a good moral character & I have no hesitation in recommending him to any prospective employer who needs his service in the same field.

We wish him all success in his future endeavors.

This letter was issued on his request and without prejudice.

Thanking you,

Yours truly,

Sadisa Management & Technologies

N.A.G.D Krishantha
Manager Administration
SADISA MANAGEMENT & TECHNOLOGIES
61/5, 13/B, Thalgaspedesa, Kirimetimulla, Thelejjavila.
"""

In [511]:
def change_month_to_number(text):
    month_dict = {
        'JANUARY': '01',
        'FEBRUARY': '02',
        'MARCH': '03',
        'APRIL': '04',
        'MAY': '05',
        'JUNE': '06',
        'JULY': '07',
        'AUGUST': '08',
        'SEPTEMBER': '09',
        'OCTOBER': '10',
        'NOVEMBER': '11',
        'DECEMBER': '12'
    }
    for month, value in month_dict.items():
        text = re.sub(r'\b{}\b'.format(month), value, text.upper())
    return text

In [573]:
tt = change_month_to_number(text_service_Letter)
print(tt)


TO WHOM IT 05 CONCERN
THIS IS TO CERTIFY THAT MR T N R PEIRIS S/NO 16132 WAS EMPLOYED AT SRILANKAN AIRLINES LIMITED AS CUSTOMER SERVICE AGENT (TICKETING) SINCE 05.07.2004 TO 04.07.2005 ON CONTRACT BASIS. HE WAS ABSORBED IN TO THE PERMANENT CADRE SINCE 05.07.2005 AND LEFT THE COMPANY SERVICE WITH EFFECT FROM 05.07.2009.
WE THANK MR PEIRIS FOR THE SERVICES RENDERED TO THE COMPANY AND WISH HIM EVERY SUCCESS IN HIS FUTURE ENDEAVOURS.
R.MEWHAS
RUKMANI MANOHAR
HUMAN RESOURCES SERVICES MANAGER
10.11.2010
MC



In [574]:
tt = re.sub(r'(\d{2})(?:ST|ND|RD|TH) (\d{2}) (\d{4})', r'\1 \2 \3', tt)
tt = re.sub(r'\.', ' ', tt)

In [575]:
print(tt)


TO WHOM IT 05 CONCERN
THIS IS TO CERTIFY THAT MR T N R PEIRIS S/NO 16132 WAS EMPLOYED AT SRILANKAN AIRLINES LIMITED AS CUSTOMER SERVICE AGENT (TICKETING) SINCE 05 07 2004 TO 04 07 2005 ON CONTRACT BASIS  HE WAS ABSORBED IN TO THE PERMANENT CADRE SINCE 05 07 2005 AND LEFT THE COMPANY SERVICE WITH EFFECT FROM 05 07 2009 
WE THANK MR PEIRIS FOR THE SERVICES RENDERED TO THE COMPANY AND WISH HIM EVERY SUCCESS IN HIS FUTURE ENDEAVOURS 
R MEWHAS
RUKMANI MANOHAR
HUMAN RESOURCES SERVICES MANAGER
10 11 2010
MC



In [576]:
lines = tt.split("\n")

for indx, line in enumerate(lines):
    # print(line)
    if " TO " in line:
        # dates = re.findall(r'\b\d{2}(?:ST|ND|RD|TH) \d{2} \d{4}\b', line)
        dates = re.findall(r'\b\d{2} \d{2} \d{4}\b', line)
        if dates:
            date_indexes = [line.index(date) for date in dates]
            to_index = line.index(" TO ")
            
            for date_index in date_indexes:
                print("Date range:", dates[date_indexes.index(date_index)], "to", dates[date_indexes.index(date_index)+1])
                break

Date range: 05 07 2004 to 04 07 2005


In [538]:
for indx,line in enumerate(fk):
    # print(line)
    findTo = re.findall(r'TO', line)
    if findTo:
        for tex in line.split():
            print(tex)

    # dates = re.findall(r'\b\d{2}(?:ST|ND|RD|TH) \d{2} \d{4}\b', line)
    print(findTo)

[]
[]
[]
[]
077-8603555/071-8081809
SADISAMANAGMENT@GMAIL.COM
22ND
08
2023
TO
WHOM
IT
05
CONCERN.
['TO']
THIS
IS
TO
CERTIFY
THAT
DON
KODITHUWAKKU
KARUNARATHNE
RAVEEN
SHENOL
OF
28/A/1,
WALANA,
WELIGAMA,
BEARER
OF
NIC
200118200862
IS
EMPLOYED
AT
OUR
ORGANIZATION
AS
A
'PRODUCTION
ASSOCIATOR
'ON
SUB-CONTRACT
BASIS
FROM
20TH
07
2021
TO
30TH
03
2022.
['TO', 'TO', 'TO']
DURING
THIS
PERIOD
HE
HAS
BEEN
ASSIGNED
TO
THE
PRODUCTION
DEPARTMENT
OF
MIDIGAMA
AIR
TIRE
DIVISION
AT
MICHELIN
LANKA
(PVT)
LTD.
DURING
THIS
PERIOD,
WE
FOUND
HIM
TO
BE
HONEST,
LOYAL
&
HARD
WORKING.
['TO', 'TO']
HE
PERFORMED
ALL
THE
DUTIES
ENTRUSTED
TO
HIM
TO
THE
SATISFACTION
OF
HIS
SUPERIORS.
['TO', 'TO']
HE
BEARS
A
GOOD
MORAL
CHARACTER
&
I
HAVE
NO
HESITATION
IN
RECOMMENDING
HIM
TO
ANY
PROSPECTIVE
EMPLOYER
WHO
NEEDS
HIS
SERVICE
IN
THE
SAME
FIELD.
['TO']
[]
[]
[]


In [529]:
fk

['\nSADISA MANAGEMENT & TECHNOLOGIES\nSADISA M&T\nB.R.',
 'NO.',
 '1998/M3 NO.',
 '5/61 138, THALGASPEDESA, KIRIMATIMULLA, THELIJAVILA.',
 '077-8603555/071-8081809 \nSADISAMANAGMENT@GMAIL.COM\n22ND 08 2023\n\nTO WHOM IT 05 CONCERN.',
 "THIS IS TO CERTIFY THAT DON KODITHUWAKKU KARUNARATHNE RAVEEN SHENOL OF 28/A/1, WALANA, WELIGAMA, BEARER OF NIC 200118200862 IS EMPLOYED AT OUR ORGANIZATION AS A 'PRODUCTION ASSOCIATOR 'ON SUB-CONTRACT BASIS FROM 20TH 07 2021 TO 30TH 03 2022.",
 'DURING THIS PERIOD HE HAS BEEN ASSIGNED TO THE PRODUCTION DEPARTMENT OF MIDIGAMA AIR TIRE DIVISION AT MICHELIN LANKA (PVT) LTD.\n\nDURING THIS PERIOD, WE FOUND HIM TO BE HONEST, LOYAL & HARD WORKING.',
 'HE PERFORMED ALL THE DUTIES ENTRUSTED TO HIM TO THE SATISFACTION OF HIS SUPERIORS.',
 'HE BEARS A GOOD MORAL CHARACTER & I HAVE NO HESITATION IN RECOMMENDING HIM TO ANY PROSPECTIVE EMPLOYER WHO NEEDS HIS SERVICE IN THE SAME FIELD.',
 'WE WISH HIM ALL SUCCESS IN HIS FUTURE ENDEAVORS.',
 'THIS LETTER WAS ISSUED ON 

In [527]:
dates = re.findall(r'\b\d{2}(?:ST|ND|RD|TH) \d{2} \d{4}\b', tt)
to_index = text.index(" TO ")
selected_date = None

for date in dates:
    index = text.index(date)
    if index < to_index:
        selected_date = (date, index)
        break

print(selected_date)

ValueError: substring not found

In [None]:
# Regular expression pattern to match dates in the format "ddth Month yyyy"
date_pattern = r'\b(\d{1,2}(?:st|nd|rd|th)\s(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{4})\b'

# Find all dates in the text
dates = re.findall(date_pattern, text)

# Extract start and end dates
start_date = dates[0]
end_date = dates[1]

print("Start Date:", start_date)
print("End Date:", end_date)

/ gggg

In [344]:
test2 = re.sub(r'G.C.E',r'G.C.E.',new_text)
test2

' Shehan Krishan Data Science Graduate  Contact -   0767158801  shehankrishan6@gmail.com  34/25 Kelaninadee rd,Mulleriyawa   https - //www.linkedin.com/in/shehan- krishan/  TECHNICAL SKILLS -   Programming Languages      Python Dart Javascript  Professional Profile -   A motivated and results-oriented Ml Engineer with one   year of experience in AI /ML ,Web Development, API   Development,Mobile App Development . Possesses a   strong a strong Problem Solving,Time Management   and Analytical Thinking.Adept at AI, Machine Learning,   Flutter, Python, and API development.I am a mature   team  worker and adaptable to all challenging   situations. I can work well  both in a team environment   as well as using my own initiative.Seeking to leverage   my skills and experience to contribute effectively to   your company where my skills can shine. Moreover,I   am looking for an opportunity that offers me to   develop  new skills while strengthening those I already   Mobile Application Development

In [343]:
test2 = re.sub(r'Diploma',r'Diploma.',new_text)
test2

' Shehan Krishan Data Science Graduate  Contact -   0767158801  shehankrishan6@gmail.com  34/25 Kelaninadee rd,Mulleriyawa   https - //www.linkedin.com/in/shehan- krishan/  TECHNICAL SKILLS -   Programming Languages      Python Dart Javascript  Professional Profile -   A motivated and results-oriented Ml Engineer with one   year of experience in AI /ML ,Web Development, API   Development,Mobile App Development . Possesses a   strong a strong Problem Solving,Time Management   and Analytical Thinking.Adept at AI, Machine Learning,   Flutter, Python, and API development.I am a mature   team  worker and adaptable to all challenging   situations. I can work well  both in a team environment   as well as using my own initiative.Seeking to leverage   my skills and experience to contribute effectively to   your company where my skills can shine. Moreover,I   am looking for an opportunity that offers me to   develop  new skills while strengthening those I already   Mobile Application Development

In [259]:
test2 = re.sub(r'  ',r' ',new_text)

In [260]:
education,edu = extract_education(test2)
print(edu)

nlp_text ['Shehan Krishan Data Science Graduate Contact -  0767158801 shehankrishan6@gmail.com 34/25 Kelaninadee rd,Mulleriyawa  https - //www.linkedin.com/', 'in/shehan-', 'krishan/ TECHNICAL SKILLS -  Programming Languages', 'Python', 'Dart Javascript Professional Profile -  A motivated and results-oriented Ml Engineer with one  year of experience in AI', '/ML ,Web Development, API  Development,Mobile App Development .', 'Possesses a  strong a strong Problem Solving,Time Management  and Analytical Thinking.', 'Adept at AI, Machine Learning,  Flutter, Python, and API development.', 'I am a mature  team worker and adaptable to all challenging  situations.', 'I can work well both in a team environment  as well as using my own initiative.', 'Seeking to leverage  my skills and experience to contribute effectively to  your company where my skills can shine.', 'Moreover,I  am looking for an opportunity that offers me to  develop new skills while strengthening those I already  Mobile Applica

In [267]:
test2

' Shehan Krishan Data Science Graduate Contact -  0767158801 shehankrishan6@gmail.com 34/25 Kelaninadee rd,Mulleriyawa  https - //www.linkedin.com/in/shehan- krishan/ TECHNICAL SKILLS -  Programming Languages   Python Dart Javascript Professional Profile -  A motivated and results-oriented Ml Engineer with one  year of experience in AI /ML ,Web Development, API  Development,Mobile App Development . Possesses a  strong a strong Problem Solving,Time Management  and Analytical Thinking.Adept at AI, Machine Learning,  Flutter, Python, and API development.I am a mature  team worker and adaptable to all challenging  situations. I can work well both in a team environment  as well as using my own initiative.Seeking to leverage  my skills and experience to contribute effectively to  your company where my skills can shine. Moreover,I  am looking for an opportunity that offers me to  develop new skills while strengthening those I already  Mobile Application Development possess.   Flutter Java Web

In [278]:
xx = nltk.sent_tokenize(test2)

In [279]:
xx

[' Shehan Krishan Data Science Graduate  Contact -   0767158801  shehankrishan6@gmail.com  34/25 Kelaninadee rd,Mulleriyawa   https - //www.linkedin.com/in/shehan- krishan/  TECHNICAL SKILLS -   Programming Languages      Python Dart Javascript  Professional Profile -   A motivated and results-oriented Ml Engineer with one   year of experience in AI /ML ,Web Development, API   Development,Mobile App Development .',
 'Possesses a   strong a strong Problem Solving,Time Management   and Analytical Thinking.Adept at AI, Machine Learning,   Flutter, Python, and API development.I am a mature   team  worker and adaptable to all challenging   situations.',
 'I can work well  both in a team environment   as well as using my own initiative.Seeking to leverage   my skills and experience to contribute effectively to   your company where my skills can shine.',
 'Moreover,I   am looking for an opportunity that offers me to   develop  new skills while strengthening those I already   Mobile Applicatio

In [269]:
xx

[' Shehan Krishan Data Science Graduate Contact -  0767158801 shehankrishan6@gmail.com 34/25 Kelaninadee rd,Mulleriyawa  https - //www.linkedin.com/in/shehan- krishan/ TECHNICAL SKILLS -  Programming Languages   Python Dart Javascript Professional Profile -  A motivated and results-oriented Ml Engineer with one  year of experience in AI /ML ,Web Development, API  Development,Mobile App Development .',
 'Possesses a  strong a strong Problem Solving,Time Management  and Analytical Thinking.Adept at AI, Machine Learning,  Flutter, Python, and API development.I am a mature  team worker and adaptable to all challenging  situations.',
 'I can work well both in a team environment  as well as using my own initiative.Seeking to leverage  my skills and experience to contribute effectively to  your company where my skills can shine.',
 'Moreover,I  am looking for an opportunity that offers me to  develop new skills while strengthening those I already  Mobile Application Development possess.',
 'F

In [263]:
import nltk
 
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
 
 
RESERVED_WORDS = [
    'school',
    'college',
    'univers',
    'academy',
    'faculty',
    'institute',
    'faculdades',
    'Schola',
    'schule',
    'lise',
    'lyceum',
    'lycee',
    'polytechnic',
    'kolej',
    'ünivers',
    'okul',
]
 
 
 
def extract_education(input_text):
    organizations = []
 
    # first get all the organization names using nltk
    for sent in nltk.sent_tokenize(input_text):
        for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent))):
            if hasattr(chunk, 'label') and chunk.label() == 'ORGANIZATION':
                organizations.append(' '.join(c[0] for c in chunk.leaves()))
 
    # we search for each bigram and trigram for reserved words
    # (college, university etc...)
    education = set()
    for org in organizations:
        for word in RESERVED_WORDS:
            if org.lower().find(word) >= 0:
                education.add(org)
 
    return education

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sheha\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sheha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\sheha\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\sheha\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\sheha\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


In [266]:
extract_education(new_text)

{'Research Institute',
 'Sri Lanka Institute',
 'Sri Subuthi National School',
 'University',
 'University Project Experience'}