In [None]:
!pip install corextopic

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting corextopic
  Downloading corextopic-1.1-py3-none-any.whl (27 kB)
Installing collected packages: corextopic
Successfully installed corextopic-1.1


In [None]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from nltk.stem import WordNetLemmatizer
import pandas as pd
from collections import Counter
import re
import pickle
import string
from sklearn.decomposition import NMF
import spacy
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB
import numpy as np
from corextopic import corextopic as ct

In [None]:
def lemmatize_verbs(words):
    """Lemmatize verbs in list of tokenized words"""
    lemmatizer = WordNetLemmatizer()
    lemmas = []
    for word in words:
        lemma = lemmatizer.lemmatize(word, pos='v')
        lemmas.append(lemma)
    return ' '.join(lemmas)

In [None]:
def only_word(list_):
    """
    This method is to return list of words only in the passed list.
    """
    list_to_return = []
    for ele in list_:
        if len(ele) >= 3:
            list_to_return.append(ele)
    return ' '.join(list_to_return)

In [None]:
def clean_method(text):
    """This method depends on lemmatize_verbs, remove_stopwords and clean_text to apply them on passed parameter"""
    if len(text) <= 100:
        return 'd'
    stopwords = nltk.corpus.stopwords.words('english')
#     text = text.lower() 
    text = text.replace('\\n', ' ').replace('\\r', ' ').replace('\n', ' ').replace('\r', ' ').replace('\xef', ' ').replace('\\xef', ' ')
    text = re.sub('http\S+\s*', ' ', text)
    text = re.sub(r'(.)\1+', r'\1', text)
    text = re.sub('[^a-zA-Z]+', ' ', text)
    text = only_word(text.split())
    text = lemmatize_verbs(text.split())
    text = [word for word in text.split() if word not in stopwords]
    return ' '.join(text)

In [None]:
def display_topics(model, feature_names, no_top_words, topic_names=None):
    """...."""
    for ix, topic in enumerate(model.components_):
        if not topic_names or not topic_names[ix]:
            print("\nTopic ", ix)
        else:
            print("\nTopic: '",topic_names[ix],"'")
        print(", ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))

In [None]:
def get_freq_word(TEXT_list):
    """This method get count the frequency of each word in the passed parameter"""
    Vocab_list = ' '.join(TEXT_list)
    temp_dict = Counter(Vocab_list.split()).items()
    freq_df = pd.DataFrame(temp_dict)
    freq_df = freq_df.sort_values(1, ascending = False)
    return freq_df

In [None]:
def write_pickle(file_name, df_to_write):
    """This method is to save the passed dataframe as a pickle file with the passed name"""
    with open(file_name, 'wb') as handle:
        pickle.dump(df_to_write, handle, protocol = pickle.HIGHEST_PROTOCOL)

In [None]:
def read_pickle(file_name):
    """...."""
    with open(file_name , 'rb') as handle:
        return pickle.load(handle)

In [None]:
def remove_domain_stop_words(stop_words, df_edit):
    """Remove domain specific stop words from the passed parameter"""
    df_after = df_edit.copy()
    for i in range(len(df_after)):
        for word in stop_words:
            pattern = r'\b' + word + r'\b'
            new_text = re.sub(pattern,'', df_after.TEXT[i])
            df_after.TEXT[i] = new_text
    return df_after

# **Remove Data from Pickle**

In [None]:
data = read_pickle('/content/df_1_after_cleaning.pickle')

# **Explore Data**

In [None]:
## this step is to display all text in each row
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', None)

In [None]:
data.sample(2, random_state = 1)

Unnamed: 0,Text
307,curriculum vitae sanjaysinh jadeja behind post office village motakapada taluka disa dist banaskatha gujarat mob email sanjaygmailcom passport validity career objective obtain store officer position with carter where may able utilize skills managing employees and store operations while working highly stimulating work environment personal information name sanjaysinh jadeja father name sukhdevsinh jadeja date birth may sex male marriage married language proficiency english hindi and gujarati academic qualification academic qualification university board year passing percentage ssc sir ajitsinhji govt highschool hsc sir ajitsinhji govt highschool bba sikkim manipal university mba sikkim manipal university computer skills operating systems windows office store sap system applications products erpenterprise resource planning pms system tally work experience years experiences stores total experiences are store dept various construction companies which taking contract construction industrial building factory power projects and infrastructures projects presently working kunal struture india pvt ltd project tawa canal project harda position store officer duration feb’ till date
2098,fire alarm maintenance emergency lighting and fire fighting system experience the installation maintenance and repair fire alarm voice evacuation emergency lighting and firefighting systems maintain and follow safety rules per construction standard personal profiles name abdul nasar siddiqui father name name abdul sayeed siddiqui date birth religion islam nationality indian marital status single languages known urdu hindi and english passport details passport date issue date expiry place issue lucknow visa status employment visa conclusion declaration hereby declare that the above mentioned statement correct true the best knowledge belief abdul nasar siddiqui


In [None]:
data.head(2)

Unnamed: 0,Text
0,personal information curriculum vitae fullnames mike kisasatiwanaswa idcardno postaladdress box mombasa telephoneno emailaddress mikewanaswagmailco mlanguages well spoken english and swahilipurpose put use the latest inventions telecommunication and information technology for positive impact individuals business enterprises and corporate organizations work experience date april todate position fixed data networktechnician employer ben’s electronics services ltdmombasa duties survey installation integration maintenance support and decommissioning fixed data services using various access technologies wimax fiber microwaves and wifi for safaricomltd survey installation and support ceragon’s access technology for airtel survey installation and support cambridge pmp solutions for safaricom fiber optics splicing and terminations deployment support and maintenance wanlan design installation and technical support structured cabling installation and support for cctv cameras biometrics security controls installation and support for radwin links and ceragon ptmplinks installation and support for telrad wimax bts wifi setup maintenance and support and sipvoip installation and support systems integration cisco and huawei platforms for mpls ppvpn and internet fixed lte installation for safaricom’s enterprise clients date september april position freelance computertechnician employer self employed inmombasa duties installation software and hardware for pcs serversprinters web design and website maintenance repair pcs printers scanners and other computer accessories lanwan design installation and support date march august position scanning officersystemadministrator employer interim independent electoralcommission duties scanning omr forms extract data for thevoters creation and maintenance voters’ databaseregister end user training and support voter management system mmc administration mail servers administration
1,network management data recovery repair and maintenance scanner printers and computers date january march position freelance computert duties installation software and hardware for pcs serversprinters repair pcs printers scanners and other computer accessories lanwan support educational background date september december institution inoorero university kenya school professional studies nairobicourse diploma computer engineering system administration and supportgrade credit date feb november institution bungoma high schoolbungoma grade kcse mean grade date jan december institution bungoma deb primary schoolbungoma grade kcpe out marks professional trainings date july institution amirancommunications course radwin installer pppmp grade distinction date march institution cambridge broadband networks kenyaltd course vectarstarinstallation grade distinction date jan institution ceragonnetworks course ceragon certified rollout professionalipcsen grade distinction date june institution learning ruckus course ruckus wireless certified associate support engineerrwcasupe grade distinction date jan mar institution iatmombasa course ccna grade distinction


In [None]:
data.tail(2)

Unnamed: 0,Text
9772,sslc moodbidri secondary education examination board puc jain junior college moodbidri department pre university education diploma automobil engg snm polytechnic moodbidri board technical education bangalore programming languages basic programming html programming platform windows windows windows software catia solid edge autocad achieved zero ppmzero defects counter measure taken using why why analysis’ successfully installed crack detection lights for detection cracks and pinholes the parts catia and autocad certificate cad desk attended safety training conducted joai father’s name ramesh mother’s name nalini date birth gender male nationality indian marital status single languages know english hindi kannada tulu telugu hereby declare that particulars given above are true the best knowledge and take full responsibility for any discrepancy false information contribution and achievements mechanical and related skills trainings and certificates personal details
9773,place moodbidri signature


# **Add SPACY_TEXT column**

In [None]:
# rename the 'Text' column to 'TEXT'
data = data.rename(columns={'Text': 'TEXT'})
data

Unnamed: 0,TEXT
0,personal information curriculum vitae fullnames mike kisasatiwanaswa idcardno postaladdress box mombasa telephoneno emailaddress mikewanaswagmailco mlanguages well spoken english and swahilipurpose put use the latest inventions telecommunication and information technology for positive impact individuals business enterprises and corporate organizations work experience date april todate position fixed data networktechnician employer ben’s electronics services ltdmombasa duties survey installation integration maintenance support and decommissioning fixed data services using various access technologies wimax fiber microwaves and wifi for safaricomltd survey installation and support ceragon’s access technology for airtel survey installation and support cambridge pmp solutions for safaricom fiber optics splicing and terminations deployment support and maintenance wanlan design installation and technical support structured cabling installation and support for cctv cameras biometrics security controls installation and support for radwin links and ceragon ptmplinks installation and support for telrad wimax bts wifi setup maintenance and support and sipvoip installation and support systems integration cisco and huawei platforms for mpls ppvpn and internet fixed lte installation for safaricom’s enterprise clients date september april position freelance computertechnician employer self employed inmombasa duties installation software and hardware for pcs serversprinters web design and website maintenance repair pcs printers scanners and other computer accessories lanwan design installation and support date march august position scanning officersystemadministrator employer interim independent electoralcommission duties scanning omr forms extract data for thevoters creation and maintenance voters’ databaseregister end user training and support voter management system mmc administration mail servers administration
1,network management data recovery repair and maintenance scanner printers and computers date january march position freelance computert duties installation software and hardware for pcs serversprinters repair pcs printers scanners and other computer accessories lanwan support educational background date september december institution inoorero university kenya school professional studies nairobicourse diploma computer engineering system administration and supportgrade credit date feb november institution bungoma high schoolbungoma grade kcse mean grade date jan december institution bungoma deb primary schoolbungoma grade kcpe out marks professional trainings date july institution amirancommunications course radwin installer pppmp grade distinction date march institution cambridge broadband networks kenyaltd course vectarstarinstallation grade distinction date jan institution ceragonnetworks course ceragon certified rollout professionalipcsen grade distinction date june institution learning ruckus course ruckus wireless certified associate support engineerrwcasupe grade distinction date jan mar institution iatmombasa course ccna grade distinction
2,date july institution viscar industrial capacityltd course fall arrest technicianwork atheight grade distinction note possession clean class bce driving licensereferees patrick odame regional election coordinator bungoma box bungoma phone snyodameyahoocou cyrus soi projects manager bens electronics box mombasa phone infobensele ctronicscoke christine owuor fixed access engineer safaricom box nairobi phone cowuorsafaricomcoke kelvin ongoro field engineersafaricom box nairobi phone kongorosafaricom coke
3,curriculum vitae ayush pandey current address plot shastri nagar kanpur uttar pradesh india permanent address plot shastri nagar kanpur uttar pradesh india passport place issue lucknow expiry date shubpandeyyahooin objective challenging and rewarding suitable position growthoriented organization this offers diverse job responsibility mechanical engineering new product introduction and will fully utilize technical and managerial skills education qualification bachelor mechanical engineering seth sriniwas agarwal institute engineering technology uptu kanpur awarded first class completed xiith standard the year from sardar patel inter college kanpur board awarded first class completed xth standard the year from ram krishana mission higher secondary school kanpur board awarded first class employment history frontier alloy steels ltd kanpur upindia design development engineer deptt june present understand and study the complete component drawings developing modelling and drawings with gdt component and pattern finite element analysis for stress strain and deformation analysis with the help ansys software casting simulation solve methoding and find out defects like shrinkage through procast software before production reverse engineering process machining inspection inspection the products defect analysis and then implement the ways minimize present defect within short time process planning execution lead the shop floor per requirement when products are under development deptt maintain all the records per international standards develop different types components ayush pandey shubpandeyyahooin page
4,special purpose machine remove burs from sharp corner edges after machining software skill having strong command below given software for computer aided designing catia for modeling partassembly and drafting both procast for casting simulation detect shrinkage air entrapment molten metal flow ugnx for modeling partassembly drafting further analysis solidworks for modeling partassemblydrafting ansys knowledge for finite element analysis structural analysis solidcam basic knowledge for machining autocad avicad for drafting reverse engineering diploma computer application technical skill excellent skills designing drawings models excellent skills casting manufacturing process pipe designing reverse engineering computer aided designing good presenter plant layout design development process planning facility planning documentations project management risk management lesson learning and implementation machine specification and finalization summer internship summer internship power grid corporation india limited one month summer internship pwd one month summer internship frontier spring pvt ltd topic manufacturing hot coiled helical springs one month major projects current industrial project “couplerknuckle” which are also using wagons industrial project sep team members role this project did all the designing developing part components such drafting modeling casting simulation for defect free casting quality proof casting documentation industrial project “draft gear sl” which are using wagons industrial project nov role this project lead the designing portion for this product team members automated flexialbe transfer line college project march role this project completed all the designing part such ddrawings dmodeling team members running project coach coupler with balanced draft gear ayush pandey shubpandeyyahooin page
...,...
9769,completed four weeks summer training from“ indian railway signal and networking gorakhpur” experience computer networking and accounting work year experience technosys company ltd project mini project title “library management system” team size responsibility study about diagram coding language description computer shop software multilanguage software for store billing management sell hardware systems components and accessories computer shop management system project will used for various purposes under computer shop major project title “dual link failure resiliency through backup link mutual exclusion” team size description the fast advancing global information infrastructure including information technology and computer networks such the internet and telecommunications systemsenable the development electronic commerce global level the nearly universal connectivity which the internet offers has made invaluable business tool these developments have created new type economy which many call the ‘digital economy’ this fast emerging economy bringing with rapidly changing technologies increasing knowledge intensity all areas business and creating virtual supply chains and new forms businesses and service delivery channel such ebanking achievements got third position high jump college athletics event personal profile father’s name ram naval yadav date birth languages known english hindi address villagegosaipur postjungle kauriya district gorakhpur declaration hereby declare that all the information mentioned above true and correct the best knowledge
9770,date amit kumar yadav place itm email idamityadavitmgmailcom gida gorakhpur mobile
9771,resume nishanth kumar address ramesh suvarna manjunath road pranthya village mangalore taluk district karnataka email nishanthkumar gmailcom phone looking for opportunity reputed organization which will help deliver best and upgrade skills engineering and meet the demands the organization company name jbm ogihara automotive india pvt ltd designation diploma engineer trainee period june july roles and responsibilities oversee mechanical design phases from concept through production collaborate with senior mechanical designers large scale design project selected assist the tool design engineer the development jigs and fixtures using autocad solid edge and catia conducting the inspection and measuring the parts using various scale gauges and tools auditing the poka yoke error proof kaizen performed production team and approving the same countermeasure report for customer problems field complaints ascertaining proper adequate safety shop floor among team members employing kaizen quality department making simple but effective gauges for checking parts coordinating with jig maintenance and production control department achieve the periodic production targets course institution board year passing percent age jain high school karnataka objectives experience accademicqualification
9772,sslc moodbidri secondary education examination board puc jain junior college moodbidri department pre university education diploma automobil engg snm polytechnic moodbidri board technical education bangalore programming languages basic programming html programming platform windows windows windows software catia solid edge autocad achieved zero ppmzero defects counter measure taken using why why analysis’ successfully installed crack detection lights for detection cracks and pinholes the parts catia and autocad certificate cad desk attended safety training conducted joai father’s name ramesh mother’s name nalini date birth gender male nationality indian marital status single languages know english hindi kannada tulu telugu hereby declare that particulars given above are true the best knowledge and take full responsibility for any discrepancy false information contribution and achievements mechanical and related skills trainings and certificates personal details


In [None]:
nlp = spacy.load('en_core_web_sm')
data['SPACY_TEXT'] = list(nlp.pipe(data.TEXT))

In [None]:
data.head(2)

Unnamed: 0,TEXT,SPACY_TEXT
0,personal information curriculum vitae fullnames mike kisasatiwanaswa idcardno postaladdress box mombasa telephoneno emailaddress mikewanaswagmailco mlanguages well spoken english and swahilipurpose put use the latest inventions telecommunication and information technology for positive impact individuals business enterprises and corporate organizations work experience date april todate position fixed data networktechnician employer ben’s electronics services ltdmombasa duties survey installation integration maintenance support and decommissioning fixed data services using various access technologies wimax fiber microwaves and wifi for safaricomltd survey installation and support ceragon’s access technology for airtel survey installation and support cambridge pmp solutions for safaricom fiber optics splicing and terminations deployment support and maintenance wanlan design installation and technical support structured cabling installation and support for cctv cameras biometrics security controls installation and support for radwin links and ceragon ptmplinks installation and support for telrad wimax bts wifi setup maintenance and support and sipvoip installation and support systems integration cisco and huawei platforms for mpls ppvpn and internet fixed lte installation for safaricom’s enterprise clients date september april position freelance computertechnician employer self employed inmombasa duties installation software and hardware for pcs serversprinters web design and website maintenance repair pcs printers scanners and other computer accessories lanwan design installation and support date march august position scanning officersystemadministrator employer interim independent electoralcommission duties scanning omr forms extract data for thevoters creation and maintenance voters’ databaseregister end user training and support voter management system mmc administration mail servers administration,"(personal, information, curriculum, vitae, fullnames, mike, kisasatiwanaswa, idcardno, postaladdress, box, mombasa, telephoneno, emailaddress, mikewanaswagmailco, mlanguages, well, spoken, english, and, swahilipurpose, put, use, the, latest, inventions, telecommunication, and, information, technology, for, positive, impact, individuals, business, enterprises, and, corporate, organizations, work, experience, date, april, todate, position, fixed, data, networktechnician, employer, ben, ’s, electronics, services, ltdmombasa, duties, survey, installation, integration, maintenance, support, and, decommissioning, fixed, data, services, using, various, access, technologies, wimax, fiber, microwaves, and, wifi, for, safaricomltd, survey, installation, and, support, ceragon, ’s, access, technology, for, airtel, survey, installation, and, support, cambridge, pmp, solutions, for, safaricom, fiber, optics, splicing, and, terminations, deployment, ...)"
1,network management data recovery repair and maintenance scanner printers and computers date january march position freelance computert duties installation software and hardware for pcs serversprinters repair pcs printers scanners and other computer accessories lanwan support educational background date september december institution inoorero university kenya school professional studies nairobicourse diploma computer engineering system administration and supportgrade credit date feb november institution bungoma high schoolbungoma grade kcse mean grade date jan december institution bungoma deb primary schoolbungoma grade kcpe out marks professional trainings date july institution amirancommunications course radwin installer pppmp grade distinction date march institution cambridge broadband networks kenyaltd course vectarstarinstallation grade distinction date jan institution ceragonnetworks course ceragon certified rollout professionalipcsen grade distinction date june institution learning ruckus course ruckus wireless certified associate support engineerrwcasupe grade distinction date jan mar institution iatmombasa course ccna grade distinction,"(network, management, data, recovery, repair, and, maintenance, scanner, printers, and, computers, date, january, march, position, freelance, computert, duties, installation, software, and, hardware, for, pcs, serversprinters, repair, pcs, printers, scanners, and, other, computer, accessories, lanwan, support, educational, background, date, september, december, institution, inoorero, university, kenya, school, professional, studies, nairobicourse, diploma, computer, engineering, system, administration, and, supportgrade, credit, date, feb, november, institution, bungoma, high, schoolbungoma, grade, kcse, mean, grade, date, jan, december, institution, bungoma, deb, primary, schoolbungoma, grade, kcpe, out, marks, professional, trainings, date, july, institution, amirancommunications, course, radwin, installer, pppmp, grade, distinction, date, march, institution, cambridge, broadband, networks, kenyaltd, course, vectarstarinstallation, ...)"


In [None]:
data.sample(2)

Unnamed: 0,TEXT,SPACY_TEXT
3931,biswajit parija contact email parijabiswajityahoocom skype biswajitparija accountant looking forward challenging assignments finance sector where strong background accounting operations management can put the best use summary dynamic and resourceful professional with more than years experience accounting operations managements possesses indepth knowledge about accounting financial processes and has the capability efficiently deal with matters related sales tax esi pro tax and mis key skills accounting financing knowledge accounts receivables accounts payables management financial report statements preparation sales tax esi pro tax and mis handling good organizational management skills team motivation multitasking hard work honesty individual well team play professional experience govind rubber limited cuttack jul till date designation branch accountant job responsibilities handling billing and payment managing cash credit notes and debit notes ensuring stock entry dealing with bank reconciliation branch reconciliation handling sales tax esi pro tax and mis rajprotim agency pvt ltd castrol india ltd jan jul designation incharge accountant industrial stock point isp castrol educational qualifications post graduate diploma computer tally ica,"(biswajit, parija, contact, email, parijabiswajityahoocom, skype, biswajitparija, accountant, looking, forward, challenging, assignments, finance, sector, where, strong, background, accounting, operations, management, can, put, the, best, use, summary, dynamic, and, resourceful, professional, with, more, than, years, experience, accounting, operations, managements, possesses, indepth, knowledge, about, accounting, financial, processes, and, has, the, capability, efficiently, deal, with, matters, related, sales, tax, esi, pro, tax, and, mis, key, skills, accounting, financing, knowledge, accounts, receivables, accounts, payables, management, financial, report, statements, preparation, sales, tax, esi, pro, tax, and, mis, handling, good, organizational, management, skills, team, motivation, multitasking, hard, work, honesty, individual, well, team, play, professional, experience, govind, ...)"
5294,references mrchrysantus gicheru senior hydro geologist bsc msc directorearth water limited gichmuchyahoocom maxwell barasa hydro geologistrural director everesta water limited focus maxeverestacoke moses ongoro county water director county government bungoma,"(references, mrchrysantus, gicheru, senior, hydro, geologist, bsc, msc, directorearth, water, limited, gichmuchyahoocom, maxwell, barasa, hydro, geologistrural, director, everesta, water, limited, focus, maxeverestacoke, moses, ongoro, county, water, director, county, government, bungoma)"


In [None]:
# Delete any Entities 
for i in range(0, len(data)):
    ents = data.SPACY_TEXT[i]
    j = 0
    for ent in ents.ents:
        pattern = r'\b' + ent.text + r'\b'
        new_text = re.sub(pattern,' ', data.TEXT[j])
        data.TEXT[j] = new_text
        j += 1

In [None]:
data.head(2)

Unnamed: 0,TEXT,SPACY_TEXT
0,personal information fullnames kisasatiwanaswa idcardno postaladdress box telephoneno emailaddress mikewanaswagmailco mlanguages well spoken and swahilipurpose put use the latest inventions telecommunication and information technology for positive impact individuals business enterprises and corporate organizations work experience date todate position fixed data networktechnician employer ben’s electronics services ltdmombasa duties survey installation integration maintenance support and decommissioning fixed data services using various access technologies wimax fiber microwaves and wifi for safaricomltd survey installation and support ceragon’s access technology for airtel survey installation and support cambridge pmp solutions for safaricom fiber optics splicing and terminations deployment support and maintenance wanlan design installation and technical support structured cabling installation and support for cameras biometrics security controls installation and support for radwin links and ceragon ptmplinks installation and support for telrad wimax bts wifi setup maintenance and support and sipvoip installation and support systems integration and huawei platforms for mpls ppvpn and internet fixed lte installation for safaricom’s enterprise clients date position freelance computertechnician employer self employed inmombasa duties installation software and hardware for pcs serversprinters web design and website maintenance repair pcs printers scanners and other computer accessories design installation and support date position scanning officersystemadministrator employer interim independent electoralcommission duties scanning omr forms extract data for thevoters creation and maintenance voters’ databaseregister end user training and support voter management system mmc administration mail servers administration,"(personal, information, curriculum, vitae, fullnames, mike, kisasatiwanaswa, idcardno, postaladdress, box, mombasa, telephoneno, emailaddress, mikewanaswagmailco, mlanguages, well, spoken, english, and, swahilipurpose, put, use, the, latest, inventions, telecommunication, and, information, technology, for, positive, impact, individuals, business, enterprises, and, corporate, organizations, work, experience, date, april, todate, position, fixed, data, networktechnician, employer, ben, ’s, electronics, services, ltdmombasa, duties, survey, installation, integration, maintenance, support, and, decommissioning, fixed, data, services, using, various, access, technologies, wimax, fiber, microwaves, and, wifi, for, safaricomltd, survey, installation, and, support, ceragon, ’s, access, technology, for, airtel, survey, installation, and, support, cambridge, pmp, solutions, for, safaricom, fiber, optics, splicing, and, terminations, deployment, ...)"
1,network management data recovery repair and maintenance scanner printers and computers date position freelance computert duties installation software and hardware for pcs serversprinters repair pcs printers scanners and other computer accessories lanwan support educational background date institution inoorero university school professional studies nairobicourse diploma computer engineering system administration and supportgrade credit date institution bungoma high schoolbungoma grade kcse mean grade date institution bungoma deb primary schoolbungoma grade kcpe out marks professional trainings date institution amirancommunications course radwin installer pppmp grade distinction date institution broadband networks kenyaltd course vectarstarinstallation grade distinction date institution ceragonnetworks course ceragon certified rollout professionalipcsen grade distinction date institution learning ruckus course ruckus wireless certified associate support engineerrwcasupe grade distinction date institution iatmombasa course ccna grade distinction,"(network, management, data, recovery, repair, and, maintenance, scanner, printers, and, computers, date, january, march, position, freelance, computert, duties, installation, software, and, hardware, for, pcs, serversprinters, repair, pcs, printers, scanners, and, other, computer, accessories, lanwan, support, educational, background, date, september, december, institution, inoorero, university, kenya, school, professional, studies, nairobicourse, diploma, computer, engineering, system, administration, and, supportgrade, credit, date, feb, november, institution, bungoma, high, schoolbungoma, grade, kcse, mean, grade, date, jan, december, institution, bungoma, deb, primary, schoolbungoma, grade, kcpe, out, marks, professional, trainings, date, july, institution, amirancommunications, course, radwin, installer, pppmp, grade, distinction, date, march, institution, cambridge, broadband, networks, kenyaltd, course, vectarstarinstallation, ...)"


In [None]:
vectorizer = CountVectorizer(max_features=20000,
                             stop_words='english', token_pattern="\\b[a-z][a-z]+\\b",
                             binary=True)

doc_word = vectorizer.fit_transform(data.TEXT)
words = list(np.asarray(vectorizer.get_feature_names_out()))

In [None]:
topic_model = ct.Corex(n_hidden=6, words=words, seed=1)
topic_model.fit(doc_word, words=words, docs=data.TEXT)

<corextopic.corextopic.Corex at 0x7f88140b5c60>

In [33]:
# Print all topics from the CorEx topic model
topics = topic_model.get_topics()
for n,topic in enumerate(topics):
    topic_words,_,_ = zip(*topic)
    print('{}: '.format(n) + ','.join(topic_words))

0: reports,ensure,responsibilities,management,preparing,daily,client,customer,requirements,planning
1: los,angeles,cienega,cloutierremixcom,california,avenue,aguilar,valdez,makeup,microondas
2: electrical,maintenance,installation,engineer,power,commissioning,plant,equipment,testing,erection
3: birth,date,marital,nationality,personal,english,status,hindi,father,known
4: objective,skills,career,email,qualification,university,experience,organization,professional,year
5: research,medical,surgery,orthopaedic,conference,international,hospital,association,society,trauma


In [35]:
tfidf = TfidfVectorizer(stop_words='english')
doc_words = tfidf.fit_transform(data.TEXT)
tfidf_df = pd.DataFrame(doc_words.toarray(),columns=tfidf.get_feature_names_out())

In [36]:
topic_nums = list(np.arange(3, 11, 1))
for num in topic_nums:
    nmf = NMF(num)
    nmf.fit_transform(doc_words)
    display_topics(nmf, tfidf.get_feature_names_out(), 100)
    print("------------------------------------------")


Topic  0
project, electrical, maintenance, site, installation, engineer, work, power, construction, equipment, safety, drawings, works, quality, projects, mep, preparation, plant, daily, control, panels, commissioning, material, testing, design, operation, progress, engineering, client, responsibilities, erection, cable, planning, technical, inspection, systems, dubai, schedule, company, monitoring, building, report, execution, standards, job, ensure, management, panel, india, monthly, experience, contractor, activities, test, materials, tower, reports, review, preparing, designation, hvac, prepare, coordination, monitor, coordinate, civil, weekly, drawing, substation, saudi, responsible, checking, make, water, check, lighting, solar, including, related, requirements, facility, transformer, specifications, position, pvt, basis, tender, day, consultant, employer, preventive, knowledge, time, plans, duration, working, mechanical, years, team, distribution

Topic  1
date, place, declare,

# **Fisrt Itration of Removing Domain Specific Word**

In [37]:
stop_words = ['format', 'want', 'contact', 'adres', 'relevant', 'avoid', 'aplying', 
              'demonstrate', 'examples', 'employer', 'refer', 'reference', 'phone', 
              'start', 'colege', 'posible', 'sure', 'important', 'use', 'number', 'verbs', 
              'style', 'cover', 'include', 'action', 'clas', 'type', 'leter', 'kep', 
              'separate', 'font', 'draft', 'person', 'aply', 'acomplishments', 'thre', 'speak', 
              'way', 'read', 'ned', 'degre', 'employers', 'volunter', 'begin', 'past',
              'language', 'aditional', 'submit', 'guide', 'candidate',
              'clases', 'highlight', 'make', 'copy', 'oportunity',
              'think', 'aid', 'title', 'lok', 'location', 'organize', 'serve', 'print', 
              'supervise', 'atention', 'contribute', 'expect', 'seking', 'chronological',
              'ask', 'reverse', 'compile', 'white', 'explain', 'coursework',  'easy',
              'note', 'honors', 'brief', 'emphasize', 'erors', 'short', 'construct', 'writen', 
              'author', 'place', 'abroad', 'consider', 'gain', 'choose', 'advise', 
              'know', 'speling', 'internship', 'recent','asociation', 'research', 
              'curiculum', 'program', 'public', 'council', 
              'publications', 'comunity',  'awards',  
              'science', 'practice', 'asociate', 
              'presentations', 'academy', 'evaluation',  'social', 'chair',  'foundation', 
              'studies', 'activities',  'asesment', 'special', 
              'workshop', 'study', 'grant', 'human', 'work', 'world', 
              'felow', 'year', 'honors', 
              'undergraduate', 'life',  'chapter', 
              'seminar', 'non', 'private', 'policy', 'cordinator', 'curent',   
              'future', 'clas',  'conference', 
              'united', 'county', 'home', 'family', 'pres', 'symposium', 'forum', 'john',
              'isues', 'bok', 'case', 'series', 'scholarship', 'history', 
              'graduate', 'editor', 'outstanding', 'suport', 'learning', 'regional',
              'administrative',  'young', 'second', 'thre', 'court', 'initiative'
              'available', 'request', 'personal', 'english', 'references', 'template',
              'example', 'loking', 'info', 'question', 'read', 'qualifications', 'welcome', 
              'prior', 'websites',   'permision', 'download', 'distribute',
              'link', 'dayjob', 'copyright', 'able', 'ccopyright', 'areas', 'duties', 'key', 
              'expertise',  'competencies', 'hons', 'coventry',  'sumary', 
              'details',  'highly', 'nationality',  'jobsekers', 'profesional',
              'driving', 'birmingham', 'license', 'british', 'environment', 'central',
              'work', 'yes', 'suitable', 'nuneaton',  
              'senior', 'working', 'history',  
              'deliver',  'ambitious',  'having',
              'creative', 'providing', 'atention', 'people', 'coleagues', 'carer', 'drive',
              'extensive', 'particular', 'road', 'pose', 'contact', 'sekers', 'run',
              'dob', 'right', 'individual', 'prove', 
              'confident', 'diploma', 'asisting', 'motivate', 'big' ,'interpreted', 
              'arbitrated', 'shaped', 'invented', 'delegated', 'explained', 'surveyed', 
              'wrote', 'inspected',  'checked', 'calculated', 'compared', 
              'stimulated', 'repaired', 'weight', 'fabricated', 'scheduled', 'specialized', 
              'programed', 'contracted', 'adresed', 'educated', 'advertised', 'atained', 
              'modified', 'reinforced', 'examined', 'synthesized', 'acomplished', 'estimated',
              'criteria', 'solved', 'operated', 'directed', 'asembled',
              'influenced' , 'executed', 'persuaded', 'verified', 
              'adapted', 'computed', 'determined', 'fod', 'john', 'aranged', 'entertained', 'established', 
              'instituted', 'revised', 'separate', 'fel', 'furnished', 'originated', 'body',
              'status', 'conceived', 'aplicants', 'published', 'thank', 'unified', 'facilitated', 
              'guidelines', 'transmited', 'familiarized', 'say', 'wish', 'demonstrated', 
              'hired', 'discovered', 'indicate', 'instaled', 'anticipated', 'streamlined', 
              'projected', 'formulated', 'age', 'researched', 'underline', 'cover', 'asesed',
              'gathered', 'employer', 'proposed', 'sure', 'adreses', 'qualified', 'expanded',
              'detected', 'pionered', 'right', 'fashioned', 'clarified', 'indian', 'hindi', 'maried', 'male']

In [38]:
df_after_2 = remove_domain_stop_words(stop_words = stop_words, df_edit = data)

In [39]:
df_after_2

Unnamed: 0,TEXT,SPACY_TEXT
0,information fullnames kisasatiwanaswa idcardno postaladdress box telephoneno emailaddress mikewanaswagmailco mlanguages well spoken and swahilipurpose put the latest inventions telecommunication and information technology for positive impact individuals business enterprises and corporate organizations experience date todate position fixed data networktechnician ben’s electronics services ltdmombasa survey installation integration maintenance support and decommissioning fixed data services using various access technologies wimax fiber microwaves and wifi for safaricomltd survey installation and support ceragon’s access technology for airtel survey installation and support cambridge pmp solutions for safaricom fiber optics splicing and terminations deployment support and maintenance wanlan design installation and technical support structured cabling installation and support for cameras biometrics security controls installation and support for radwin links and ceragon ptmplinks installation and support for telrad wimax bts wifi setup maintenance and support and sipvoip installation and support systems integration and huawei platforms for mpls ppvpn and internet fixed lte installation for safaricom’s enterprise clients date position freelance computertechnician self employed inmombasa installation software and hardware for pcs serversprinters web design and website maintenance repair pcs printers scanners and other computer accessories design installation and support date position scanning officersystemadministrator interim independent electoralcommission scanning omr forms extract data for thevoters creation and maintenance voters’ databaseregister end user training and support voter management system mmc administration mail servers administration,"(personal, information, curriculum, vitae, fullnames, mike, kisasatiwanaswa, idcardno, postaladdress, box, mombasa, telephoneno, emailaddress, mikewanaswagmailco, mlanguages, well, spoken, english, and, swahilipurpose, put, use, the, latest, inventions, telecommunication, and, information, technology, for, positive, impact, individuals, business, enterprises, and, corporate, organizations, work, experience, date, april, todate, position, fixed, data, networktechnician, employer, ben, ’s, electronics, services, ltdmombasa, duties, survey, installation, integration, maintenance, support, and, decommissioning, fixed, data, services, using, various, access, technologies, wimax, fiber, microwaves, and, wifi, for, safaricomltd, survey, installation, and, support, ceragon, ’s, access, technology, for, airtel, survey, installation, and, support, cambridge, pmp, solutions, for, safaricom, fiber, optics, splicing, and, terminations, deployment, ...)"
1,network management data recovery repair and maintenance scanner printers and computers date position freelance computert installation software and hardware for pcs serversprinters repair pcs printers scanners and other computer accessories lanwan support educational background date institution inoorero university school professional nairobicourse computer engineering system administration and supportgrade credit date institution bungoma high schoolbungoma grade kcse mean grade date institution bungoma deb primary schoolbungoma grade kcpe out marks professional trainings date institution amirancommunications course radwin installer pppmp grade distinction date institution broadband networks kenyaltd course vectarstarinstallation grade distinction date institution ceragonnetworks course ceragon certified rollout professionalipcsen grade distinction date institution ruckus course ruckus wireless certified associate support engineerrwcasupe grade distinction date institution iatmombasa course ccna grade distinction,"(network, management, data, recovery, repair, and, maintenance, scanner, printers, and, computers, date, january, march, position, freelance, computert, duties, installation, software, and, hardware, for, pcs, serversprinters, repair, pcs, printers, scanners, and, other, computer, accessories, lanwan, support, educational, background, date, september, december, institution, inoorero, university, kenya, school, professional, studies, nairobicourse, diploma, computer, engineering, system, administration, and, supportgrade, credit, date, feb, november, institution, bungoma, high, schoolbungoma, grade, kcse, mean, grade, date, jan, december, institution, bungoma, deb, primary, schoolbungoma, grade, kcpe, out, marks, professional, trainings, date, july, institution, amirancommunications, course, radwin, installer, pppmp, grade, distinction, date, march, institution, cambridge, broadband, networks, kenyaltd, course, vectarstarinstallation, ...)"
2,date institution viscar industrial capacityltd course fall arrest technicianwork atheight grade distinction possession clean class bce licensereferees patrick odame election coordinator bungoma box bungoma snyodameyahoocou cyrus soi projects manager bens electronics box mombasa infobensele ctronicscoke christine owuor fixed access engineer safaricom box cowuorsafaricomcoke kelvin ongoro field engineersafaricom box kongorosafaricom coke,"(date, july, institution, viscar, industrial, capacityltd, course, fall, arrest, technicianwork, atheight, grade, distinction, note, possession, clean, class, bce, driving, licensereferees, patrick, odame, regional, election, coordinator, bungoma, box, bungoma, phone, snyodameyahoocou, cyrus, soi, projects, manager, bens, electronics, box, mombasa, phone, infobensele, ctronicscoke, christine, owuor, fixed, access, engineer, safaricom, box, nairobi, phone, cowuorsafaricomcoke, kelvin, ongoro, field, engineersafaricom, box, nairobi, phone, kongorosafaricom, coke)"
3,curriculum vitae ayush pandey current address plot shastri nagar pradesh permanent address plot shastri nagar pradesh passport issue lucknow date shubpandeyyahooin objective challenging and rewarding position growthoriented organization this offers diverse job responsibility mechanical engineering new product introduction and will fully utilize technical and managerial skills education qualification bachelor mechanical engineering seth sriniwas agarwal institute engineering technology uptu kanpur awarded class completed xiith standard the from sardar patel awarded class completed xth standard the from ram krishana mission higher school kanpur board awarded class employment frontier alloy steels ltd kanpur upindia design development engineer deptt present understand and the complete component drawings developing modelling and drawings with gdt component and pattern finite element analysis for stress strain and deformation analysis with the help software casting simulation solve methoding and find out defects like shrinkage through procast software before production engineering process machining inspection inspection the products defect analysis and then implement the ways minimize present defect within time process planning execution lead the shop floor per requirement when products are under development deptt maintain all the records per international standards develop different types components ayush pandey shubpandeyyahooin page,"(curriculum, vitae, ayush, pandey, current, address, plot, shastri, nagar, kanpur, uttar, pradesh, india, permanent, address, plot, shastri, nagar, kanpur, uttar, pradesh, india, passport, place, issue, lucknow, expiry, date, shubpandeyyahooin, objective, challenging, and, rewarding, suitable, position, growthoriented, organization, this, offers, diverse, job, responsibility, mechanical, engineering, new, product, introduction, and, will, fully, utilize, technical, and, managerial, skills, education, qualification, bachelor, mechanical, engineering, seth, sriniwas, agarwal, institute, engineering, technology, uptu, kanpur, awarded, first, class, completed, xiith, standard, the, year, from, sardar, patel, inter, college, kanpur, board, awarded, first, class, completed, xth, standard, the, year, from, ram, krishana, mission, higher, secondary, school, kanpur, board, ...)"
4,purpose machine remove burs from sharp corner edges after machining software skill strong command below given software for computer aided designing catia for modeling partassembly and drafting both procast for casting simulation detect shrinkage air entrapment molten metal flow ugnx for modeling partassembly drafting further analysis solidworks for modeling partassemblydrafting knowledge for finite element analysis structural analysis solidcam basic knowledge for machining autocad avicad for drafting engineering computer application technical skill excellent skills designing drawings models excellent skills casting manufacturing process pipe designing engineering computer aided designing good presenter plant layout design development process planning facility planning documentations project management risk management lesson and implementation machine specification and finalization power grid corporation limited pwd frontier topic manufacturing hot coiled helical springs major projects current industrial project “couplerknuckle” which are also using wagons industrial project sep team members role this project did all the designing developing part components such drafting modeling casting simulation for defect free casting quality proof casting documentation industrial project “ gear sl” which are using wagons industrial project role this project lead the designing portion for this product team members automated flexialbe transfer line college project role this project completed all the designing part such ddrawings dmodeling team members running project coach coupler with balanced gear ayush pandey shubpandeyyahooin page,"(special, purpose, machine, remove, burs, from, sharp, corner, edges, after, machining, software, skill, having, strong, command, below, given, software, for, computer, aided, designing, catia, for, modeling, partassembly, and, drafting, both, procast, for, casting, simulation, detect, shrinkage, air, entrapment, molten, metal, flow, ugnx, for, modeling, partassembly, drafting, further, analysis, solidworks, for, modeling, partassemblydrafting, ansys, knowledge, for, finite, element, analysis, structural, analysis, solidcam, basic, knowledge, for, machining, autocad, avicad, for, drafting, reverse, engineering, diploma, computer, application, technical, skill, excellent, skills, designing, drawings, models, excellent, skills, casting, manufacturing, process, pipe, designing, reverse, engineering, computer, aided, designing, good, presenter, plant, layout, design, development, process, ...)"
...,...,...
9769,completed four weeks summer training from“ railway signal and networking gorakhpur” experience computer networking and accounting experience technosys company ltd project mini project “library management system” team size responsibility about diagram coding description computer shop software multilanguage software for store billing management sell hardware systems components and accessories computer shop management system project will used for various purposes under computer shop major project “dual failure resiliency through backup mutual exclusion” team size description the fast advancing global information infrastructure including information technology and computer networks such the internet and telecommunications systemsenable the development electronic commerce global level the nearly universal connectivity which the internet offers has made invaluable business tool these developments have created new economy which many call the ‘digital economy’ this fast emerging economy bringing with rapidly changing technologies increasing knowledge intensity all business and creating virtual supply chains and new forms businesses and service delivery channel such ebanking achievements got third position high jump college athletics event profile father’s name ram naval yadav date birth languages known address villagegosaipur postjungle kauriya district gorakhpur declaration hereby declare that all the information mentioned above true and correct the best knowledge,"(completed, four, weeks, summer, training, from, “, indian, railway, signal, and, networking, gorakhpur, ”, experience, computer, networking, and, accounting, work, year, experience, technosys, company, ltd, project, mini, project, title, “, library, management, system, ”, team, size, responsibility, study, about, diagram, coding, language, description, computer, shop, software, multilanguage, software, for, store, billing, management, sell, hardware, systems, components, and, accessories, computer, shop, management, system, project, will, used, for, various, purposes, under, computer, shop, major, project, title, “, dual, link, failure, resiliency, through, backup, link, mutual, exclusion, ”, team, size, description, the, fast, advancing, global, information, infrastructure, including, information, technology, and, computer, networks, ...)"
9770,date amit kumar yadav itm email idamityadavitmgmailcom gida gorakhpur mobile,"(date, amit, kumar, yadav, place, itm, email, idamityadavitmgmailcom, gida, gorakhpur, mobile)"
9771,resume nishanth kumar address ramesh suvarna manjunath pranthya village mangalore taluk district karnataka email nishanthkumar gmailcom looking for opportunity reputed organization which will help best and upgrade skills engineering and meet the demands the organization company name jbm ogihara automotive india pvt ltd designation engineer trainee period june july roles and responsibilities oversee mechanical design phases from concept through production collaborate with mechanical designers large scale design project selected assist the tool design engineer the development jigs and fixtures using autocad solid edge and catia conducting the inspection and measuring the parts using various scale gauges and tools auditing the poka yoke error proof kaizen performed production team and approving the same countermeasure report for customer problems field complaints ascertaining proper adequate safety shop floor among team members employing kaizen quality department making simple but effective gauges for checking parts coordinating with jig maintenance and production control department achieve the periodic production targets course institution board passing percent jain high school karnataka objectives experience accademicqualification,"(resume, nishanth, kumar, address, ramesh, suvarna, manjunath, road, pranthya, village, mangalore, taluk, district, karnataka, email, nishanthkumar, gmailcom, phone, looking, for, opportunity, reputed, organization, which, will, help, deliver, best, and, upgrade, skills, engineering, and, meet, the, demands, the, organization, company, name, jbm, ogihara, automotive, india, pvt, ltd, designation, diploma, engineer, trainee, period, june, july, roles, and, responsibilities, oversee, mechanical, design, phases, from, concept, through, production, collaborate, with, senior, mechanical, designers, large, scale, design, project, selected, assist, the, tool, design, engineer, the, development, jigs, and, fixtures, using, autocad, solid, edge, and, catia, conducting, the, inspection, and, measuring, the, parts, using, various, scale, ...)"
9772,sslc moodbidri secondary education examination board puc jain junior college moodbidri department pre university education automobil engg snm polytechnic moodbidri board technical education bangalore programming languages basic programming html programming platform windows windows windows software catia solid edge autocad achieved zero ppmzero defects counter measure taken using why why analysis’ successfully installed crack detection lights for detection cracks and pinholes the parts catia and autocad certificate cad desk attended safety training conducted joai father’s name ramesh mother’s name nalini date birth gender marital single languages kannada tulu telugu hereby declare that particulars given above are true the best knowledge and take full responsibility for any discrepancy false information contribution and achievements mechanical and related skills trainings and certificates,"(sslc, moodbidri, secondary, education, examination, board, puc, jain, junior, college, moodbidri, department, pre, university, education, diploma, automobil, engg, snm, polytechnic, moodbidri, board, technical, education, bangalore, programming, languages, basic, programming, html, programming, platform, windows, windows, windows, software, catia, solid, edge, autocad, achieved, zero, ppmzero, defects, counter, measure, taken, using, why, why, analysis, ’, successfully, installed, crack, detection, lights, for, detection, cracks, and, pinholes, the, parts, catia, and, autocad, certificate, cad, desk, attended, safety, training, conducted, joai, father, ’s, name, ramesh, mother, ’s, name, nalini, date, birth, gender, male, nationality, indian, marital, status, single, languages, know, english, hindi, kannada, tulu, telugu, hereby, ...)"


In [40]:
tfidf2 = TfidfVectorizer(stop_words='english')
doc_words2 = tfidf2.fit_transform(df_after_2.TEXT)
tfidf_df2 = pd.DataFrame(doc_words2.toarray(),columns=tfidf2.get_feature_names_out())

In [41]:
topic_nums = list(np.arange(10, 20, 1))
for num in topic_nums:
    nmf = NMF(num)
    nmf.fit_transform(doc_words2)
    display_topics(nmf, tfidf2.get_feature_names_out(), 100)
    print("------------------------------------------")


Topic  0
project, site, construction, engineer, works, preparation, drawings, client, civil, projects, quality, execution, safety, inspection, material, progress, design, designation, company, building, materials, engineering, drawing, preparing, coordination, testing, planning, job, checking, water, commissioning, duration, contractors, supervision, consultant, responsibilities, responsible, solar, schedule, reports, ensure, specification, contractor, residential, specifications, piping, approved, structural, quantity, test, experience, management, india, cost, sub, prepare, mechanical, ensuring, power, monitoring, manager, required, plan, pvt, saudi, pipe, consultants, plant, completion, requirement, control, standards, engineers, coordinating, line, concrete, approval, report, manpower, weekly, time, requirements, team, hvac, contract, documents, review, limited, period, layout, bills, autocad, role, technical, contracting, dec, various, daily, steel, final

Topic  1
date, declare,

# **Second Itration of Removing Domain Specific Word**

In [42]:
stop_words_2 = ['resume', 'list', 'use', 'position',
                'job', 'present', 'include', 'special',
                'carer', 'adres', 'relevant', 'sumer', 'leter', 'fal',
                'state', 'parent','volunter', 'cover', 'wil',
                'section', 'comunity', 'member', 'objective','intern', 
                'society', 'title', 'make', 'clas', 'center', 'sample', 'august',
                'employer', 'internship', 'monash', 'page', 'graduate', 'profesional', 'asisted', 'language',
                'campus', 'honors', 'help', 'city', 'public', 'word', 'asociation', 'year', 'bachelor',
                'staf', 'contact','format', 'events', 'chicago', 'specific', 'date', 'social', 'nebraska', 
                'point', 'neds','project', 'use','team', 'end', 'create','proces','document','role',
                'experience', 'case', 'requirement', 'description', 'report', 'worked', 'used',
                'object', 'like', 'lead', 'source', 'custom', 'provide', 'enterprise', 'tol', 'hibernate',
                'new',  'team', 'representative', 'store', 'teritory', 
                'year','brand', 'experience', 'train', 'key', 'city', 'goals', 'profesional', 'years',
                'generate', 'present', 'regional', 'anual', 'quota', 'executive', 'representatives', 'lead',
                'achieved', 'build', 'skils', 'client', 'rep', 'oportunities', 'achievements', 
                'york', 'latin', 'region', 'achieve', 'texas', 'entries', 'knowledge', 
                'asociate', 'direct', 'industry', 'profitability', 'position', 'promotion', 'plan', 'consistently',
                'distribution', 'national', 'university', 'online', 'leadership', 'advertise',
                'kansas', 'profit', 'major', 'global', 'responsible', 'internet', 'bachelor', 'international',
                'statements','prepared', 'prepare', 'corporate', 'ledger', 'capital', 'company',
                'reconciliation', 'skils', 'experience','receivable','flow', 
                'internal', 'ability', 'preparation', 'entry', 'principles', 'parkland', 'general', 'project',
                'proces', 'plan', 'new', 'anual', 'quarterly','resume', 'cityland', 'review',
                'chicago', 'team', 'performed', 'work', 'cpa',
                'profesional', 'chartered', 'level', 'bachelor', 'fitzwiliam', 'email', 
                'strong', 'main', 'qualifications', 'performance', 'record', 'stret', 'senior', 'university'
                'exprience', 'details', 'months', 'les', 'description', 'maharashtra', 'year', 'company', 'pune',
                'january', 'monthscompany', 'mumbai', 'pvt', 'nagpur','india', 'june', 'universityhrskil', 
                'work', 'diploma', 'working', 'hibernate', 'worked', 'nashik', 'god', 'telangana', 'skils', 
                'requirement', 'learn', 'django', 'cricket', 'phule', 'savitribai', 'knowledge',
                'board', 'skil', 'amravati', 'secondary', 'university', 'role', 'servlet', 
                'institute', 'duration', 'title', 'solution', 'pradesh',
                'days', 'like', 'angular', 'job', 'solapur',
                'dayjob', 'personal', 'birmingham', 'template', 'coventry', 'able', 'use', 'info', 
                'available', 'work', 'skils', 'download', 'ability', 'distribute',
                'com', 'ccopyright', 'fod', 'copyright', 'make', 'permision', 'welcome', 
                'question', 'loking','prior', 'read', 'example', 'hons', 'cvs', 'page', 'help', 'nuneaton', 'gary',
                'company', 'duties','references', 'pas', 'jobsekers',
                'areas', 'relate', 'key', 'request', 'resume', 'competencies', 'ambitious', 'email',
                'driving', 'yes', 'restaurant', 'vyse', 'sekers', 'suitable', 'expertise', 'level',
                'qualifications', 'team', 'understand', 'pose', 'god', 'experience', 'maner',
                'knowledge', 'license', 'particular', 'highly', 'create', 'confident', 'nationality',
                'write', 'responsible', 'profesional', 'having', 'peg', 'staf', 'wel', 'excite', 
                'project', 'providing','project', 'record', 'environment',
                'train', 'company', 'performance',  'plan', 'implement', 'include', 'staf', 
                'team',  'new', 'resource', 'profesional', 'conduct', 'reduce', 'procedures',
                'relations', 'training', 'executive', 'personel', 'xae', 'senior', 'job', 'result', 
                'candidates', 'isues','department', 'division', 'improve','change', 'identify', 
                'leadership', 'lead', 'provide', 'time', 'work', 'level', 'save',
                'project', 'work','plant','team', 'specifications', 'plan', 'environmental','experience',
                'company', 'knowledge', 'include', 'india','phase', 'client',
                'complete', 'preparation', 'society','wind','scope', 'sub', 'completion', 
                'excelent', 'forte', 'god', 'skils', 'poses', 'key', 'air', 'structure', 'prepare',
                'farm','multitasking', 'agriculture', 'qualification', 'til', 'know', 'horticulture',
                'fluent', 'knowledge', 'profesional', 'template', 'florida', 'latin',
                'date', 'proficient', 'presentations', 'pest', 'gadgets', 'significant',
                'report', 'degre', 'achievement', 'player', 'resume', 'english', 'languages',
                'include', 'behavior', 'interests', 'skiled','various', 'cary', 'adept', 'bachelor', 'com',
                'crop', 'wild', 'extensive', 'stret', 'gmail', 'amigo',
                'nursery', 'supervisor', 'update', 'experience', 'miami', 
                'exceptionaly', 'wel', 'joye', 'diploma', 'harvest', 'won', 
                'mower', 'objective', 'sonata', 'carer', 'thompson', 'study', 'gren', 
                'display', 'understand','new', 'skils', 'qualifications', 'strong', 'open',
                'teler', 'cal', 'procedures', 'ability', 'work', 'knowledge',
                'check', 'phone', 'experience', 'asistant', 'excelent', 'client', 
                'god', 'georgia', 'representative', 'telers', 'daily', 'various', 
                'objective', 'asisted', 'sumary', 'profesional', 'provide', 'request', 'jersey', 
                'parkland', 'personalize', 'cals', 'policies', 'reference', 'cityland',
                'efective', 'position', 'handle', 'center', 'provided', 'carer',
                'handled', 'seking', 'fod', 'com', 'acount', 'email',
                'high', 'orient', 'interpersonal', 'properties', 'order', 'furnish', 'train', 'handling',
                'adept','york', 'present', 'nevada' ,'experience', 'campaign', 'frelance', 'online', 'work',
                'clients', 'xad','include', 'gogle','new', 'mobile', 'create',
                'facebok', 'galery', 'layout', 'university','mac', 'model', 'concept', 'layouts',
                'concepts', 'present', 'monash', 'team', 'york', 'lead', 'created', 
                'california', 'corporate', 'twiter', 'company', 'usability',
                'ideas', 'sample', 'client', 'skils', 'direction','care', 
                'asociation', 'center', 'comunity', 'xad', 'practice', 'present', 
                'university', 'member', 'department', 'terminology', 'eagan', 'staf', 'schol'
                'american', 'provide', 'education', 'public', 'colege', 'new'
                'carolina', 'york', 'conference', 'board', 'group', 
                'state', 'ability', 'experience', 'knowledge', 'kovner', 'perform', 'cordinator', 
                'profesional', 'condition', 'train', 'family', 'home', 'biling', 'registered', 
                'record', 'general', 'seidman', 'work','anual', 'ray', ]

In [44]:
df_after_3 = remove_domain_stop_words(stop_words = stop_words_2, df_edit = df_after_2)

In [45]:
tfidf3 = TfidfVectorizer(stop_words='english')
doc_words3 = tfidf3.fit_transform(df_after_3.TEXT)
tfidf_df3 = pd.DataFrame(doc_words3.toarray(),columns=tfidf3.get_feature_names_out())
tfidf_df3

Unnamed: 0,aa,aaa,aaai,aaansariaamirgmailcom,aab,aabar,aabari,aabbdduull,aabdul,aabkari,...,ﬁxes,ﬂash,ﬂexible,ﬂights,ﬂow,ﬂows,ﬂute,ﬃce,ﺪﻤﺣأ,ﻢﻴﻠﺳ
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9770,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9771,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
topic_nums = list(np.arange(10, 20, 1))
for num in topic_nums:
    nmf = NMF(num)
    nmf.fit_transform(doc_words3)
    display_topics(nmf, tfidf3.get_feature_names_out(), 100)
    print("------------------------------------------")


Topic  0
skills, school, college, engineering, computer, organization, career, good, professional, technical, delhi, academic, mechanical, technology, course, passed, educational, software, windows, higher, passing, communication, vitae, office, challenging, growth, btech, curriculum, percentage, class, intermediate, basic, certificate, cbse, chennai, excellent, marks, skill, institution, address, operating, design, engineer, utilize, excel, industrial, completed, participated, degree, microsoft, sslc, cad, auto, nagar, hsc, hard, electronics, autocad, opportunity, abilities, polytechnic, month, electrical, using, power, profile, hardware, examination, cgpa, seeking, civil, development, management, sec, matriculation, programming, attended, inter, technologies, oriented, field, kumar, positive, post, ssc, strengths, mob, lucknow, problem, birth, proficiency, bseb, organizational, kerala, production, summer, analytical, attitude, designing, aggregate

Topic  1
declare, true, declaratio




Topic  0
testing, commissioning, installation, solar, test, power, systems, control, troubleshooting, software, server, support, erection, network, windows, configuration, hardware, panel, energy, engineer, using, projects, design, instruments, inspection, quality, protection, cable, technical, instrumentation, cables, automation, loop, equipment, transformer, designing, calibration, field, relay, operating, configuring, relays, line, instrument, piping, thermal, data, plc, networking, grid, programming, development, gas, user, access, alarm, lan, tools, servers, installing, desktop, switch, issues, dcs, scada, based, cctv, backup, switches, limited, devices, implementation, analysis, transformers, services, pressure, earth, plants, technologies, tests, pipe, manual, remote, different, documentation, security, monitoring, abb, checking, solutions, process, selection, valves, technology, certified, hands, station, functional, management, unit

Topic  1
declare, true, best, declaration,

# **Third Itration of Removing Domain Specific Word**

In [47]:
stop_words_3 =['diligence', 'real', 'sector', 'institutional','potential',  'acros',
               'acquisition', 'target', 'pitch', 'blomberg', 'coverage','focus', 'gpa',
               'relationship', 'mandaluyong','relationships',
               'raise', 'sectors', 'select', 'ra', 'chinese', 'cros', 'beijing', 'shanghai', 'nus', 'mandarin',
               'term','evaluate', 'native', 'asian', 'club', 'increase', 'quantitative' ,'philipines',
               'bicutan', 'jaleco', 'pasay', 'apostolic', 'pasword', 'february',
               'newly','ofers', 'centre',
               'utilize', 'profile','edemed', 'makati', 'adamson', 'leadsrus', 'sugestive', 'askim', 
               'teletech', 'turkish', 'roxas','masterfile', 'mutena',
               'aque', 'acomodating', 'impresion', 'yah', 'destinations', 'roduct', 'tutorial', 'deira', 'kfc', 'enrol',
               'para', 'philipine', 'merchants', 'satisfy', 'preference', 'character', 'discusing', 'roy',
               'prevention','skype', 'reward', 'detect', 'productivity', 'setup', 'fit', 'discusion', 'acurately',
               'proposal','dubai', 'uae', 'admin', 'items', 'rate', 'listen', 'employ', 'block', 'necesary',
               'promote', 'availability', 'acurate', 'december', 'exist', 'rat', 'standard',
               'dubai','designation', 'supervision', 'saudi', 'progres', 'pip', 'uae', 'aproved','structural', 'cable',
               'kenya', 'chenai', 'flor', 'taif',  'quantity','ksa', 'authority', 'calculation',
               'pile','pasport', 'tamil', 'delhi', 'wals', 'wekly', 'load','arabia', 'require', 'ering', 'gi', 'clearance', 'standards',
               'workers', 'area', 'comisioning', 'selection','auxiliary', 'foundations', 'split', 'aproval', 'erection', 'snag'
               'malaysia', 'charter', 'ernst', 'estate', 'standards', 'regulatory', 'real', 'charge', 'statutory', 'nanyang',
               'mas', 'chinese','australia', 'pricewaterhousecopers', 'consolidation', 'entities',
               'gap', 'touche', 'local', 'property', 'sgx', 'members', 'overseas', 'paper', 'junior', 'partner',
               'mandarin', 'club', 'students', 'student', 'acordance', 'proceses',
               'lim', 'recomendations', 'pte', 'diferent', 'honour', 'programe',
               'equity', 'involvement', 'weakneses', 'statement', 'advance', 'udit', 'cycle', 'cantonese', 'tight', 'evaluate', 'deadlines', 'ntu'
               'angeles', 'cloutier', 'remix', 'jenifer', 'avenue', 'cienega', 'los', 'ele', 'david', 'michael',
               'oreal', 'kate', 'marie', 'jesica', 'cloutieremix', 'cali', 'fornia', 'claire', 'kely', 'lopez',
               'beyonce', 'harper', 'bazar', 'elizabeth','jones', 'michele', 
               'diane', 'rachel', 'mathew', 'stone', 'christina', 'sarah', 'vanity', 'paul', 'jam', 'pantene', 'chris',
               'fair', 'kim', 'robert', 'smith', 'taylor', 'vanesa', 'melisa', 'wiliams', 'marcus',
               'tom', 'eva', 'lisa', 'mary', 'laura', 'rolston', 'davis', 'jeans', 'lauren', 'elen', 'julia', 'girl', 'peter', 'ryan', 'jane',
               'mark', 'nicole', 'graham', 'jason', 'wilson', 'italy', 'patrick', 'tyler', 'ashley', 'groming', 'andrew', 'miler',
               'hunter', 'scot', 'secret', 'wekly', 'fox', 'videos', 'victoria', 'ane','hedge', 'nav', 'operations',
               'compliance',  'resolve', 'citco', 'net','fes', 'calculation',  'blomberg','prime','regulatory', 'timely', 'external', 
               'setlement', 'equities','futures', 'calculations', 'redemption', 'pte','derivatives', 'agrements', 'valuations',
               'polytechnic', 'apr', 'party', 'geneva', 'closely', 'custodians', 'hoc', 'break',  'bond', 'migration',
               'london','acurate','function', 'midle', 'options','act', 'requirements', 'honour', 'acordance', 'eficiency', 
               'alternative', 'basis', 'launch', 'mar','fix' ,'maters', 'minutes','resolutions', 'file', 'legal',
               'liaise','incorporation','leave', 'malaysia', 'register', 'ofshore', 'pte', 'governance', 'chinese', 'reason', 
               'share', 'regulatory','acra', 'ful', 'notice','requirements', 'hoc', 'ing', 'form', 'month', 'act', 'comites', 'external',
               'march', 'proper', 'kuala', 'lumpur', 'return', 'cordinate', 'atend', 'viewpoint', 'function', 'agenda',
               'party', 'aranging', 'wpm', 'registration', 'spectrum', 'availability', 'range','bhd','sdn', 'travel', 'hkics',
               'require', 'tunku', 'keping','malaysian', 'cantonese',
               'authority', 'counsel', 'agrements', 'overseas', 'islands', 'wong', 'sin', 'lci','regulations', 'renewal','chenai',
               'declaration', 'declare', 'true', 'birth', 'hobies', 'marital',  'singh', 'jaipur',
               'tamil', 'play', 'pasing', 'pasport', 'hard', 'delhi', 'kumar', 'gender',
               'self', 'single', 'mentionvidyalaya','listen','dist', 'higher', 'percentage', 'engine', 'kerala', 'institution', 'belief',
               'cbse', 'kendriya','divyank', 'nagar', 'mark', 'mail',  'pin', 'corect', 'odisha', 'lucknow', 'sex', 
               'chalenging', 'strength', 'vitae', 'production', 'positive', 'cgpa', 'abilities', 
               'utilize', 'profile', 'permanent', 'operate', 'atitude', 'father', 'mob', 
               'extra', 'basic', 'topic', 'active', 'problem', 'auto', 'noida', 'curicular', 'quick' 
               'erection', 'relay', 'circuit', 'grid','equipments', 'transformers','earth', 'switch',
               'troubleshoting', 'wire', 'light', 'instrument', 'scada', 'engine','comisioningpanel', 'equipment',
               'saudi', 'thermal', 'mysore', 'breakers', 'line', 'stel', 'instrumentation', 'material',
               'schedule', 'field', 'breaker', 'inverter', 'gujarat', 'maintain', 'diesel',
               'water', 'execution', 'profile', 'capacity', 'batery', 'pcb', 'calibration', 'qatar',
               'generation', 'tech', 'termination', 'tamilnadu', 'operate', 'bms', 'lab', 'generator',
               'starters', 'sri', 'lay', 'inverters', 'suply','takahashi','indiana', 'pensylvania', 'sameshima', 'american',
               'delhi', 'sport','joint', 'paulo',  'bacal', 'ahmedabad', 'atended','ortho', 'ankit', 'hand', 'vadodara',
               'vitae', 'bangalore','september', 'nitrini', 'carameli', 'goacon', 'gujarat', 'kenya',
               'chenai', 'paper', 'usa', 'total', 'march', 'cadaveric', 'mckirgan', 
               'post', 'ioacon', 'iowa', 'comite', 'broklyn', 'poster',
               'posterior', 'november', 'october', 'unite', 'february', 'centre','princeton',
               'ganga', 'mangalore', 'maryland', 'presentation', 'brazil', 'tamilnadu', 'orthop',
               'kentucky', 'philipsburg','month','pte','ful', 'timely', 'payrol', 'file', 'gst', 'schedule', 'forecast', 'liaise',
               'return', 'close','statement', 'function', 'submision', 'external', 'basis','period', 'operations', 'reconcile', 'apr',
               'treasury', 'erp','branch', 'wekly', 'task','consolidation', 'hoc', 'query', 'myob', 'yearly', 'ing', 'cheque', 'consolidate',
               'polytechnic','claim', 'journal', 'subsidiaries', 'require', 'fix', 'verify', 'supliers', 'mar', 'kenya', 'receive', 'count',
               'powerpoint']

In [48]:
df_after_4 = remove_domain_stop_words(stop_words = stop_words_3, df_edit = df_after_3)

In [50]:
tfidf4 = TfidfVectorizer(stop_words='english')
doc_words4 = tfidf4.fit_transform(df_after_4.TEXT)
tfidf_df4 = pd.DataFrame(doc_words4.toarray(),columns = tfidf4.get_feature_names_out())
tfidf_df4

Unnamed: 0,aa,aaa,aaai,aaansariaamirgmailcom,aab,aabar,aabari,aabbdduull,aabdul,aabkari,...,ﬁxes,ﬂash,ﬂexible,ﬂights,ﬂow,ﬂows,ﬂute,ﬃce,ﺪﻤﺣأ,ﻢﻴﻠﺳ
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9770,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9771,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
topic_nums = list(np.arange(7, 15, 1))
for num in topic_nums:
    nmf = NMF(num)
    nmf.fit_transform(doc_words4)
    display_topics(nmf, tfidf4.get_feature_names_out(), 100)
    print("------------------------------------------")


Topic  0
maintenance, electrical, operation, installation, power, tower, mep, systems, panel, commissioning, engineering, control, lighting, rise, substation, testing, facility, storied, kva, engineer, transformer, wiring, ups, building, troubleshooting, possess, hvac, projects, preventive, electronics, contractor, hours, trouble, shooting, including, cables, ppm, kame, khor, makkah, electrician, day, gears, staff, voltage, technician, hold, alarm, monitoring, management, monthly, motors, professional, panels, safety, problems, repair, breakdown, protection, related, solar, repairing, trial, machine, taking, gas, plc, vcb, generators, following, valuation, schedules, energy, plumbing, battery, mechanical, pumps, pressure, electronic, spare, planning, service, operational, vfd, servicing, parts, shift, low, instruments, machines, drawings, fighting, responsibilities, engines, treatment, summary, subcontractors, ats, industrial, progress

Topic  1
skills, school, college, engineering, c

# **Data Labeling**

In [54]:
nmf = NMF(11)
doc_topic_nmf = nmf.fit_transform(doc_words4)
display_topics(nmf, tfidf4.get_feature_names_out(), 100)


Topic  0
electrical, maintenance, power, commissioning, testing, installation, panel, engineering, engineer, wiring, solar, control, operation, transformer, test, systems, voltage, electrician, protection, energy, lighting, projects, plc, motors, panels, electronics, gas, preventive, cables, repairing, technician, industrial, battery, instruments, motor, limited, low, tray, relays, transmission, machine, repair, trouble, gulf, shooting, kva, switchgear, laying, service, doha, breakdown, july, alarm, till, earthing, automation, types, tension, substation, design, troubleshooting, oil, fault, abb, mcc, using, machines, services, responsibility, organization, electric, management, responsibilities, cctv, plants, pumps, ups, loop, mva, boards, industries, planning, unit, turbine, current, technical, works, station, components, designing, mohammad, dcs, diagram, diagrams, chennai, glanding, refrigeration, set, professional, mdb

Topic  1
skills, school, college, engineering, computer, orga

In [55]:
topics_of_resume = pd.DataFrame(doc_topic_nmf, columns = ['Finance', 'Hospitality' , 
                                                          'Electrical_and_Mechanical_Engineering' ,
                                                          'Accounting' , 'Investment' ,
                                                          'Beauty_Artist', 
                                                          'Sales_and_Marketing' , 'Others' , 
                                                          'Secretarial' ,'Accounting' , 
                                                          'Health'])
topics_of_resume['Topic_Name'] = topics_of_resume.idxmax(axis=1)
topics_of_resume

Unnamed: 0,Finance,Hospitality,Electrical_and_Mechanical_Engineering,Accounting,Investment,Beauty_Artist,Sales_and_Marketing,Others,Secretarial,Accounting.1,Health,Topic_Name
0,0.020622,0.003994,0.026564,0.001799,0.000105,0.001173,0.035573,0.012777,0.013299,0.000000,0.002321,Sales_and_Marketing
1,0.003947,0.039175,0.009228,0.000000,0.000000,0.000000,0.000000,0.000000,0.003346,0.002189,0.003228,Hospitality
2,0.001116,0.009441,0.003554,0.000000,0.002858,0.000000,0.013625,0.000000,0.000000,0.005417,0.000000,Sales_and_Marketing
3,0.003965,0.047902,0.013113,0.000616,0.019998,0.011939,0.002183,0.000000,0.001249,0.000667,0.010942,Hospitality
4,0.009024,0.047046,0.018793,0.001393,0.014433,0.000000,0.000000,0.000000,0.000000,0.002226,0.000964,Hospitality
...,...,...,...,...,...,...,...,...,...,...,...,...
9769,0.000000,0.036788,0.041363,0.000000,0.000000,0.028259,0.000000,0.000000,0.005764,0.000000,0.000000,Electrical_and_Mechanical_Engineering
9770,0.000000,0.003772,0.000000,0.000000,0.000000,0.003089,0.000000,0.000000,0.000000,0.000000,0.000000,Hospitality
9771,0.008722,0.040875,0.010314,0.000000,0.024902,0.003389,0.000000,0.000000,0.008236,0.022841,0.000000,Hospitality
9772,0.000000,0.046050,0.000000,0.000000,0.000000,0.017894,0.000102,0.000000,0.003884,0.024098,0.000000,Hospitality


# **Store the Labeld Dataset into Pickle**

In [56]:
write_pickle('Resumes_PDF.pickle', topics_of_resume)