In [1]:
#Libraries
import numpy as np
import pandas as pd
import spacy
import re
import gensim.downloader as api
from gensim.models import KeyedVectors

nlp = spacy.load('en_core_web_md')

In [2]:
# Importing data into dataframes
gradProgramsJson = "../Data/Gradeo-data/graduateProgrammes.json"
internshipsJson = "../Data/Gradeo-data/internships.json"
vacworkJson = "../Data/Gradeo-data/vacwork.json"

gradProgramsDf = pd.read_json(gradProgramsJson)
internshipsDf = pd.read_json(internshipsJson)
vacworkDf = pd.read_json(vacworkJson)

### Helper Functions

In [3]:
# Dropping all the unnesecary columns => companyLogo; applicationURL; deadline
def coldropper(df):
    df.drop(["companyLogo", "applicationURL", "deadline"], axis=1, inplace=True)

# Remaps all locations in list of dataframes to the provinces. Good for low record volumes
def location_mapper(dflist, location_mapping_dict):
    for i in dflist:
        i["location"] = i["location"].map(location_mapping_dict)

# Gets similar words
def synoget(word_vectors, seed_word):
    synlis = []
    try:
        result = word_vectors.most_similar(positive=[seed_word.lower()])
        for r in result:
            synlis.append(r[0])
    except:
        print("None")
    return synlis

### Helper Vars

In [4]:
word_vectors = api.load("glove-wiki-gigaword-100")

location_mapping_dict = {
    'Addo Elephant National Park':'Eastern Cape',
 'Alberton':"Gauteng",
 'Boksburg':"Gauteng",
 'Bronkhorstspruit':"Gauteng",
 'Cape Town':"Cape Town",
 'Centurion':"Gauteng",
 'Durban':"KwaZulu-Natal",
 'Gauteng':"Gauteng",
 'Germiston':"Gauteng",
 'Hotazel':"Northern Cape",
 'Johannesburg':"Gauteng",
 'Kempton Park':"Gauteng",
 'Kuruman':"Northern Cape",
 'KwaZulu-Natal':"KwaZulu-Natal",
 'Midrand':"Gauteng",
 'Modderfontein':"Gauteng",
 'Mpumalanga':"Mpumalanga",
 'Mulbarton':"Gauteng",
 'Nation wide':"Nationwide",
 'North West':"North West",
 'Polokwane':"Limpopo",
 'Port Elizabeth':"Eastern Cape",
 'Pretoria':"Gauteng",
 'Richards Bay':"KwaZulu-Natal",
 'Roodepoort':"Gauteng",
 'Rustenburg':"North West",
 'Sandton':"Gauteng",
 'Thohoyandou':"Limpopo",
 'Vereeniging':"Gauteng",
 'Weltevredenpark':"Gauteng",
}

In [5]:
# Dropping unnecessary columns
coldropper(gradProgramsDf)
coldropper(internshipsDf)
coldropper(vacworkDf)

In [6]:
# Remapping all the locations
location_mapper([gradProgramsDf, internshipsDf, vacworkDf], location_mapping_dict)

In [7]:
# Combining all dataframes into 1
jobsDf = pd.concat([gradProgramsDf, internshipsDf, vacworkDf], axis=0)

In [8]:
#Creating seperate columns for each named entity
ner = nlp.pipe_labels['ner']
for i in ner:
    jobsDf[i] = ''

In [9]:
#NER main
for ind,i in enumerate(jobsDf["description"]):
    try:
        doc = nlp(i)
        for ent in doc.ents:
            jobsDf[ent.label_][ind] = jobsDf[ent.label_][ind] + "," + ent.text
    except:
        pass

In [10]:
# Only using NERs
collist = ["FAC", "GPE", "ORG", "PERSON", "PRODUCT", "WORK_OF_ART"]
df = pd.DataFrame( columns=collist)
joblist = []

for i in collist:
    for ind, j in enumerate(jobsDf[i]):
        refined = set(str(j).replace("0", "").replace("...\n","").split(","))
        reflist = list(refined)
        filteredls = [element for element in reflist if len(element) >= 3]
        finalstr = ', '.join(filteredls)
        finalstr = re.sub(r'\s+|[()\xa0&]', ' ', finalstr)
        word_list = re.findall(r'[A-Z][a-z]*', finalstr)
        filteredls = [element for element in word_list if len(element) >= 3]
        filteredls = list(set(filteredls))
        finalstr = ', '.join(filteredls)
        joblist.append(finalstr)
    df[i] = joblist
    joblist = []

In [11]:
# Aggregating all the named entities
df["Agg"] = df["FAC"] + df["GPE"] + df["ORG"] + df["PERSON"] + df["PRODUCT"] + df["WORK_OF_ART"]

In [12]:
finalDf = jobsDf[["companyName", "jobType", "location", "position", "description"]]
finalDf["Aggregation"] = df["Agg"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  finalDf["Aggregation"] = df["Agg"]


In [13]:
##Extended tag generation
finaltagls = []
for ind, i in enumerate(finalDf["Aggregation"]):
    synonym_ls = []
    i = i.replace(" ", "")
    list_tags = i.split(",")
    for i in list_tags:
        synonym_ls.append(i)
        synonym_ls = synonym_ls + synoget(word_vectors, seed_word=i)
    synowords = ', '.join(synonym_ls)
    finaltagls.append(synowords)
    print(synonym_ls)
    print(" ")

None
None
['Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'SouthPrograms', 'Eng', 'fra', 'aus', 'capt', 'saracens', 'leicester', 'rsa', 'esp', 'harlequins', '/', 'gareth', 'Must', 'should', 'cannot', 'would', 'need', 'can', 'will', 'not', 'could', 'be', 'come', 'Metals', 'minerals', 'copper', 'commodities', 'ferrous', 'commodity', 'ores', 'aluminium', 'alloys', 'chemicals', 'zinc', 'Professional', 'amateur', 'football', 'career', 'player', 'club', 'players', 'playing', 'team', 'sports', 'basketball', 'Chemical', 'chemicals', 'biological', 'toxic', 'materials', 'biochemical', 'compounds', 'industrial', 'components', 'nuclear', 'pesticides', 'Development', 'projects', 'project', 'developing', 'environment', 'research', 'economic', 'cooperation', 'planning', 'develop', 'creation', 'Completed', 'completion', 'completing', 'complete', 'first', 'construction', 'commenced', 'finished', 'later', 'project', 'prior', 'Mini

['Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Diploma', 'postgraduate', 'certificate', 'post-graduate', 'diplomas', 'doctorate', 'phd', 'baccalaureate', 'undergraduate', 'doctoral', 'bachelor', 'Eng', 'fra', 'aus', 'capt', 'saracens', 'leicester', 'rsa', 'esp', 'harlequins', '/', 'gareth', 'Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Continuous', 'constant', 'continual', 'periodic', 'static', 'rapid', 'linear', 'continuation', 'normal', 'extensive', 'direct', 'Honours', 'honors', 'honour', 'bestowed', 'knighthood', 'awarded', 'knighted', 'honorary', 'diploma', 'prizes', 'conferred', 'Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'The', 'this', 'part', 'one', 'of', 'same', 'first', 'on', 'its', 'as',

['Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'SouthHuman', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Wholesale', 'retail', 'consumer', 'sales', 'prices', 'retailers', 'demand', 'price', 'decline', 'inventories', 'consumption', 'Retail', 'sales', 'retailers', 'consumer', 'stores', 'market', 'retailing', 'wholesale', 'store', 'businesses', 'retailer', 'Administration', 'government', 'policy', 'bush', 'congress', 'officials', 'clinton', 'office', 'department', 'federal', 'washington', 'Education', 'educational', 'schools', 'teaching', 'c

['Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Eng', 'fra', 'aus', 'capt', 'saracens', 'leicester', 'rsa', 'esp', 'harlequins', '/', 'gareth', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Operations', 'operation', 'unit', 'operating', 'personnel', 'military', 'services', 'command', 'operational', 'units', 'force', 'Trainee', 'apprentice', 'full-time', 'part-time', 'janitor', 'technician', 'recruiter', 'trainees', 'paralegal', 'instructor', 'tutor', 'Opportunties', 'opportunites', 'opportunies', 'legals', 'intiatives', 'propects', 'cost-savings', 'character-building', 'powerplays', 'taskings', 'possibilites', 'Engineer', 'engineers', 'technician', 'mechanic', 'engineering', 'architect', 'contractor', 'officer', 'worked', 'master', 'chemist', 'Bsc', 'hons', 'b.sc', 'phd', 'ph.d', 'm.sc', 'bac

['Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Develop', 'developing', 'build', 'create', 'expand', 'improve', 'promote', 'establish', 'enhance', 'developed', 'provide', 'Export', 'import', 'exports', 'imports', 'goods', 'exporting', 'trade', 'output', 'demand', 'products', 'imported', 'Import', 'export', 'imports', 'exports', 'imported', 'tariffs', 'goods', 'importing', 'tariff', 'importation', 'quotas', 'Communication', 'communications', 'systems', 'services', 'technology', 'knowledge', 'information', 'learning', 'interaction', 'navigation', 'connections', 'Internal', 'external', 'ongoing', 'security', 'problems', 'investigations', 'control', 'political', 'investigation', 'current', 'source', 'Cleansing', 'massacres', 'genocide', 'atrocities', 'bloodletting', 'extermination', 'genocidal', 'repression', 'systematic', 'albanians', 'ritual', 'Computer', 'computers', 'software', 'technology', 'pc'

['Passion', 'spirit', 'love', 'enthusiasm', 'devotion', 'fascination', 'pride', 'obsession', 'creativity', 'joy', 'desire', 'Bcom', 'bsba', 'btech', 'bmus', 'b.com', 'b.b.a.', 'ed.d', 'bsee', 'm.b.a', 'j.s.d.', 's.j.d.', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Office', 'offices', 'house', 'department', 'administration', 'post', 'government', 'public', 'official', 'officials', 'headquarters', 'Sciences', 'science', 'institute', 'humanities', 'engineering', 'physics', 'biology', 'chemistry', 'mathematics', 'academy', 'arts', 'Microsoft', 'google', 'netscape', 'ibm', 'intel', 'software', 'yahoo', 'apple', 'aol', 'compaq', 'oracle', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Package', 'packages', 'plan', 'stimulus', 'budget', 'proposal', 'bailout', 'proposals', 'deal', 'cuts', 'plans', 'Competencies', 'competences', 

['Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Requirements', 'requirement', 'standards', 'regulations', 'required', 'rules', 'criteria', 'limits', 'guidelines', 'compliance', 'require', 'Building', 'buildings', 'houses', 'built', 'construction', 'tower', 'constructed', 'opened', 'structure', 'build', 'brick', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Systems', 'system', 'technologies', 'equipment', 'technology', 'software', 'computer', 'devices', 'components', 'communications', 'communication', 'Relationship', 'relationships', 'relations', 'friendship', 'ties', 'understanding', 'relation', 'role', 'affair', 'partner', 'engagement', 'Information', 'data'

['Mbeki', 'thabo', 'zuma', 'mandela', 'obasanjo', 'anc', 'mugabe', 'olusegun', 'klerk', 'motlanthe', 'chissano', 'GovanInformatics', 'Electrical', 'mechanical', 'electric', 'electricity', 'machinery', 'components', 'engineering', 'wiring', 'plumbing', 'equipment', 'generator', 'Regional', 'central', 'international', 'region', 'national', 'major', 'local', 'economic', 'conference', 'asia', 'western', 'Town', 'village', 'city', 'near', 'towns', 'area', 'nearby', 'northern', 'where', 'southern', 'west', 'Rand', 'sterling', 'dollar', 'franc', 'rupiah', 'francs', 'ringgit', 'dollars', 'rands', 'shilling', 'krone', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Geo', 'rtl', 'ary', 'ptv', 'tvi', 'tsf', 'antena', 'fx', 'nni', 'ntv', 'channel', 'Honours', 'honors', 'honour', 'bestowed', 'knighthood', 'awarded', 'knighted', 'honorary', 'diploma', 'prizes', 'conferred', 'Masters', 'pga', 'tournament', 'tenni

['Zulu', 'xhosa', 'inkatha', 'buthelezi', 'maori', 'ifp', 'kwazulu', 'sinhala', 'zulus', 'māori', 'malay', 'Kwa', 'ume', 'geok', 'turi', 'mulumba', 'sisi', 'kuku', 'kriva', 'fana', 'gok', 'labis', 'Natal', 'kwazulu', 'transvaal', 'gauteng', 'kwazulu-natal', 'durban', 'cape', 'zulu', 'bissau', 'vlore', 'bloemfontein', 'South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaUmgeni', 'Chemistry', 'physics', 'biology', 'biochemistry', 'mathematics', 'sciences', 'science', 'physiology', 'molecular', 'psychology', 'engineering', 'Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Requirements', 'requirement', 'standards', 'regulations', 'required', 'rules', 'criteria', 'limits', 'guidelines', 'compliance', 'require', 'Bachelor', 'doctorate', 'degree', 'bachelors', 'graduate', 'ph.d.', 'b.a.', 'undergraduate', 'graduated', 'phd', 'diploma', 'Ec

['Statistics', 'figures', 'report', 'data', 'statistic', 'bureau', 'according', 'estimates', 'statistical', 'survey', 'gdp', 'Data', 'information', 'analysis', 'tracking', 'database', 'system', 'computer', 'statistics', 'systems', 'applications', 'numbers', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Opportunities', 'opportunity', 'possibilities', 'advantages', 'advantage', 'benefit', 'experience', 'innovation', 'resources', 'development', 'encourage', 'Honours', 'honors', 'honour', 'bestowed', 'knighthood', 'awarded', 'knighted', 'honorary', 'diploma', 'prizes', 'conferred', 'Competencies', 'competences', 'competence', 'compete

None
['Audit', 'auditing', 'audits', 'auditors', 'auditor', 'accounting', 'oversight', 'review', 'evaluation', 'appraisal', 'irs', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Critical', 'important', 'crucial', 'significant', 'serious', 'particular', 'key', 'particularly', 'attention', 'vital', 'major', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Intelligence', 'cia', 'information', 'security', 'fbi', 'military', 'secret', 'counterterrorism', 'pentagon', 'defense', 'agents', 'Advisory', 'committee', 'panel', 'commission', 'board', 'review', 'recommendations', 'committees', 'council', 'recommendation', 'expert', 'Internal', 'external', 'ongoing', 'security', 'problems', 'invest

['Diploma', 'postgraduate', 'certificate', 'post-graduate', 'diplomas', 'doctorate', 'phd', 'baccalaureate', 'undergraduate', 'doctoral', 'bachelor', 'Consultant', 'consulting', 'expert', 'consultants', 'adviser', 'director', 'psychologist', 'hired', 'worked', 'researcher', 'entrepreneur', 'Client', 'clients', 'defendant', 'lawyer', 'file', 'customer', 'case', 'microsoft', 'lawyers', 'user', 'phone', 'Exceptional', 'extraordinary', 'outstanding', 'remarkable', 'excellent', 'exemplary', 'tremendous', 'incredible', 'contribution', 'achievement', 'phenomenal', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Ensuring', 'ensure', 'maintaining', 'assure', 'safeguarding', 'ensures', 'improving', 'guaranteeing', 'securing', 'guarantee', 'protecting', 'Service', 'services', 'public', 'network', 'private', 'system', 'business', 'provided', 'available', 'information', 'access', 'Communication', 'communications', 'syste

None
['Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'SouthBrand', 'Structured', 'customized', 'unstructured', 'integrated', 'flexible', 'combining', 'defined', 'structuring', 'hierarchical', 'tailored', 'coherent', 'Office', 'offices', 'house', 'department', 'administration', 'post', 'government', 'public', 'official', 'officials', 'headquarters', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Trainee', 'apprentice', 'full-time', 'part-time', 'janitor', 'technician', 'recruiter', 'trainees', 'paralegal', 'instructor', 'tutor', 'Program', 'programs', 'programme', 'project', 'funding', 'plan', 'system', 'plans', 'education', 'programmes', 'educational', 'Competencies', 'competences', 'competence', 'competency', 'self-discipline', 'strengths', 'methodologies', 'qualifications', 'teamwork', 'proficiencies', 'abilities', 'E

['Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Requirements', 'requirement', 'standards', 'regulations', 'required', 'rules', 'criteria', 'limits', 'guidelines', 'compliance', 'require', 'Applicant', 'applicants', 'eligibility', 'prospective', 'eligible', 'apply', 'criteria', 'respondent', 'employer', 'requirement', 'determines', 'Programmes', 'programme', 'programs', 'program', 'educational', 'projects', 'programming', 'workshops', 'courses', 'curriculum', 'documentaries', 'Support', 'supported', 'backing', 'supporting', 'efforts', 'supports', 'leadership', 'effort', 'for', 'government', 'help', 'Bank', 'banks', 'banking', 'credit', 'investment', 'financial', 'securities', 'lending', 'funds', 'ubs', 'finance', 'African', 'africa', 'africans', 'asian', 'south', 'nations', 'continent', 'country', 'zimbabwe', 'american', 'kenya', 'Standard', 'standards', 'basic', 'system', 'definition', 'example

['Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Logistics', 'logistic', 'operations', 'logistical', 'transport', 'materiel', 'warehousing', 'transportation', 'aviation', 'operational', 'maintenance', 'Mercedes', 'benz', 'bmw', 'porsche', 'lexus', 'suv', 'audi', 'volkswagen', 'car', 'honda', 'renault', 'Studies', 'study', 'research', 'institute', 'studied', 'science', 'researchers', 'studying', 'scientific', 'university', 'literature', 'Compliance', 'comply', 'requirements', 'implementation', 'standards', 'verification', 'supervision', 'inspections', 'obligations', 'complying', 'disarmament', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Chain', 'chains', 'stores', 'store', 'superma

['Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Systems', 'system', 'technologies', 'equipment', 'technology', 'software', 'computer', 'devices', 'components', 'communications', 'communication', 'Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Tech', 'technology', 'electronics', 'computer', 'chip', 'big', 'business', 'texas', 'industrial', 'advanced', 'technological', 'Graduates', 'undergraduates', 'graduate', 'students', 'enrolled', 'undergraduate', 'colleges', 'enroll', 'graduating', 'teachers', 'professors', 'Computing', 'computer', 'computational', 'software', 'technology', 'networking', 'desktop', 'automation', 'computation', 'applications', 'systems', 'Sandton', 'suntec', 'randburg', 'johannesburg', 'seatac', 'hillbrow', 'bankstown', 'binhai', 'lanseria', 'cubao', 'manukau', '

['StellenboschMust', 'Electrionic', 'Consulting', 'consultant', 'consultancy', 'firm', 'consultants', 'management', 'associates', 'business', 'mckinsey', 'marketing', 'company', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Chemical', 'chemicals', 'biological', 'toxic', 'materials', 'biochemical', 'compounds', 'industrial', 'components', 'nuclear', 'pesticides', 'Industry', 'business', 'companies', 'industries', 'sector', 'company', 'manufacturers', 'firms', 'market', 'makers', 'businesses', 'Wits', 'smarts', 'heartstrings', 'perseverance', 'gnaw', 'headingley', 'loughborough', 'mettle', 'tenacity', 'outwit', 'wit', 'Mining', 'coal', 'mines', 'copper', 'industrial', 'ore', 'mine', 'mineral', 'exploration', 'minerals', 'logging', 'Strategy', 'strategies', 'approach', 'policy', 'plan', 'focus', 'focused', 'aim', 'change', 'tactics', 'marketing', 'Mechanical', 'electrical', 'engineering', 'hydraulic

['AspenDiploma', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Capital', 'city', 'central', 'investment', 'government', 'cities', 'outside', 'in', 'bank', 'outskirts', 'northeast', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Health', 'care', 'medical', 'healthcare', 'education', 'welfare', 'environmental', 'nutrition', 'aids', 'prevention', 'hospitals', 'Competencies', 'competences', 'competence', 'competency', 'se

None
None
['Quantitative', 'qualitative', 'empirical', 'analysis', 'methodology', 'analytical', 'methods', 'analyses', 'behavioral', 'descriptive', 'theoretical', 'Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Sciences', 'science', 'institute', 'humanities', 'engineering', 'physics', 'biology', 'chemistry', 'mathematics', 'academy', 'arts', 'Governance', 'transparency', 'accountability', 'sustainability', 'management', 'reform', 'framework', 'reforms', 'stability', 'macroeconomic', 'policies', 'Services', 'service', 'transportation', 'communications', 'business', 'access', 'communication', 'provider', 'private', 'providers', 'public', 'Systems', 'system', 'technologies', 'equipment', 'technology', 'software', 'computer', 'devices', 'components', 'communications', 'c

['Gauteng', 'kwazulu-natal', 'mpumalanga', 'transvaal', 'copperbelt', 'limpopo', 'masvingo', 'manicaland', 'toowoomba', 'natal', 'guangdong', 'JohannesburgTechnology', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Sciences', 'science', 'institute', 'humanities', 'engineering', 'physics', 'biology', 'chemistry', 'mathematics', 'academy', 'arts', 'Dimension', 'dimensions', 'element', 'dimensional', 'complexity', 'sphere', 'context', 'spatial', 'aspect', 'definition', 'infinite', 'Services', 'service', 'transportation', 'communications', 'business', 'access', 'communication', 'provider', 'private', 'providers', 'public', 'Systems', 'system', 'technologies', 'equipment', 'technology', 'software', 'computer', 'devices', 'components', 'communications', 'communication', 'Chain', 'chains', 'stores', 'store', 'supermarket', 'grocery', 'retail', 'retailer', 'restaurants', 'shop', 'supermarkets', 'Eng', 'fra', 'aus',

None
['Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Office', 'offices', 'house', 'department', 'administration', 'post', 'government', 'public', 'official', 'officials', 'headquarters', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'Intranet', 'http://web.coxnews.net', 'homepage', 'extranet', 'nrs', 'intranets', 'directory', 'url', 'msnbc.com', 'admin', 'gnn', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Automotive', 'electronics', 'automobile', 'auto', 'aerospace', 'manufacturing', 'industries', 'manufacturer', 'semiconductor', 'motor', 'industrial', 'Aftermarket', 'after-market', 'oem', 'automotive', 'add-on', 'oems', 'avionics', 'endovascular', 'converters', 'hvac', 'upgrades', 'Accounting', 'auditing', 'audit', 'financ

['Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Econometrics', 'climatology', 'criminology', 'econometric', 'geomorphology', 'cybernetics', 'maplewood', 'geology', 'kisco', 'hydrogeology', 'microeconomics', 'Investment', 'investments', 'fund', 'asset', 'financial', 'firms', 'equity', 'funds', 'business', 'portfolio', 'sector', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Stats', 'statistic', 'matchups', 'colts', 'scorers', 'bengals', 'bucs', 'totals', 'chargers', 'statistics', 'broncos', 'Continuous', 'constant', 'con

['Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Electrical', 'mechanical', 'electric', 'electricity', 'machinery', 'components', 'engineering', 'wiring', 'plumbing', 'equipment', 'generator', 'Tech', 'technology', 'electronics', 'computer', 'chip', 'big', 'business', 'texas', 'industrial', 'advanced', 'technological', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Leadership', 'political', 'leaders', 'support', 'party', 'position', 'leader', 'administration', 'responsibility', 'policy', 'coalition', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Management', 'financial', 'business', 'managers', 'investment', 'develo

None
None
['Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'SouthDiploma', 'Chain', 'chains', 'stores', 'store', 'supermarket', 'grocery', 'retail', 'retailer', 'restaurants', 'shop', 'supermarkets', 'Repair', 'repairs', 'maintenance', 'repairing', 'repaired', 'damaged', 'rebuild', 'damage', 'upgrade', 'rebuilding', 'cleaning', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Clinical', 'diagnostic', 'study', 'studies', 'behavioral', 'diagnosis', 'pediatric', 'research', 'medical', 'psychiatric', 'pathology', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Service', 'services', 'public', 'network', 'private', 'system', 'business', 'provided', 'available', 'information', 'access', 'Specialist', 'expert', 'specialists', 'medical', 'consu

None
['Audit', 'auditing', 'audits', 'auditors', 'auditor', 'accounting', 'oversight', 'review', 'evaluation', 'appraisal', 'irs', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Critical', 'important', 'crucial', 'significant', 'serious', 'particular', 'key', 'particularly', 'attention', 'vital', 'major', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Intelligence', 'cia', 'information', 'security', 'fbi', 'military', 'secret', 'counterterrorism', 'pentagon', 'defense', 'agents', 'Advisory', 'committee', 'panel', 'commission', 'board', 'review', 'recommendations', 'committees', 'council', 'recommendation', 'expert', 'Internal', 'external', 'ongoing', 'security', 'problems', 'invest

['Diploma', 'postgraduate', 'certificate', 'post-graduate', 'diplomas', 'doctorate', 'phd', 'baccalaureate', 'undergraduate', 'doctoral', 'bachelor', 'Consultant', 'consulting', 'expert', 'consultants', 'adviser', 'director', 'psychologist', 'hired', 'worked', 'researcher', 'entrepreneur', 'Client', 'clients', 'defendant', 'lawyer', 'file', 'customer', 'case', 'microsoft', 'lawyers', 'user', 'phone', 'Exceptional', 'extraordinary', 'outstanding', 'remarkable', 'excellent', 'exemplary', 'tremendous', 'incredible', 'contribution', 'achievement', 'phenomenal', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Ensuring', 'ensure', 'maintaining', 'assure', 'safeguarding', 'ensures', 'improving', 'guaranteeing', 'securing', 'guarantee', 'protecting', 'Service', 'services', 'public', 'network', 'private', 'system', 'business', 'provided', 'available', 'information', 'access', 'Communication', 'communications', 'syste

None
['Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'SouthBrand', 'Structured', 'customized', 'unstructured', 'integrated', 'flexible', 'combining', 'defined', 'structuring', 'hierarchical', 'tailored', 'coherent', 'Office', 'offices', 'house', 'department', 'administration', 'post', 'government', 'public', 'official', 'officials', 'headquarters', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Trainee', 'apprentice', 'full-time', 'part-time', 'janitor', 'technician', 'recruiter', 'trainees', 'paralegal', 'instructor', 'tutor', 'Program', 'programs', 'programme', 'project', 'funding', 'plan', 'system', 'plans', 'education', 'programmes', 'educational', 'Competencies', 'competences', 'competence', 'competency', 'self-discipline', 'strengths', 'methodologies', 'qualifications', 'teamwork', 'proficiencies', 'abilities', 'E

['Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Requirements', 'requirement', 'standards', 'regulations', 'required', 'rules', 'criteria', 'limits', 'guidelines', 'compliance', 'require', 'Applicant', 'applicants', 'eligibility', 'prospective', 'eligible', 'apply', 'criteria', 'respondent', 'employer', 'requirement', 'determines', 'Programmes', 'programme', 'programs', 'program', 'educational', 'projects', 'programming', 'workshops', 'courses', 'curriculum', 'documentaries', 'Support', 'supported', 'backing', 'supporting', 'efforts', 'supports', 'leadership', 'effort', 'for', 'government', 'help', 'Bank', 'banks', 'banking', 'credit', 'investment', 'financial', 'securities', 'lending', 'funds', 'ubs', 'finance', 'African', 'africa', 'africans', 'asian', 'south', 'nations', 'continent', 'country', 'zimbabwe', 'american', 'kenya', 'Standard', 'standards', 'basic', 'system', 'definition', 'example

['Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Logistics', 'logistic', 'operations', 'logistical', 'transport', 'materiel', 'warehousing', 'transportation', 'aviation', 'operational', 'maintenance', 'Mercedes', 'benz', 'bmw', 'porsche', 'lexus', 'suv', 'audi', 'volkswagen', 'car', 'honda', 'renault', 'Studies', 'study', 'research', 'institute', 'studied', 'science', 'researchers', 'studying', 'scientific', 'university', 'literature', 'Compliance', 'comply', 'requirements', 'implementation', 'standards', 'verification', 'supervision', 'inspections', 'obligations', 'complying', 'disarmament', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Chain', 'chains', 'stores', 'store', 'superma

['Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Systems', 'system', 'technologies', 'equipment', 'technology', 'software', 'computer', 'devices', 'components', 'communications', 'communication', 'Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Tech', 'technology', 'electronics', 'computer', 'chip', 'big', 'business', 'texas', 'industrial', 'advanced', 'technological', 'Graduates', 'undergraduates', 'graduate', 'students', 'enrolled', 'undergraduate', 'colleges', 'enroll', 'graduating', 'teachers', 'professors', 'Computing', 'computer', 'computational', 'software', 'technology', 'networking', 'desktop', 'automation', 'computation', 'applications', 'systems', 'Sandton', 'suntec', 'randburg', 'johannesburg', 'seatac', 'hillbrow', 'bankstown', 'binhai', 'lanseria', 'cubao', 'manukau', '

['StellenboschMust', 'Electrionic', 'Consulting', 'consultant', 'consultancy', 'firm', 'consultants', 'management', 'associates', 'business', 'mckinsey', 'marketing', 'company', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Chemical', 'chemicals', 'biological', 'toxic', 'materials', 'biochemical', 'compounds', 'industrial', 'components', 'nuclear', 'pesticides', 'Industry', 'business', 'companies', 'industries', 'sector', 'company', 'manufacturers', 'firms', 'market', 'makers', 'businesses', 'Wits', 'smarts', 'heartstrings', 'perseverance', 'gnaw', 'headingley', 'loughborough', 'mettle', 'tenacity', 'outwit', 'wit', 'Mining', 'coal', 'mines', 'copper', 'industrial', 'ore', 'mine', 'mineral', 'exploration', 'minerals', 'logging', 'Strategy', 'strategies', 'approach', 'policy', 'plan', 'focus', 'focused', 'aim', 'change', 'tactics', 'marketing', 'Mechanical', 'electrical', 'engineering', 'hydraulic

['AspenDiploma', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Capital', 'city', 'central', 'investment', 'government', 'cities', 'outside', 'in', 'bank', 'outskirts', 'northeast', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Health', 'care', 'medical', 'healthcare', 'education', 'welfare', 'environmental', 'nutrition', 'aids', 'prevention', 'hospitals', 'Competencies', 'competences', 'competence', 'competency', 'se

None
None
['Quantitative', 'qualitative', 'empirical', 'analysis', 'methodology', 'analytical', 'methods', 'analyses', 'behavioral', 'descriptive', 'theoretical', 'Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Sciences', 'science', 'institute', 'humanities', 'engineering', 'physics', 'biology', 'chemistry', 'mathematics', 'academy', 'arts', 'Governance', 'transparency', 'accountability', 'sustainability', 'management', 'reform', 'framework', 'reforms', 'stability', 'macroeconomic', 'policies', 'Services', 'service', 'transportation', 'communications', 'business', 'access', 'communication', 'provider', 'private', 'providers', 'public', 'Systems', 'system', 'technologies', 'equipment', 'technology', 'software', 'computer', 'devices', 'components', 'communications', 'c

['Gauteng', 'kwazulu-natal', 'mpumalanga', 'transvaal', 'copperbelt', 'limpopo', 'masvingo', 'manicaland', 'toowoomba', 'natal', 'guangdong', 'JohannesburgTechnology', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Sciences', 'science', 'institute', 'humanities', 'engineering', 'physics', 'biology', 'chemistry', 'mathematics', 'academy', 'arts', 'Dimension', 'dimensions', 'element', 'dimensional', 'complexity', 'sphere', 'context', 'spatial', 'aspect', 'definition', 'infinite', 'Services', 'service', 'transportation', 'communications', 'business', 'access', 'communication', 'provider', 'private', 'providers', 'public', 'Systems', 'system', 'technologies', 'equipment', 'technology', 'software', 'computer', 'devices', 'components', 'communications', 'communication', 'Chain', 'chains', 'stores', 'store', 'supermarket', 'grocery', 'retail', 'retailer', 'restaurants', 'shop', 'supermarkets', 'Eng', 'fra', 'aus',

 
None
['Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Office', 'offices', 'house', 'department', 'administration', 'post', 'government', 'public', 'official', 'officials', 'headquarters', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'Intranet', 'http://web.coxnews.net', 'homepage', 'extranet', 'nrs', 'intranets', 'directory', 'url', 'msnbc.com', 'admin', 'gnn', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Automotive', 'electronics', 'automobile', 'auto', 'aerospace', 'manufacturing', 'industries', 'manufacturer', 'semiconductor', 'motor', 'industrial', 'Aftermarket', 'after-market', 'oem', 'automotive', 'add-on', 'oems', 'avionics', 'endovascular', 'converters', 'hvac', 'upgrades', 'Accounting', 'auditing', 'audit', 'fina

['Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Econometrics', 'climatology', 'criminology', 'econometric', 'geomorphology', 'cybernetics', 'maplewood', 'geology', 'kisco', 'hydrogeology', 'microeconomics', 'Investment', 'investments', 'fund', 'asset', 'financial', 'firms', 'equity', 'funds', 'business', 'portfolio', 'sector', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Stats', 'statistic', 'matchups', 'colts', 'scorers', 'bengals', 'bucs', 'totals', 'chargers', 'statistics', 'broncos', 'Continuous', 'constant', 'con

['Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Electrical', 'mechanical', 'electric', 'electricity', 'machinery', 'components', 'engineering', 'wiring', 'plumbing', 'equipment', 'generator', 'Tech', 'technology', 'electronics', 'computer', 'chip', 'big', 'business', 'texas', 'industrial', 'advanced', 'technological', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Leadership', 'political', 'leaders', 'support', 'party', 'position', 'leader', 'administration', 'responsibility', 'policy', 'coalition', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Management', 'financial', 'business', 'managers', 'investment', 'develo

None
None
['Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'SouthDiploma', 'Chain', 'chains', 'stores', 'store', 'supermarket', 'grocery', 'retail', 'retailer', 'restaurants', 'shop', 'supermarkets', 'Repair', 'repairs', 'maintenance', 'repairing', 'repaired', 'damaged', 'rebuild', 'damage', 'upgrade', 'rebuilding', 'cleaning', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Clinical', 'diagnostic', 'study', 'studies', 'behavioral', 'diagnosis', 'pediatric', 'research', 'medical', 'psychiatric', 'pathology', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Service', 'services', 'public', 'network', 'private', 'system', 'business', 'provided', 'available', 'information', 'access', 'Specialist', 'expert', 'specialists', 'medical', 'consu

None
['Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'SouthPrograms', 'Eng', 'fra', 'aus', 'capt', 'saracens', 'leicester', 'rsa', 'esp', 'harlequins', '/', 'gareth', 'Must', 'should', 'cannot', 'would', 'need', 'can', 'will', 'not', 'could', 'be', 'come', 'Metals', 'minerals', 'copper', 'commodities', 'ferrous', 'commodity', 'ores', 'aluminium', 'alloys', 'chemicals', 'zinc', 'Professional', 'amateur', 'football', 'career', 'player', 'club', 'players', 'playing', 'team', 'sports', 'basketball', 'Chemical', 'chemicals', 'biological', 'toxic', 'materials', 'biochemical', 'compounds', 'industrial', 'components', 'nuclear', 'pesticides', 'Development', 'projects', 'project', 'developing', 'environment', 'research', 'economic', 'cooperation', 'planning', 'develop', 'creation', 'Completed', 'completion', 'completing', 'complete', 'first', 'construction', 'commenced', 'finished', 'later', 'project', 'prior', 'Mining', 

['Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Diploma', 'postgraduate', 'certificate', 'post-graduate', 'diplomas', 'doctorate', 'phd', 'baccalaureate', 'undergraduate', 'doctoral', 'bachelor', 'Eng', 'fra', 'aus', 'capt', 'saracens', 'leicester', 'rsa', 'esp', 'harlequins', '/', 'gareth', 'Science', 'sciences', 'physics', 'institute', 'mathematics', 'studies', 'research', 'biology', 'university', 'psychology', 'economics', 'Continuous', 'constant', 'continual', 'periodic', 'static', 'rapid', 'linear', 'continuation', 'normal', 'extensive', 'direct', 'Honours', 'honors', 'honour', 'bestowed', 'knighthood', 'awarded', 'knighted', 'honorary', 'diploma', 'prizes', 'conferred', 'Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'The', 'this', 'part', 'one', 'of', 'same', 'first', 'on', 'its', 'as',

['Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'SouthHuman', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Wholesale', 'retail', 'consumer', 'sales', 'prices', 'retailers', 'demand', 'price', 'decline', 'inventories', 'consumption', 'Retail', 'sales', 'retailers', 'consumer', 'stores', 'market', 'retailing', 'wholesale', 'store', 'businesses', 'retailer', 'Administration', 'government', 'policy', 'bush', 'congress', 'officials', 'clinton', 'office', 'department', 'federal', 'washington', 'Education', 'educational', 'schools', 'teaching', 'c

In [18]:
finalDf["ExtendedTag"]=finaltagls

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  finalDf["ExtendedTag"]=finaltagls


In [21]:
finalDf.head()

Unnamed: 0,companyName,jobType,location,position,description,Aggregation,ExtendedTag
0,Hatch,Graduate programme,Gauteng,Chemical Engineering Graduate Programme 2023,Hatch is currently seeking the top performing ...,"Africa, SouthPrograms, Eng, Must, Metals, Prof...","Africa, african, continent, asia, zimbabwe, so..."
1,Tiger Brands,Graduate programme,Nationwide,Future Leaders Graduate Development Programme ...,The Tiger Brands Future Leaders Development Pr...,"Brands, Technology, Business, Food, Tiger, Fut...","Brands, brand, products, cola, cosmetics, spec..."
2,Investec,Graduate programme,Gauteng,Tech Graduate Programme 2023,Investec Sandton is looking for Tech Graduates...,"Business, Systems, Informatics, Tech, Graduate...","Business, industry, companies, businesses, mar..."
3,Nedbank,Graduate programme,Gauteng,Quantitative Analyst Graduate Programme 2023 /...,ResponsibilitiesSeek opportunities to improve ...,"Seek, ResponsibilitiesQuantitative, Applied, N...","Seek, seeking, sought, seeks, allow, agree, ac..."
4,BBD,Graduate programme,Gauteng,Graduate Programme 2023,BBD is an international software firm that sol...,"Informatics, Diploma, Eng, Science, Continuous...","Informatics, biomedical, neuroscience, cyberne..."


In [22]:
#The final has rows of orginal as well as the extended tags for the NER process

In [23]:
finalDf.to_csv("NER.csv")

In [32]:
#tests
nlp = spacy.load('en_core_web_md')
synth = "NER.csv"
extjobs = pd.read_csv(synth, index_col=0)

In [55]:
def get_top_sentence_indexes(prompt, querycols):
    prompt_words = nlp(prompt.lower())
    prompt_words = [token.text for token in prompt_words if not token.is_punct and not token.is_stop]

    all_sentences = []

    for column in querycols:
        for index, sentence in enumerate(extjobs[column]):
            if pd.notna(sentence):  # Check if sentence is not NaN
                tokens = nlp(str(sentence).lower())
                tokens = [token.text for token in tokens if not token.is_punct and not token.is_stop]
                doc = nlp(" ".join(tokens))

                similarities = [doc.similarity(nlp(word.lower())) for word in prompt_words]

                max_similarity = max(similarities)
                all_sentences.append((sentence, max_similarity, index))

    sorted_sentences = sorted(all_sentences, key=lambda x: x[1], reverse=True)
    top_10_indexes = [item[2] for item in sorted_sentences[:10]]

    return top_10_indexes

With extended tag only

In [56]:
%%time
prompt = "chemical"
indexes = get_top_sentence_indexes(prompt, ["ExtendedTag"])
print("Indexes of top 10 sentences:", indexes)

Indexes of top 10 sentences: [7, 76, 145, 192, 53, 122, 27, 96, 165, 39]
CPU times: total: 18 s
Wall time: 18 s


In [57]:
extjobs.iloc[indexes]

Unnamed: 0,companyName,jobType,location,position,description,Aggregation,ExtendedTag
7,Accenture,Graduate programme,Gauteng,Graduate Programme 2023,Accenture offers exceptional development and c...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
7,Allan Gray,Internship,Cape Town,Internship Opportunities 2023,Our Retail Operations team assist clients and ...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
76,University of Johannesburg (UJ),Internship,Gauteng,YES Internship Programme 2023,The Purpose of the Internship programme is to ...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
7,Allen and Overy,Vacation work,Gauteng,Winter Vacation Work 2023,Our one-week Summer Vacation Scheme will offer...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
53,Rand Water,Graduate programme,Gauteng,Graduate Programme 2023,Rand Water invites suitably quallified and une...,"Diploma, Chemistry, Analytical, Biology, Appli...","Diploma, postgraduate, certificate, post-gradu..."
53,National Heritage Council (NHC),Internship,Gauteng,HR Internship Programme 2023,The National Heritage Council of South Africa ...,"Diploma, Chemistry, Analytical, Biology, Appli...","Diploma, postgraduate, certificate, post-gradu..."
27,AECI Mining,Graduate programme,Gauteng,Engineering Graduate Programme 2023,Responsibilities:Assist with operational/execu...,"Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."
27,Wesgro,Internship,Cape Town,Internship Programme 2023,"Role Purpose: Promote, support and administer ...","Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."
96,Department of Agriculture,Internship,Nationwide,Internship Programme 2023,"The Department of Agriculture, Land Reform and...","Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."
39,Umgeni Water,Graduate programme,KwaZulu-Natal,Graduate Trainee Programme 2023,Umgeni Water is offering a thirty six (36) mon...,"Zulu, Kwa, Natal, South, AfricaUmgeni, Chemist...","Zulu, xhosa, inkatha, buthelezi, maori, ifp, k..."


In [58]:
%%time
prompt = "I am looking for chemical engineering positions"
indexes = get_top_sentence_indexes(prompt, ["ExtendedTag"])
print("Indexes of top 10 sentences:", indexes)

Indexes of top 10 sentences: [15, 84, 153, 7, 76, 145, 192, 27, 96, 165]
CPU times: total: 22.8 s
Wall time: 22.9 s


In [59]:
extjobs.iloc[indexes]

Unnamed: 0,companyName,jobType,location,position,description,Aggregation,ExtendedTag
15,Sasol,Graduate programme,Mpumalanga,Engineering Graduate Programme 2023,This role is for a position in the graduate de...,"Technology, Eng, Industrial, Operations, Train...","Technology, technologies, computer, tech, soft..."
15,ABSA,Internship,Gauteng,Graduate Internship Programme 2023,Position Title: Risk Management and Dispute Re...,"Technology, Eng, Industrial, Operations, Train...","Technology, technologies, computer, tech, soft..."
84,National Credit Regulator (NCR),Internship,Gauteng,Procurement Internship Programme 2023,Duration: 12 months contract\nStipend: R5391.0...,"Technology, Eng, Industrial, Operations, Train...","Technology, technologies, computer, tech, soft..."
7,Accenture,Graduate programme,Gauteng,Graduate Programme 2023,Accenture offers exceptional development and c...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
7,Allan Gray,Internship,Cape Town,Internship Opportunities 2023,Our Retail Operations team assist clients and ...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
76,University of Johannesburg (UJ),Internship,Gauteng,YES Internship Programme 2023,The Purpose of the Internship programme is to ...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
7,Allen and Overy,Vacation work,Gauteng,Winter Vacation Work 2023,Our one-week Summer Vacation Scheme will offer...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
27,AECI Mining,Graduate programme,Gauteng,Engineering Graduate Programme 2023,Responsibilities:Assist with operational/execu...,"Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."
27,Wesgro,Internship,Cape Town,Internship Programme 2023,"Role Purpose: Promote, support and administer ...","Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."
96,Department of Agriculture,Internship,Nationwide,Internship Programme 2023,"The Department of Agriculture, Land Reform and...","Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."


All columns

In [61]:
%%time
prompt = "chemical"
indexes = get_top_sentence_indexes(prompt, extjobs.columns)
print("Indexes of top 10 sentences:", indexes)

  similarities = [doc.similarity(nlp(word.lower())) for word in prompt_words]


Indexes of top 10 sentences: [7, 76, 145, 192, 53, 122, 7, 76, 145, 192]
CPU times: total: 59.2 s
Wall time: 59.6 s


In [62]:
extjobs.iloc[indexes]

Unnamed: 0,companyName,jobType,location,position,description,Aggregation,ExtendedTag
7,Accenture,Graduate programme,Gauteng,Graduate Programme 2023,Accenture offers exceptional development and c...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
7,Allan Gray,Internship,Cape Town,Internship Opportunities 2023,Our Retail Operations team assist clients and ...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
76,University of Johannesburg (UJ),Internship,Gauteng,YES Internship Programme 2023,The Purpose of the Internship programme is to ...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
7,Allen and Overy,Vacation work,Gauteng,Winter Vacation Work 2023,Our one-week Summer Vacation Scheme will offer...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
53,Rand Water,Graduate programme,Gauteng,Graduate Programme 2023,Rand Water invites suitably quallified and une...,"Diploma, Chemistry, Analytical, Biology, Appli...","Diploma, postgraduate, certificate, post-gradu..."
53,National Heritage Council (NHC),Internship,Gauteng,HR Internship Programme 2023,The National Heritage Council of South Africa ...,"Diploma, Chemistry, Analytical, Biology, Appli...","Diploma, postgraduate, certificate, post-gradu..."
7,Accenture,Graduate programme,Gauteng,Graduate Programme 2023,Accenture offers exceptional development and c...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
7,Allan Gray,Internship,Cape Town,Internship Opportunities 2023,Our Retail Operations team assist clients and ...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
76,University of Johannesburg (UJ),Internship,Gauteng,YES Internship Programme 2023,The Purpose of the Internship programme is to ...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."
7,Allen and Overy,Vacation work,Gauteng,Winter Vacation Work 2023,Our one-week Summer Vacation Scheme will offer...,"StellenboschMust, Electrionic, Consulting, Gra...","StellenboschMust, Electrionic, Consulting, con..."


In [40]:
%%time
prompt = "I am looking for chemical engineering positions"
indexes = get_top_sentence_indexes(prompt, extjobs.columns)
print("Indexes of top 10 sentences:", indexes)

  similarities = [doc.similarity(nlp(word.lower())) for word in prompt_words]


Indexes of top 10 sentences: [15, 84, 153, 7, 76, 145, 192, 15, 84, 153]
CPU times: total: 1min 25s
Wall time: 1min 26s


In [49]:
import spacy
import pandas as pd

nlp = spacy.load('en_core_web_md')

def get_top_sentence_indexes(prompt):
    synth = "NER.csv"

    extjobs = pd.read_csv(synth, index_col=0)

    prompt_words = [token.text.lower() for token in nlp(prompt.lower()) if not token.is_punct and not token.is_stop]
    extjobs_sentences = extjobs['ExtendedTag'].dropna()  # Use only the 'ExtendedTag' column and drop NaN values

    similarities = extjobs_sentences.apply(lambda sentence: nlp(sentence).similarity(nlp(' '.join(prompt_words))))

    sorted_sentences = similarities.sort_values(ascending=False)
    top_10_indexes = sorted_sentences.index[:10].tolist()

    return top_10_indexes


In [50]:
%%time
prompt = "chemical"
indexes = get_top_sentence_indexes(prompt)
print("Indexes of top 10 sentences:", indexes)

Indexes of top 10 sentences: [7, 7, 7, 76, 27, 96, 27, 53, 53, 61]
CPU times: total: 13 s
Wall time: 13 s


In [51]:
%%time
prompt = "I am looking for chemical engineering positions"
indexes = get_top_sentence_indexes(prompt)
print("Indexes of top 10 sentences:", indexes)

Indexes of top 10 sentences: [98, 29, 29, 30, 99, 30, 96, 27, 27, 78]
CPU times: total: 12.3 s
Wall time: 12.3 s


In [52]:
extjobs.iloc[indexes]

Unnamed: 0,companyName,jobType,location,position,description,Aggregation,ExtendedTag
29,Mhlathuze Water,Internship,KwaZulu-Natal,Internship Programme 2023,Mhlathuze Water invites suitably qualified and...,"Technology, Business, Requirements, Building, ...","Technology, technologies, computer, tech, soft..."
29,Eskom,Graduate programme,Gauteng,IT Graduate Trainee Programme 2023,Key ResponsibilitiesLiaise with customers thro...,"Technology, Business, Requirements, Building, ...","Technology, technologies, computer, tech, soft..."
29,Eskom,Graduate programme,Gauteng,IT Graduate Trainee Programme 2023,Key ResponsibilitiesLiaise with customers thro...,"Technology, Business, Requirements, Building, ...","Technology, technologies, computer, tech, soft..."
30,Standard Bank,Graduate programme,Gauteng,CIB Technology Graduate Programme 2023,Our graduate programme blends theoretical lear...,"Informatics, Bank, Applied, Statistics, Techno...","Informatics, biomedical, neuroscience, cyberne..."
30,Investec,Internship,Gauteng,Tech Graduate Internship Programme 2023,Investec Sandton is looking for Tech Graduates...,"Informatics, Bank, Applied, Statistics, Techno...","Informatics, biomedical, neuroscience, cyberne..."
30,Standard Bank,Graduate programme,Gauteng,CIB Technology Graduate Programme 2023,Our graduate programme blends theoretical lear...,"Informatics, Bank, Applied, Statistics, Techno...","Informatics, biomedical, neuroscience, cyberne..."
27,Wesgro,Internship,Cape Town,Internship Programme 2023,"Role Purpose: Promote, support and administer ...","Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."
27,AECI Mining,Graduate programme,Gauteng,Engineering Graduate Programme 2023,Responsibilities:Assist with operational/execu...,"Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."
27,AECI Mining,Graduate programme,Gauteng,Engineering Graduate Programme 2023,Responsibilities:Assist with operational/execu...,"Mechatronics, Requirements, Bachelor, Health, ...","Mechatronics, bio-technology, bio-medical, bio..."
9,South African National Parks,Internship,Eastern Cape,Finance / Procurement Internship Programme 2023,Applications are hereby invited from graduates...,"Deliver, ResponsibilitiesActuarial, Business, ...","Deliver, delivering, delivered, provide, carry..."
