In [1]:
#Libraries
import numpy as np
import pandas as pd
import spacy

nlp = spacy.load('en_core_web_md')

In [2]:
# Importing data into dataframes
gradProgramsJson = "../Data/Gradeo-data/graduateProgrammes.json"
internshipsJson = "../Data/Gradeo-data/internships.json"
vacworkJson = "../Data/Gradeo-data/vacwork.json"

gradProgramsDf = pd.read_json(gradProgramsJson)
internshipsDf = pd.read_json(internshipsJson)
vacworkDf = pd.read_json(vacworkJson)

In [3]:
# Dropping all the unnesecary columns => companyLogo; applicationURL; deadline
def coldropper(df):
    df.drop(["companyLogo", "applicationURL", "deadline"], axis=1, inplace=True)

In [4]:
coldropper(gradProgramsDf)
coldropper(internshipsDf)
coldropper(vacworkDf)

In [5]:
gradProgramsDf.head(2)

Unnamed: 0,companyName,jobType,location,position,description
0,Hatch,Graduate programme,Johannesburg,Chemical Engineering Graduate Programme 2023,Hatch is currently seeking the top performing ...
1,Tiger Brands,Graduate programme,Nation wide,Future Leaders Graduate Development Programme ...,The Tiger Brands Future Leaders Development Pr...


In [6]:
internshipsDf.head(2)

Unnamed: 0,companyName,jobType,location,position,description
0,Gauteng Provincial Government,Internship,Johannesburg,Youth Training / Internship Opportunities 2023,SKills and Training for Solar Technicians 2023...
1,Joburg City Theatres,Internship,Johannesburg,Purchasing / Supply Chain Internship Programme...,Joburg City Theatres is contributing towards s...


In [7]:
vacworkDf.head(2)

Unnamed: 0,companyName,jobType,location,position,description
0,BDO,Vacation work,Port Elizabeth,Readiness Experience Drive (RED) Vacation Work...,Our RED Programme is the ideal vacation work o...
1,Moore,Vacation work,Johannesburg,Winter Vacation Work Experience Programme 2023,The Moore Johannesburg Vacation Program is ope...


In [8]:
gradProgramsDf["description"][0]

'Hatch is currently seeking the top performing and highly motivated\xa0Chemical Engineering\xa0final year students to join us on our 2024 Graduate Selection Journey.\n\xa0\nAs the successful candidate, you will:Be encouraged to be the best version of yourself with many opportunities to pursue work in Mining & Metals, Energy, Digital, or Infrastructure sectors.\nHave the benefit of formal and informal mentorship from some of the leading minds and authorities in the industry.\nHave the opportunity to gain project experience in South Africa and around the world.\nCollaborate with different people across a multitude of cultures around our global organization.\nWork in an exciting environment where you can use your Engineering degree\xa0in your choice of traditional sectors or emerging, innovative, and evolving industries.\nBe part of our local Professional\xa0Development Programs.\nMinimum Requirements:You must be a South African Citizen.\nCompleted or Final year\xa0\xa0BSc or BEng in Chem

In [9]:
locationlist = list(gradProgramsDf["location"].unique()) + list(internshipsDf["location"].unique())+ list(vacworkDf["location"].unique())

In [10]:
locationlist = set(locationlist)

In [11]:
locationlist

{'Addo Elephant National Park',
 'Alberton',
 'Boksburg',
 'Bronkhorstspruit',
 'Cape Town',
 'Centurion',
 'Durban',
 'Gauteng',
 'Germiston',
 'Hotazel',
 'Johannesburg',
 'Kempton Park',
 'Kuruman',
 'KwaZulu-Natal',
 'Midrand',
 'Modderfontein',
 'Mpumalanga',
 'Mulbarton',
 'Nation wide',
 'North West',
 'Polokwane',
 'Port Elizabeth',
 'Pretoria',
 'Richards Bay',
 'Roodepoort',
 'Rustenburg',
 'Sandton',
 'Thohoyandou',
 'Vereeniging',
 'Weltevredenpark'}

In [12]:
location_mapping_dict = {
    'Addo Elephant National Park':'Eastern Cape',
 'Alberton':"Gauteng",
 'Boksburg':"Gauteng",
 'Bronkhorstspruit':"Gauteng",
 'Cape Town':"Cape Town",
 'Centurion':"Gauteng",
 'Durban':"KwaZulu-Natal",
 'Gauteng':"Gauteng",
 'Germiston':"Gauteng",
 'Hotazel':"Northern Cape",
 'Johannesburg':"Gauteng",
 'Kempton Park':"Gauteng",
 'Kuruman':"Northern Cape",
 'KwaZulu-Natal':"KwaZulu-Natal",
 'Midrand':"Gauteng",
 'Modderfontein':"Gauteng",
 'Mpumalanga':"Mpumalanga",
 'Mulbarton':"Gauteng",
 'Nation wide':"Nationwide",
 'North West':"North West",
 'Polokwane':"Limpopo",
 'Port Elizabeth':"Eastern Cape",
 'Pretoria':"Gauteng",
 'Richards Bay':"KwaZulu-Natal",
 'Roodepoort':"Gauteng",
 'Rustenburg':"North West",
 'Sandton':"Gauteng",
 'Thohoyandou':"Limpopo",
 'Vereeniging':"Gauteng",
 'Weltevredenpark':"Gauteng",
}


In [13]:
def location_mapper(dflist, location_mapping_dict):
    for i in dflist:
        i["location"] = i["location"].map(location_mapping_dict)

In [14]:
location_mapper([gradProgramsDf, internshipsDf, vacworkDf], location_mapping_dict)

In [15]:
locationlist_refined = list(gradProgramsDf["location"].unique()) + list(internshipsDf["location"].unique())+ list(vacworkDf["location"].unique())

In [16]:
set(locationlist_refined)

{'Cape Town',
 'Eastern Cape',
 'Gauteng',
 'KwaZulu-Natal',
 'Limpopo',
 'Mpumalanga',
 'Nationwide',
 'North West',
 'Northern Cape'}

In [17]:
#Unified DataFrame
jobsDf = pd.concat([gradProgramsDf, internshipsDf, vacworkDf], axis=0)

In [18]:
jobsDf.shape

(198, 5)

In [19]:
gradProgramsDf["description"][0]

'Hatch is currently seeking the top performing and highly motivated\xa0Chemical Engineering\xa0final year students to join us on our 2024 Graduate Selection Journey.\n\xa0\nAs the successful candidate, you will:Be encouraged to be the best version of yourself with many opportunities to pursue work in Mining & Metals, Energy, Digital, or Infrastructure sectors.\nHave the benefit of formal and informal mentorship from some of the leading minds and authorities in the industry.\nHave the opportunity to gain project experience in South Africa and around the world.\nCollaborate with different people across a multitude of cultures around our global organization.\nWork in an exciting environment where you can use your Engineering degree\xa0in your choice of traditional sectors or emerging, innovative, and evolving industries.\nBe part of our local Professional\xa0Development Programs.\nMinimum Requirements:You must be a South African Citizen.\nCompleted or Final year\xa0\xa0BSc or BEng in Chem

In [20]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [21]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [22]:
doc = nlp(gradProgramsDf["description"][1])

for ent in doc.ents:
    print(ent.text, "|", ent.label_)

The Tiger Brands  | ORG
24-month | DATE
South African | NORP
Graduate Internship Opportunities | ORG
Customer Graduate | PERSON
Food Technology | ORG
Chemical Engineering | ORG
GraduateBSc/BEng Degree ( | ORG
Information Technology | ORG
Information Technology OR Computer Science
Mechanical Engineering | ORG
GraduateBSc/BEng Degree (Mechanical | ORG
Economics | ORG
Strategic Communication | ORG
GraduateBSc/BEng Degree | ORG
Supply Chain | WORK_OF_ART
Supply Chain | WORK_OF_ART
Accounting & | ORG
2 | CARDINAL


In [23]:
nlp.pipe_labels['ner']

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [24]:
jobsDf

Unnamed: 0,companyName,jobType,location,position,description
0,Hatch,Graduate programme,Gauteng,Chemical Engineering Graduate Programme 2023,Hatch is currently seeking the top performing ...
1,Tiger Brands,Graduate programme,Nationwide,Future Leaders Graduate Development Programme ...,The Tiger Brands Future Leaders Development Pr...
2,Investec,Graduate programme,Gauteng,Tech Graduate Programme 2023,Investec Sandton is looking for Tech Graduates...
3,Nedbank,Graduate programme,Gauteng,Quantitative Analyst Graduate Programme 2023 /...,ResponsibilitiesSeek opportunities to improve ...
4,BBD,Graduate programme,Gauteng,Graduate Programme 2023,BBD is an international software firm that sol...
...,...,...,...,...,...
8,Mr Price,Vacation work,KwaZulu-Natal,Winter Vacation Workplace Experience Programme...,Workplace Experience Programme (WEP) is our an...
9,Visa,Vacation work,Gauteng,June Vacation Internship - Data Science (For C...,ResponsibilitiesBe an out-of-the-box thinker w...
10,Citi,Vacation work,Gauteng,EMEA Banking Winter Vacation Internship Progra...,Whether you’ve got prior financial industry ex...
11,Allan Gray,Vacation work,Cape Town,Internship for final year students 2023,Purpose of the role: Our Client Service Centre...


In [25]:
#Using Named Entity recognition to extract features
ner = nlp.pipe_labels['ner']
for i in ner:
    jobsDf[i] = ''

In [26]:
#NER main
for ind,i in enumerate(jobsDf["description"]):
    try:
        doc = nlp(i)
        for ent in doc.ents:
            jobsDf[ent.label_][ind] = jobsDf[ent.label_][ind] + "," + ent.text
    except:
        print("Error")

Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error


In [27]:
jobsDf.tail()

Unnamed: 0,companyName,jobType,location,position,description,CARDINAL,DATE,EVENT,FAC,GPE,...,MONEY,NORP,ORDINAL,ORG,PERCENT,PERSON,PRODUCT,QUANTITY,TIME,WORK_OF_ART
8,Mr Price,Vacation work,KwaZulu-Natal,Winter Vacation Workplace Experience Programme...,Workplace Experience Programme (WEP) is our an...,,",a 12 month",,,",South Africa’s",...,,,,",Wholesale & Retail SETA,the JD Group - one,Ex...",,,,,,
9,Visa,Vacation work,Gauteng,June Vacation Internship - Data Science (For C...,ResponsibilitiesBe an out-of-the-box thinker w...,,,",Recent Graduates / Final year University",,,...,,",South African",,",ResponsibilitiesDeliver,Participation,Commerc...",,,",Maths",,,",Participate,Actuarial Science"
10,Citi,Vacation work,Gauteng,EMEA Banking Winter Vacation Internship Progra...,Whether you’ve got prior financial industry ex...,,,,,",Gqeberha",...,,,,",The Shoprite Group of Companies Supply Chain,...",,,,,,
11,Allan Gray,Vacation work,Cape Town,Internship for final year students 2023,Purpose of the role: Our Client Service Centre...,",12",",1 June 2023",,,",CA(SA",...,,,,",CA,the Theory of Accounting,CTA,PGDA",",60%",,,,,
12,Bank of America,Vacation work,Gauteng,Investment Banking Winter Internship 2023,Our Winter Analyst programme is designed to of...,,,,,,...,,,,",Danone's Operations",,,,,,


In [28]:
import gensim.downloader as api
from gensim.models import KeyedVectors
from typing import Type
# Load pre-trained word-vectors from gensim-data
word_vectors = api.load("glove-wiki-gigaword-100") 


In [29]:
def print_synonyms(word_vectors: Type[KeyedVectors], seed_word: str, n_synonyms):
    result = word_vectors.most_similar(positive=[seed_word])
    print(f"The synonyms for '{seed_word}':")
    for r in result[:n_synonyms]:
        print(r[0], r[1])

In [30]:
print_synonyms(word_vectors, seed_word="consulting", n_synonyms=10)

The synonyms for 'consulting':
consultant 0.7932726144790649
consultancy 0.7872053980827332
firm 0.7739571332931519
consultants 0.7732748985290527
management 0.7026959657669067
associates 0.6969802379608154
business 0.6804054975509644
mckinsey 0.6548133492469788
marketing 0.6362238526344299
company 0.6341273188591003


In [31]:
def calculate_missing_percentage(df):
    total_rows = len(df)
    missing_percentages = (df.isnull().sum() / total_rows) * 100
    return missing_percentages

In [32]:
jobsDf.columns

Index(['companyName', 'jobType', 'location', 'position', 'description',
       'CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC',
       'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT',
       'QUANTITY', 'TIME', 'WORK_OF_ART'],
      dtype='object')

In [33]:
pd.set_option('display.max_columns', None)
jobsDf.head()

Unnamed: 0,companyName,jobType,location,position,description,CARDINAL,DATE,EVENT,FAC,GPE,LANGUAGE,LAW,LOC,MONEY,NORP,ORDINAL,ORG,PERCENT,PERSON,PRODUCT,QUANTITY,TIME,WORK_OF_ART
0,Hatch,Graduate programme,Gauteng,Chemical Engineering Graduate Programme 2023,Hatch is currently seeking the top performing ...,",0",",year,2024,less than 12 months",",Graduate Selection Journey",,",South Africa,South Africa",,,,,",South African",,",Chemical Engineering,Mining & Metals, Energy,...",",70%",,,,,
1,Tiger Brands,Graduate programme,Nationwide,Future Leaders Graduate Development Programme ...,The Tiger Brands Future Leaders Development Pr...,",2",",24-month",,,,,,,,",South African",,",The Tiger Brands ,Graduate Internship Opportu...",,",Customer Graduate",,,,",Supply Chain,Supply Chain"
2,Investec,Graduate programme,Gauteng,Tech Graduate Programme 2023,Investec Sandton is looking for Tech Graduates...,",one,one",,,,,,,,,",Investec,South African,Investec",,",the Investec Tech 2024,Grad Programme,the Inv...",,",Required Fields of Study",,,,
3,Nedbank,Graduate programme,Gauteng,Quantitative Analyst Graduate Programme 2023 /...,ResponsibilitiesSeek opportunities to improve ...,,,,,,,,,,,,",Nedbank,Contribute,the Nedbank Quants Trainin...",,",Required Fields,Pure Mathematics",,,,
4,BBD,Graduate programme,Gauteng,Graduate Programme 2023,BBD is an international software firm that sol...,,",the very first week,their first 8 weeks",,,,,,,,",South African,South African",",first",",BBD,BBD,Continuous Learning Programme,The BBD...",,,,,,


In [34]:
jobsDf.drop(["CARDINAL", "DATE", "LANGUAGE", "LAW", "LOC", "MONEY", "NORP", "ORDINAL", "PERCENT", "QUANTITY", "TIME"], axis=1, inplace=True)

In [35]:
jobsDf["WORK_OF_ART"].unique()

array(['', ',Supply Chain,Supply Chain',
       ',Supply Chain\nLogistics\nEconomics\nManagement\nHuman Resources\xa0Trainee Programme,Science Honours,Electrical Engineering\xa0Trainee ProgrammeB.Engineering/ B.Sc.',
       ',Participate,Actuarial Science', ',Quantitative Analysis',
       ',Bsc Environmental Science\nRelevant Degree,Electrical or Electronic',
       ',Requirements',
       ',Process Improvement\nRoot Cause Analysis\nSimulations\nExpense Analysis\nAccounts Receivable\nForex\nNet working Capital Reporting\nRequirements:Grade 12 / Matric\nDegree,Reliable',
       ',Electrical Engineering\nMechanical Engineering\nMechatronics\nApplicant Requirements',
       ',Actuarial Science\nBEng Electrical and Electronic Enineering\nBEng Industrial Engineering\nIndustrial Psychology\n',
       ',Electrical Engineering\nMechanical Engineering: BEng,Civil Engineering\nGeographic Information Systems',
       ',B.Com Honours Degree in either;Logistics\nSupply Chain\xa0\n',
       ',Elect

In [36]:
jobsDf.head()

Unnamed: 0,companyName,jobType,location,position,description,EVENT,FAC,GPE,ORG,PERSON,PRODUCT,WORK_OF_ART
0,Hatch,Graduate programme,Gauteng,Chemical Engineering Graduate Programme 2023,Hatch is currently seeking the top performing ...,",Graduate Selection Journey",,",South Africa,South Africa",",Chemical Engineering,Mining & Metals, Energy,...",,,
1,Tiger Brands,Graduate programme,Nationwide,Future Leaders Graduate Development Programme ...,The Tiger Brands Future Leaders Development Pr...,,,,",The Tiger Brands ,Graduate Internship Opportu...",",Customer Graduate",,",Supply Chain,Supply Chain"
2,Investec,Graduate programme,Gauteng,Tech Graduate Programme 2023,Investec Sandton is looking for Tech Graduates...,,,,",the Investec Tech 2024,Grad Programme,the Inv...",",Required Fields of Study",,
3,Nedbank,Graduate programme,Gauteng,Quantitative Analyst Graduate Programme 2023 /...,ResponsibilitiesSeek opportunities to improve ...,,,,",Nedbank,Contribute,the Nedbank Quants Trainin...",",Required Fields,Pure Mathematics",,
4,BBD,Graduate programme,Gauteng,Graduate Programme 2023,BBD is an international software firm that sol...,,,,",BBD,BBD,Continuous Learning Programme,The BBD...",,,


In [37]:
collist = ["FAC", "GPE", "ORG", "PERSON", "PRODUCT", "WORK_OF_ART"]
df = pd.DataFrame( columns=collist)

In [38]:
joblist = []

In [39]:
import re
for i in collist:
    for ind, j in enumerate(jobsDf[i]):
        refined = set(str(j).replace("0", "").replace("...\n","").split(","))
        reflist = list(refined)
        filteredls = [element for element in reflist if len(element) >= 3]
        finalstr = ', '.join(filteredls)
        finalstr = re.sub(r'\s+|[()\xa0&]', ' ', finalstr)
        word_list = re.findall(r'[A-Z][a-z]*', finalstr)
        filteredls = [element for element in word_list if len(element) >= 3]
        filteredls = list(set(filteredls))
        finalstr = ', '.join(filteredls)
        joblist.append(finalstr)
    df[i] = joblist
    joblist = []

In [40]:
df["Agg"] = df["FAC"] + df["GPE"] + df["ORG"] + df["PERSON"] + df["PRODUCT"] + df["WORK_OF_ART"]

In [41]:
df["Agg"][1]

'The, Opportunities, Tiger, Degree, Technology, Communication, Information, Graduate, Computer, Brands, Food, Chemical, Mechanical, Strategic, Economics, Internship, Eng, Science, Engineering, AccountingGraduate, CustomerSupply, Chain'

In [42]:
df["Agg"]

0      South, AfricaChemical, Institution, Digital, E...
1      The, Opportunities, Tiger, Degree, Technology,...
2      Commerce, Informatics, Investec, Grad, Busines...
3      Improve, Nedbank, Financial, Quantitative, Con...
4      The, Continuous, Informatics, Grad, Programme,...
                             ...                        
193    South, AfricaWholesale, Institution, Group, In...
194    Commerce, Industrial, Responsibilities, Busine...
195    GqeberhaInternational, Skills, Durban, Group, ...
196                                   Theory, Accounting
197                                   Danone, Operations
Name: Agg, Length: 198, dtype: object

In [43]:
extjobs = jobsDf[["companyName", "jobType", "location", "position", "description"]]

In [44]:
extjobs["Aggregation"] = df["Agg"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  extjobs["Aggregation"] = df["Agg"]


In [45]:
extjobs

Unnamed: 0,companyName,jobType,location,position,description,Aggregation
0,Hatch,Graduate programme,Gauteng,Chemical Engineering Graduate Programme 2023,Hatch is currently seeking the top performing ...,"South, AfricaChemical, Institution, Digital, E..."
1,Tiger Brands,Graduate programme,Nationwide,Future Leaders Graduate Development Programme ...,The Tiger Brands Future Leaders Development Pr...,"The, Opportunities, Tiger, Degree, Technology,..."
2,Investec,Graduate programme,Gauteng,Tech Graduate Programme 2023,Investec Sandton is looking for Tech Graduates...,"Commerce, Informatics, Investec, Grad, Busines..."
3,Nedbank,Graduate programme,Gauteng,Quantitative Analyst Graduate Programme 2023 /...,ResponsibilitiesSeek opportunities to improve ...,"Improve, Nedbank, Financial, Quantitative, Con..."
4,BBD,Graduate programme,Gauteng,Graduate Programme 2023,BBD is an international software firm that sol...,"The, Continuous, Informatics, Grad, Programme,..."
...,...,...,...,...,...,...
8,Mr Price,Vacation work,KwaZulu-Natal,Winter Vacation Workplace Experience Programme...,Workplace Experience Programme (WEP) is our an...,"South, AfricaWholesale, Institution, Group, In..."
9,Visa,Vacation work,Gauteng,June Vacation Internship - Data Science (For C...,ResponsibilitiesBe an out-of-the-box thinker w...,"Commerce, Industrial, Responsibilities, Busine..."
10,Citi,Vacation work,Gauteng,EMEA Banking Winter Vacation Internship Progra...,Whether you’ve got prior financial industry ex...,"GqeberhaInternational, Skills, Durban, Group, ..."
11,Allan Gray,Vacation work,Cape Town,Internship for final year students 2023,Purpose of the role: Our Client Service Centre...,"Theory, Accounting"


In [46]:
import gensim.downloader as api
from gensim.models import KeyedVectors
from typing import Type
# Load pre-trained word-vectors from gensim-data
word_vectors = api.load("glove-wiki-gigaword-100") 

def print_synonyms(word_vectors: Type[KeyedVectors], seed_word: str, n_synonyms):
    result = word_vectors.most_similar(positive=[seed_word])
    print(f"The synonyms for '{seed_word}':")
    for r in result[:n_synonyms]:
        print(r[0], r[1])


The synonyms for 'consulting':
consultant 0.7932726144790649
consultancy 0.7872053980827332
firm 0.7739571332931519
consultants 0.7732748985290527
management 0.7026959657669067
associates 0.6969802379608154
business 0.6804054975509644
mckinsey 0.6548133492469788
marketing 0.6362238526344299
company 0.6341273188591003


In [73]:
def synoget(word_vectors: Type[KeyedVectors], seed_word: str):
    synlis = []
    try:
        result = word_vectors.most_similar(positive=[seed_word.lower()])
        for r in result:
            synlis.append(r[0])
    except:
        print("None")
    return synlis

In [79]:
synoget(word_vectors, seed_word="Institution")

['institutions',
 'society',
 'institute',
 'established',
 'private',
 'university',
 'foundation',
 'academic',
 'education',
 'educational']

In [99]:
##Extended tag generation
finaltagls = []
for ind, i in enumerate(extjobs["Aggregation"]):
    synonym_ls = []
    i = i.replace(" ", "")
    list_tags = i.split(",")
    for i in list_tags:
        synonym_ls.append(i)
        synonym_ls = synonym_ls + synoget(word_vectors, seed_word=i)
    synowords = ', '.join(synonym_ls)
    finaltagls.append(synowords)
    print(synonym_ls)
    print(" ")

None
['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaChemical', 'Institution', 'institutions', 'society', 'institute', 'established', 'private', 'university', 'foundation', 'academic', 'education', 'educational', 'Digital', 'electronic', 'analog', 'video', 'audio', 'multimedia', 'computer', 'wireless', 'interactive', 'internet', 'computers', 'Energy', 'resources', 'gas', 'renewable', 'natural', 'petroleum', 'electricity', 'oil', 'power', 'development', 'fuel', 'Metals', 'minerals', 'copper', 'commodities', 'ferrous', 'commodity', 'ores', 'aluminium', 'alloys', 'chemicals', 'zinc', 'Mining', 'coal', 'mines', 'copper', 'industrial', 'ore', 'mine', 'mineral', 'exploration', 'minerals', 'logging', 'Eng', 'fra', 'aus', 'capt', 'saracens', 'leicester', 'rsa', 'esp', 'harlequins', '/', 'gareth', 'Engineering', 'sciences', 'technology', 'technical', 'science', 'mechanical', 'engineers', 'institute', 'mathematics', 'biome

None
['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaInstrumentation', 'Telecommunication', 'telecommunications', 'telecoms', 'telecom', 'communications', 'wireless', 'communication', 'telephony', 'infrastructure', 'services', 'provider', 'Competencies', 'competences', 'competence', 'competency', 'self-discipline', 'strengths', 'methodologies', 'qualifications', 'teamwork', 'proficiencies', 'abilities', 'Opportunities', 'opportunity', 'possibilities', 'advantages', 'advantage', 'benefit', 'experience', 'innovation', 'resources', 'development', 'encourage', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Tech', 'technology', 'electronics', 'computer', 'chip', 'big', 'business', 'texas', 'industrial', 'adva

None
['Commerce', 'trade', 'department', 'transportation', 'tourism', 'finance', 'industry', 'affairs', 'agriculture', 'business', 'bureau', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Responsibilities', 'duties', 'responsibility', 'obligations', 'tasks', 'matters', 'fulfill', 'priorities', 'delegated', 'objectives', 'entrusted', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Systems', 'system', 'technologies', 'equipment', 'technology', 'software', 'computer', 'devices', 'components', 'communications', 'communication', 'Deliver', '

['AccountingOpportunities', 'Post', 'office', 'times', 'news', 'end', 'following', 'new', 'time', 'first', 'newspaper', 'recently', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Mathematics', 'physics', 'mathematical', 'science', 'biology', 'chemistry', 'sociology', 'philosophy', 'humanities', 'linguistics', 'sciences', 'Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Standard', 'standards', 'basic', 'system', 'definition', 'example', 'same', 'code', 'type', 'use', 'common', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Investment', 'investments', 'fund', 'asset', 'financial', 'firms', 'equity', 'funds', 'business', 'portfolio', 'sector', 'Statistics', 'figures', 'report', 'data', 'statistic', 'bureau', 'acc

['Import', 'export', 'imports', 'exports', 'imported', 'tariffs', 'goods', 'importing', 'tariff', 'importation', 'quotas', 'Internal', 'external', 'ongoing', 'security', 'problems', 'investigations', 'control', 'political', 'investigation', 'current', 'source', 'Research', 'studies', 'study', 'scientific', 'institute', 'science', 'researchers', 'analysis', 'development', 'laboratory', 'researcher', 'Export', 'import', 'exports', 'imports', 'goods', 'exporting', 'trade', 'output', 'demand', 'products', 'imported', 'Cleansing', 'massacres', 'genocide', 'atrocities', 'bloodletting', 'extermination', 'genocidal', 'repression', 'systematic', 'albanians', 'ritual', 'Com', 'dot', 'harrynytimes', 'e', 'telegram', 'pager', 'nn', 'coms', 'web', 'berrisnytimes', 'internet', 'External', 'internal', 'structural', 'foreign', 'direct', 'and/or', 'current', 'system', 'affairs', 'budgetary', 'input', 'Market', 'markets', 'stock', 'prices', 'stocks', 'investors', 'retail', 'business', 'consumer', 'tradi

None
None
None
['Power', 'control', 'electricity', 'powers', 'energy', 'over', 'pressure', 'system', 'turn', 'government', 'bring', 'Detective', 'detectives', 'cop', 'investigator', 'lapd', 'nypd', 'homicide', 'inspector', 'undercover', 'sheriff', 'fbi', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Overview', 'chronology', 'in-depth', 'timeline', 'detailed', 'illustration', 'description', 'graphic', 'biographical', 'synopsis', 'outline', 'Planning', 'plans', 'planned', 'preparing', 'responsible', 'plan', 'development', 'activities', 'involved', 'continue', 'begin', 'Controls', 'control', 'controlled', 'controlling', 'power', 'restrictions', 'system', 'allows', 'grip', 'limits', 'regulation', 'Training', 'preparation', 'skills', 'teaching', 'combat', 'program', 'trained', 'instructors', 'personnel', 'operations', 'courses', 'Computer', 'computers', 'software', 'technology', 'pc', 'hardware',

['Standard', 'standards', 'basic', 'system', 'definition', 'example', 'same', 'code', 'type', 'use', 'common', 'Group', 'groups', 'based', 'members', 'organization', 'led', 'which', 'formed', 'alliance', 'joined', 'called', 'Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'Development', 'projects', 'project', 'developing', 'environment', 'research', 'economic', 'cooperation', 'planning', 'develop', 'creation', 'Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Statistics', 'figures', 'report', 'data', 'statistic', 'bureau', 'according', 'estimates', 'statistical', 'survey', 'gdp', 'Bank', 'banks', 'banking', 'credit', 'investment', 'financial', 'securities', 'lending', 'fund

None
['Academy', 'arts', 'school', 'institute', 'college', 'sciences', 'university', 'graduate', 'science', 'graduating', 'graduated', 'The', 'this', 'part', 'one', 'of', 'same', 'first', 'on', 'its', 'as', 'that', 'Tools', 'tool', 'techniques', 'methods', 'equipment', 'software', 'hardware', 'useful', 'computers', 'materials', 'machines', 'Insurance', 'insurers', 'pension', 'premiums', 'care', 'insurer', 'savings', 'credit', 'health', 'payments', 'pay', 'Term', 'terms', 'current', 'means', 'given', 'likely', 'change', 'as', 'future', 'considered', 'interest', 'Short', 'long', 'shorter', 'longer', 'well', 'though', 'time', 'making', 'instead', 'rather', 'few', 'Brokerages', 'brokers', 'brokerage', 'lenders', 'insurers', 'banks', 'issuers', 'firms', 'securities', 'bankers', 'steelmakers', 'Our', 'their', 'my', 'your', 'own', 'we', 'way', 'what', 'good', 'whatever', 'need', 'Laptop', 'laptops', 'computers', 'phones', 'portable', 'desktop', 'ipod', 'computer', 'handheld', 'pc', 'cellphone

None
None
['Worcester', 'norwich', 'exeter', 'durham', 'northampton', 'peterborough', 'birmingham', 'chelmsford', 'essex', 'hereford', 'stamford', 'JohannesburgAccountants', 'African', 'africa', 'africans', 'asian', 'south', 'nations', 'continent', 'country', 'zimbabwe', 'american', 'kenya', 'Professional', 'amateur', 'football', 'career', 'player', 'club', 'players', 'playing', 'team', 'sports', 'basketball', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Institute', 'research', 'sciences', 'studies', 'science', 'university', 'study', 'academy', 'foundation', 'society', 'professor', 'Pastel', 'pastels', 'shades', 'monochromatic', 'watercolor', 'multicolored', 'hues', 'palette', 'garish', 'canvases', 'shimmering', 'South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'Stellenbosch', 'paarl', 'potchefstroom', 'witwatersrand', 'kwazulu-natal', 'khaye

None
['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaThe', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Fraud', 'bribery', 'corruption', 'embezzlement', 'charges', 'evasion', 'irregularities', 'alleging', 'theft', 'laundering', 'allegations', 'Development', 'projects', 'project', 'developing', 'environment', 'research', 'economic', 'cooperation', 'planning', 'develop', 'creation', 'Compliance', 'comply', 'requirements', 'implementation', 'standards', 'verification', 'supervision', 'inspections', 'obligations', 'complying', 'disarmament', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'RiskRequired', 'Fields', 'field', 'areas', 'natural', 'agricultural', 'well', 'projects', 'surrounding', 'farming', 'land', 'development']
 
None
None
Non

None
None
['Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'Team', 'teams', 'squad', 'football', 'players', 'coach', 'player', 'league', 'season', 'soccer', 'championship', 'Visualisation', 'visualization', 'visualizations', 'workflow', 'texturing', 'deduplication', 'curation', 'transcoding', 'microfluidics', 'wood-carving', 'collocation', 'Administration', 'government', 'policy', 'bush', 'congress', 'officials', 'clinton', 'office', 'department', 'federal', 'washington', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Ops', 'paratroop', 'sof', 'afterschool', 'slugging', 'intranets', 'nightwatch', 'obp', 'mnf', 'cubicle', 'condominiums', 'Market', 'markets', 'stock', 'prices', 'stocks', 'investors', 'retail', 'business', 'consumer', 'trading', 'price', 'Project', 'projects', 'development', 'program', 'plan', 'const

None
None
['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaGroup', 'Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Mechatronics', 'bio-technology', 'bio-medical', 'bioprocess', 'geomatics', 'mechatronic', 'electronical', 'hydraulics', 'bioengineering', 'component-based', 'psychophysics', 'Administration', 'government', 'policy', 'bush', 'congress', 'officials', 'clinton', 'office', 'department', 'federal', 'washington', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Information', 'data', 'knowledge', 'access', 'provided', 'intelligence', 'source', 'documents', 'web', 'internet', 'provide', 'C

None
['Point', 'points', 'one', 'time', 'just', 'close', 'this', 'only', 'same', 'difference', 'end', 'Skills', 'skill', 'abilities', 'talents', 'expertise', 'learning', 'ability', 'knowledge', 'experience', 'talent', 'techniques', 'Team', 'teams', 'squad', 'football', 'players', 'coach', 'player', 'league', 'season', 'soccer', 'championship', 'Weekly', 'daily', 'monthly', 'newspaper', 'magazine', 'publication', 'news', 'publishes', 'published', 'newspapers', 'quarterly', 'Power', 'control', 'electricity', 'powers', 'energy', 'over', 'pressure', 'system', 'turn', 'government', 'bring', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Competencies', 'competences', 'competence', 'competency', 'self-discipline', 'strengths', 'methodologies', 'qualifications', 'teamwork', 'proficiencies', 'abilities', 'Register', 'registered', 'registry', 'registration', 'listed', 'listing', 'check', 'lists', 'voter', 'required',

['Standard', 'standards', 'basic', 'system', 'definition', 'example', 'same', 'code', 'type', 'use', 'common', 'Requirements', 'requirement', 'standards', 'regulations', 'required', 'rules', 'criteria', 'limits', 'guidelines', 'compliance', 'require', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Programmes', 'programme', 'programs', 'program', 'educational', 'projects', 'programming', 'workshops', 'courses', 'curriculum', 'documentaries', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Applicant', 'applicants', 'eligibility', 'prospective', 'eligible', 'apply', 'criteria', 'respondent', 'employer', 'requirement', 'determines', 'Bank', 'banks', 'banking', 'credit', 'investment', 'f

None
['Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Information', 'data', 'knowledge', 'access', 'provided', 'intelligence', 'source', 'documents', 'web', 'internet', 'provide', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Legal', 'law', 'constitutional', 'rights', 'judicial', 'case', 'political', 'laws', 'issue', 'court', 'civil', 'Computer', 'computers', 'software', 'technology', 'pc', 'hardware', 'internet', 'desktop', 'electro

None
['Improve', 'improving', 'enhance', 'improved', 'strengthen', 'maintain', 'develop', 'boost', 'expand', 'ensure', 'better', 'Nedbank', 'cosafa', 'twenty-20', 'vodacom', 'dunhill', 'absa', 'invitational', 'sundarbans', 'canada-russia', 'jeonju', 'everland', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Quantitative', 'qualitative', 'empirical', 'analysis', 'methodology', 'analytical', 'methods', 'analyses', 'behavioral', 'descriptive', 'theoretical', 'Contribute', 'contributes', 'encourage', 'reduce', 'benefit', 'promote', 'enhance', 'improve', 'achieve', 'expand', 'accelerate', 'Training', 'preparation', 'skills', 'teaching', 'combat', 'program', 'trained', 'instructors', 'personnel', 'operations', 'courses', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Quants', 'b-boys', 'paleoconservatives', 'moscovites'

['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaWholesale', 'Institution', 'institutions', 'society', 'institute', 'established', 'private', 'university', 'foundation', 'academic', 'education', 'educational', 'Group', 'groups', 'based', 'members', 'organization', 'led', 'which', 'formed', 'alliance', 'joined', 'called', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Digital', 'electronic', 'analog', 'video', 'audio', 'multimedia', 'computer', 'wireless', 'interactive', 'internet', 'computers', 'Education', 'educational', 'schools', 'teaching', 'curriculum', 'programs', 'social', 'health', 'school', 'academic', 'welfare', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Marketing', 'advertising', 'business', 'merchandising', 'product', 'management',

['Geology', 'mineralogy', 'zoology', 'geography', 'archaeology', 'paleontology', 'botany', 'geomorphology', 'archeology', 'anthropology', 'biology', 'Chemical', 'chemicals', 'biological', 'toxic', 'materials', 'biochemical', 'compounds', 'industrial', 'components', 'nuclear', 'pesticides', 'Trainee', 'apprentice', 'full-time', 'part-time', 'janitor', 'technician', 'recruiter', 'trainees', 'paralegal', 'instructor', 'tutor', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Operations', 'operation', 'unit', 'operating', 'personnel', 'military', 'services', 'command', 'operational', 'units', 'force', 'Bsc', 'hons', 'b.sc', 'phd', 'ph.d', 'm.sc', 'bachelor', 'mphil', 'doctorate', 'b.a.', 'bachelors', 'Engineer', 'engineers', 'technician', 'mechanic', 'engineerin

None
None
['Geology', 'mineralogy', 'zoology', 'geography', 'archaeology', 'paleontology', 'botany', 'geomorphology', 'archeology', 'anthropology', 'biology', 'Forensic', 'forensics', 'pathologist', 'investigator', 'examination', 'pathologists', 'investigators', 'examined', 'ballistics', 'detectives', 'expert', 'Opportunities', 'opportunity', 'possibilities', 'advantages', 'advantage', 'benefit', 'experience', 'innovation', 'resources', 'development', 'encourage', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Sustainability', 'sustainable', 'governance', 'innovation', 'environmental', 'ecological', 'entrepreneurship', 'biodiversity', 'competitiveness', 'conservation', 'excellence', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Crime', 'criminal', 'crimes', 'murder', 'trafficking', 'terrorism', 'homicide', 'corru

None
['Johannesburg', 'pretoria', 'harare', 'durban', 'nairobi', 'soweto', 'sydney', 'perth', 'bangkok', 'sandton', 'bulawayo', 'GautengInformatics', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Risk', 'risks', 'danger', 'exposure', 'potential', 'cause', 'avoid', 'consequences', 'likelihood', 'serious', 'potentially', 'Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Information', 'data', 'knowledge', 'access', 'provided', 'intelligence', 'source', 'documents', 'web', 'internet', 'provide', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Audit', 'auditing', 'audits', 'auditors',

None
None
None
['Power', 'control', 'electricity', 'powers', 'energy', 'over', 'pressure', 'system', 'turn', 'government', 'bring', 'Detective', 'detectives', 'cop', 'investigator', 'lapd', 'nypd', 'homicide', 'inspector', 'undercover', 'sheriff', 'fbi', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Overview', 'chronology', 'in-depth', 'timeline', 'detailed', 'illustration', 'description', 'graphic', 'biographical', 'synopsis', 'outline', 'Planning', 'plans', 'planned', 'preparing', 'responsible', 'plan', 'development', 'activities', 'involved', 'continue', 'begin', 'Controls', 'control', 'controlled', 'controlling', 'power', 'restrictions', 'system', 'allows', 'grip', 'limits', 'regulation', 'Training', 'preparation', 'skills', 'teaching', 'combat', 'program', 'trained', 'instructors', 'personnel', 'operations', 'courses', 'Computer', 'computers', 'software', 'technology', 'pc', 'hardware',

['Standard', 'standards', 'basic', 'system', 'definition', 'example', 'same', 'code', 'type', 'use', 'common', 'Group', 'groups', 'based', 'members', 'organization', 'led', 'which', 'formed', 'alliance', 'joined', 'called', 'Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'Development', 'projects', 'project', 'developing', 'environment', 'research', 'economic', 'cooperation', 'planning', 'develop', 'creation', 'Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Statistics', 'figures', 'report', 'data', 'statistic', 'bureau', 'according', 'estimates', 'statistical', 'survey', 'gdp', 'Bank', 'banks', 'banking', 'credit', 'investment', 'financial', 'securities', 'lending', 'fund

None
['Verbal', 'insults', 'implicit', 'responses', 'nonverbal', 'rhetorical', 'taunts', 'slurs', 'non-verbal', 'explicit', 'outbursts', 'Skills', 'skill', 'abilities', 'talents', 'expertise', 'learning', 'ability', 'knowledge', 'experience', 'talent', 'techniques', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Competencies', 'competences', 'competence', 'competency', 'self-discipline', 'strengths', 'methodologies', 'qualifications', 'teamwork', 'proficiencies', 'abilities', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Administration', 'government', 'policy', 'bush', 'congress', 'officials', 'clinton', 'office', 'department', 'federal', 'washington', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'Computer', 'compute

None
None
['Worcester', 'norwich', 'exeter', 'durham', 'northampton', 'peterborough', 'birmingham', 'chelmsford', 'essex', 'hereford', 'stamford', 'JohannesburgAccountants', 'African', 'africa', 'africans', 'asian', 'south', 'nations', 'continent', 'country', 'zimbabwe', 'american', 'kenya', 'Professional', 'amateur', 'football', 'career', 'player', 'club', 'players', 'playing', 'team', 'sports', 'basketball', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Institute', 'research', 'sciences', 'studies', 'science', 'university', 'study', 'academy', 'foundation', 'society', 'professor', 'Pastel', 'pastels', 'shades', 'monochromatic', 'watercolor', 'multicolored', 'hues', 'palette', 'garish', 'canvases', 'shimmering', 'South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'Stellenbosch', 'paarl', 'potchefstroom', 'witwatersrand', 'kwazulu-natal', 'khaye

None
None
['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaThe', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Fraud', 'bribery', 'corruption', 'embezzlement', 'charges', 'evasion', 'irregularities', 'alleging', 'theft', 'laundering', 'allegations', 'Development', 'projects', 'project', 'developing', 'environment', 'research', 'economic', 'cooperation', 'planning', 'develop', 'creation', 'Compliance', 'comply', 'requirements', 'implementation', 'standards', 'verification', 'supervision', 'inspections', 'obligations', 'complying', 'disarmament', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'RiskRequired', 'Fields', 'field', 'areas', 'natural', 'agricultural', 'well', 'projects', 'surrounding', 'farming', 'land', 'development']
 
None
Non

None
None
['Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'Team', 'teams', 'squad', 'football', 'players', 'coach', 'player', 'league', 'season', 'soccer', 'championship', 'Visualisation', 'visualization', 'visualizations', 'workflow', 'texturing', 'deduplication', 'curation', 'transcoding', 'microfluidics', 'wood-carving', 'collocation', 'Administration', 'government', 'policy', 'bush', 'congress', 'officials', 'clinton', 'office', 'department', 'federal', 'washington', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Ops', 'paratroop', 'sof', 'afterschool', 'slugging', 'intranets', 'nightwatch', 'obp', 'mnf', 'cubicle', 'condominiums', 'Market', 'markets', 'stock', 'prices', 'stocks', 'investors', 'retail', 'business', 'consumer', 'trading', 'price', 'Project', 'projects', 'development', 'program', 'plan', 'const

None
None
['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaGroup', 'Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Mechatronics', 'bio-technology', 'bio-medical', 'bioprocess', 'geomatics', 'mechatronic', 'electronical', 'hydraulics', 'bioengineering', 'component-based', 'psychophysics', 'Administration', 'government', 'policy', 'bush', 'congress', 'officials', 'clinton', 'office', 'department', 'federal', 'washington', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Information', 'data', 'knowledge', 'access', 'provided', 'intelligence', 'source', 'documents', 'web', 'internet', 'provide', 'C

None
['Point', 'points', 'one', 'time', 'just', 'close', 'this', 'only', 'same', 'difference', 'end', 'Skills', 'skill', 'abilities', 'talents', 'expertise', 'learning', 'ability', 'knowledge', 'experience', 'talent', 'techniques', 'Team', 'teams', 'squad', 'football', 'players', 'coach', 'player', 'league', 'season', 'soccer', 'championship', 'Weekly', 'daily', 'monthly', 'newspaper', 'magazine', 'publication', 'news', 'publishes', 'published', 'newspapers', 'quarterly', 'Power', 'control', 'electricity', 'powers', 'energy', 'over', 'pressure', 'system', 'turn', 'government', 'bring', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Competencies', 'competences', 'competence', 'competency', 'self-discipline', 'strengths', 'methodologies', 'qualifications', 'teamwork', 'proficiencies', 'abilities', 'Register', 'registered', 'registry', 'registration', 'listed', 'listing', 'check', 'lists', 'voter', 'required',

['Standard', 'standards', 'basic', 'system', 'definition', 'example', 'same', 'code', 'type', 'use', 'common', 'Requirements', 'requirement', 'standards', 'regulations', 'required', 'rules', 'criteria', 'limits', 'guidelines', 'compliance', 'require', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Programmes', 'programme', 'programs', 'program', 'educational', 'projects', 'programming', 'workshops', 'courses', 'curriculum', 'documentaries', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Finance', 'banking', 'foreign', 'minister', 'financial', 'commerce', 'investment', 'reform', 'government', 'fund', 'affairs', 'Applicant', 'applicants', 'eligibility', 'prospective', 'eligible', 'apply', 'criteria', 'respondent', 'employer', 'requirement', 'determines', 'Bank', 'banks', 'banking', 'credit', 'investment', 'f

None
['Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Africa', 'african', 'continent', 'asia', 'zimbabwe', 'south', 'australia', 'kenya', 'nations', 'africans', 'countries', 'Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Information', 'data', 'knowledge', 'access', 'provided', 'intelligence', 'source', 'documents', 'web', 'internet', 'provide', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Legal', 'law', 'constitutional', 'rights', 'judicial', 'case', 'political', 'laws', 'issue', 'court', 'civil', 'Computer', 'computers', 'software', 'technology', 'pc', 'hardware', 'internet', 'desktop', 'electro

None
['Improve', 'improving', 'enhance', 'improved', 'strengthen', 'maintain', 'develop', 'boost', 'expand', 'ensure', 'better', 'Nedbank', 'cosafa', 'twenty-20', 'vodacom', 'dunhill', 'absa', 'invitational', 'sundarbans', 'canada-russia', 'jeonju', 'everland', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Quantitative', 'qualitative', 'empirical', 'analysis', 'methodology', 'analytical', 'methods', 'analyses', 'behavioral', 'descriptive', 'theoretical', 'Contribute', 'contributes', 'encourage', 'reduce', 'benefit', 'promote', 'enhance', 'improve', 'achieve', 'expand', 'accelerate', 'Training', 'preparation', 'skills', 'teaching', 'combat', 'program', 'trained', 'instructors', 'personnel', 'operations', 'courses', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Quants', 'b-boys', 'paleoconservatives', 'moscovites'

['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaWholesale', 'Institution', 'institutions', 'society', 'institute', 'established', 'private', 'university', 'foundation', 'academic', 'education', 'educational', 'Group', 'groups', 'based', 'members', 'organization', 'led', 'which', 'formed', 'alliance', 'joined', 'called', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Digital', 'electronic', 'analog', 'video', 'audio', 'multimedia', 'computer', 'wireless', 'interactive', 'internet', 'computers', 'Education', 'educational', 'schools', 'teaching', 'curriculum', 'programs', 'social', 'health', 'school', 'academic', 'welfare', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Marketing', 'advertising', 'business', 'merchandising', 'product', 'management',

['Geology', 'mineralogy', 'zoology', 'geography', 'archaeology', 'paleontology', 'botany', 'geomorphology', 'archeology', 'anthropology', 'biology', 'Chemical', 'chemicals', 'biological', 'toxic', 'materials', 'biochemical', 'compounds', 'industrial', 'components', 'nuclear', 'pesticides', 'Trainee', 'apprentice', 'full-time', 'part-time', 'janitor', 'technician', 'recruiter', 'trainees', 'paralegal', 'instructor', 'tutor', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Operations', 'operation', 'unit', 'operating', 'personnel', 'military', 'services', 'command', 'operational', 'units', 'force', 'Bsc', 'hons', 'b.sc', 'phd', 'ph.d', 'm.sc', 'bachelor', 'mphil', 'doctorate', 'b.a.', 'bachelors', 'Engineer', 'engineers', 'technician', 'mechanic', 'engineerin

None
None
['Geology', 'mineralogy', 'zoology', 'geography', 'archaeology', 'paleontology', 'botany', 'geomorphology', 'archeology', 'anthropology', 'biology', 'Forensic', 'forensics', 'pathologist', 'investigator', 'examination', 'pathologists', 'investigators', 'examined', 'ballistics', 'detectives', 'expert', 'Opportunities', 'opportunity', 'possibilities', 'advantages', 'advantage', 'benefit', 'experience', 'innovation', 'resources', 'development', 'encourage', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Sustainability', 'sustainable', 'governance', 'innovation', 'environmental', 'ecological', 'entrepreneurship', 'biodiversity', 'competitiveness', 'conservation', 'excellence', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Crime', 'criminal', 'crimes', 'murder', 'trafficking', 'terrorism', 'homicide', 'corru

None
['Johannesburg', 'pretoria', 'harare', 'durban', 'nairobi', 'soweto', 'sydney', 'perth', 'bangkok', 'sandton', 'bulawayo', 'GautengInformatics', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Risk', 'risks', 'danger', 'exposure', 'potential', 'cause', 'avoid', 'consequences', 'likelihood', 'serious', 'potentially', 'Technology', 'technologies', 'computer', 'tech', 'software', 'systems', 'innovation', 'technological', 'engineering', 'research', 'computing', 'Information', 'data', 'knowledge', 'access', 'provided', 'intelligence', 'source', 'documents', 'web', 'internet', 'provide', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Audit', 'auditing', 'audits', 'auditors',

None
None
None
['Power', 'control', 'electricity', 'powers', 'energy', 'over', 'pressure', 'system', 'turn', 'government', 'bring', 'Detective', 'detectives', 'cop', 'investigator', 'lapd', 'nypd', 'homicide', 'inspector', 'undercover', 'sheriff', 'fbi', 'Management', 'financial', 'business', 'managers', 'investment', 'development', 'consulting', 'managing', 'marketing', 'corporate', 'asset', 'Overview', 'chronology', 'in-depth', 'timeline', 'detailed', 'illustration', 'description', 'graphic', 'biographical', 'synopsis', 'outline', 'Planning', 'plans', 'planned', 'preparing', 'responsible', 'plan', 'development', 'activities', 'involved', 'continue', 'begin', 'Controls', 'control', 'controlled', 'controlling', 'power', 'restrictions', 'system', 'allows', 'grip', 'limits', 'regulation', 'Training', 'preparation', 'skills', 'teaching', 'combat', 'program', 'trained', 'instructors', 'personnel', 'operations', 'courses', 'Computer', 'computers', 'software', 'technology', 'pc', 'hardware',

['Standard', 'standards', 'basic', 'system', 'definition', 'example', 'same', 'code', 'type', 'use', 'common', 'Group', 'groups', 'based', 'members', 'organization', 'led', 'which', 'formed', 'alliance', 'joined', 'called', 'Software', 'computer', 'hardware', 'microsoft', 'applications', 'technology', 'server', 'user', 'computers', 'desktop', 'web', 'Development', 'projects', 'project', 'developing', 'environment', 'research', 'economic', 'cooperation', 'planning', 'develop', 'creation', 'Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Statistics', 'figures', 'report', 'data', 'statistic', 'bureau', 'according', 'estimates', 'statistical', 'survey', 'gdp', 'Bank', 'banks', 'banking', 'credit', 'investment', 'financial', 'securities', 'lending', 'fund

None
['Academy', 'arts', 'school', 'institute', 'college', 'sciences', 'university', 'graduate', 'science', 'graduating', 'graduated', 'The', 'this', 'part', 'one', 'of', 'same', 'first', 'on', 'its', 'as', 'that', 'Tools', 'tool', 'techniques', 'methods', 'equipment', 'software', 'hardware', 'useful', 'computers', 'materials', 'machines', 'Insurance', 'insurers', 'pension', 'premiums', 'care', 'insurer', 'savings', 'credit', 'health', 'payments', 'pay', 'Term', 'terms', 'current', 'means', 'given', 'likely', 'change', 'as', 'future', 'considered', 'interest', 'Short', 'long', 'shorter', 'longer', 'well', 'though', 'time', 'making', 'instead', 'rather', 'few', 'Brokerages', 'brokers', 'brokerage', 'lenders', 'insurers', 'banks', 'issuers', 'firms', 'securities', 'bankers', 'steelmakers', 'Our', 'their', 'my', 'your', 'own', 'we', 'way', 'what', 'good', 'whatever', 'need', 'Laptop', 'laptops', 'computers', 'phones', 'portable', 'desktop', 'ipod', 'computer', 'handheld', 'pc', 'cellphone

None
None
['Worcester', 'norwich', 'exeter', 'durham', 'northampton', 'peterborough', 'birmingham', 'chelmsford', 'essex', 'hereford', 'stamford', 'JohannesburgAccountants', 'African', 'africa', 'africans', 'asian', 'south', 'nations', 'continent', 'country', 'zimbabwe', 'american', 'kenya', 'Professional', 'amateur', 'football', 'career', 'player', 'club', 'players', 'playing', 'team', 'sports', 'basketball', 'Financial', 'banking', 'corporate', 'economic', 'credit', 'investment', 'business', 'global', 'management', 'fund', 'banks', 'Institute', 'research', 'sciences', 'studies', 'science', 'university', 'study', 'academy', 'foundation', 'society', 'professor', 'Pastel', 'pastels', 'shades', 'monochromatic', 'watercolor', 'multicolored', 'hues', 'palette', 'garish', 'canvases', 'shimmering', 'South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'Stellenbosch', 'paarl', 'potchefstroom', 'witwatersrand', 'kwazulu-natal', 'khaye

None
['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaThe', 'Graduate', 'undergraduate', 'graduating', 'harvard', 'enrolled', 'graduated', 'college', 'graduates', 'faculty', 'university', 'yale', 'Fraud', 'bribery', 'corruption', 'embezzlement', 'charges', 'evasion', 'irregularities', 'alleging', 'theft', 'laundering', 'allegations', 'Development', 'projects', 'project', 'developing', 'environment', 'research', 'economic', 'cooperation', 'planning', 'develop', 'creation', 'Compliance', 'comply', 'requirements', 'implementation', 'standards', 'verification', 'supervision', 'inspections', 'obligations', 'complying', 'disarmament', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'RiskRequired', 'Fields', 'field', 'areas', 'natural', 'agricultural', 'well', 'projects', 'surrounding', 'farming', 'land', 'development']
 
None
None
Non

None
['Commerce', 'trade', 'department', 'transportation', 'tourism', 'finance', 'industry', 'affairs', 'agriculture', 'business', 'bureau', 'Informatics', 'biomedical', 'neuroscience', 'cybernetics', 'sciences', 'bioinformatics', 'microbiology', 'humanities', 'robotics', 'nanotechnology', 'olympiad', 'Investec', 'lereah', 'calyon', 'bittles', 'numis', 'ehrenkrantz', 'ladwa', 'readerman', 'painewebber', 'pecoriello', 'hambrecht', 'Grad', 'bm-21', 'grads', 'mrkonjic', 'mrkonjić', 'katyusha', 'ib', 'rocket', '122mm', 'sipovo', 'rockets', 'Business', 'industry', 'companies', 'businesses', 'marketing', 'company', 'corporate', 'financial', 'market', 'investment', 'management', 'Programme', 'programmes', 'program', 'programs', 'project', 'bbc', 'development', 'projects', 'funding', 'educational', 'activities', 'Bachelor', 'doctorate', 'degree', 'bachelors', 'graduate', 'ph.d.', 'b.a.', 'undergraduate', 'graduated', 'phd', 'diploma', 'Tech', 'technology', 'electronics', 'computer', 'chip', 'b

['South', 'north', 'west', 'east', 'western', 'southern', 'southeast', 'africa', 'korea', 'northern', 'northeast', 'AfricaWholesale', 'Institution', 'institutions', 'society', 'institute', 'established', 'private', 'university', 'foundation', 'academic', 'education', 'educational', 'Group', 'groups', 'based', 'members', 'organization', 'led', 'which', 'formed', 'alliance', 'joined', 'called', 'Industrial', 'manufacturing', 'industries', 'sector', 'sectors', 'agricultural', 'construction', 'machinery', 'textile', 'industry', 'mining', 'Digital', 'electronic', 'analog', 'video', 'audio', 'multimedia', 'computer', 'wireless', 'interactive', 'internet', 'computers', 'Education', 'educational', 'schools', 'teaching', 'curriculum', 'programs', 'social', 'health', 'school', 'academic', 'welfare', 'Human', 'animal', 'rights', 'humans', 'animals', 'body', 'nature', 'beings', 'organization', 'scientific', 'common', 'Marketing', 'advertising', 'business', 'merchandising', 'product', 'management',

In [110]:
newdf = pd.DataFrame(columns=["ExtendedTags"])

In [111]:
newdf["ExtendedTags"] = finaltagls

In [113]:
newdf["ExtendedTags"][0]

'South, north, west, east, western, southern, southeast, africa, korea, northern, northeast, AfricaChemical, Institution, institutions, society, institute, established, private, university, foundation, academic, education, educational, Digital, electronic, analog, video, audio, multimedia, computer, wireless, interactive, internet, computers, Energy, resources, gas, renewable, natural, petroleum, electricity, oil, power, development, fuel, Metals, minerals, copper, commodities, ferrous, commodity, ores, aluminium, alloys, chemicals, zinc, Mining, coal, mines, copper, industrial, ore, mine, mineral, exploration, minerals, logging, Eng, fra, aus, capt, saracens, leicester, rsa, esp, harlequins, /, gareth, Engineering, sciences, technology, technical, science, mechanical, engineers, institute, mathematics, biomedical, physics, Tertiary, secondary, schooling, post-secondary, vocational, quaternary, colleges, undergraduate, pupils, education, university-level'

In [114]:
newdf.shape

(198, 1)

In [133]:

prompt_words = ['engineering', 'chemical', 'marketing', 'Limpopo']

all_sentences = []

for column in extjobs.columns:
    for index, sentence in enumerate(extjobs[column]):
        tokens = nlp(sentence.lower())
        tokens = [token.text for token in tokens if not token.is_punct and not token.is_stop]
        doc = nlp(" ".join(tokens))
        
        similarities = [doc.similarity(nlp(word.lower())) for word in prompt_words]
        
        max_similarity = max(similarities)
        all_sentences.append((sentence, max_similarity, index))
        
sorted_sentences = sorted(all_sentences, key=lambda x: x[1], reverse=True)

top_10_indexes = [item[2] for item in sorted_sentences[:10]]

print("Indexes of top 10 sentences:", top_10_indexes)


  similarities = [doc.similarity(nlp(word.lower())) for word in prompt_words]


Indexes of top 10 sentences: [77, 124, 15, 15, 84, 153, 15, 84, 153, 15]


In [134]:
prompt_words = ['engineering', 'chemical', 'marketing']
extjobs.iloc[top_10_indexes]

Unnamed: 0,companyName,jobType,location,position,description,Aggregation,ExtendedTags
8,University of Venda,Internship,Limpopo,Internship Programme 2023,The of University of Venda invites application...,"South, AfricaWholesale, Institution, Group, In...","South, north, west, east, western, southern, s..."
55,"Limpopo Department of Sport, Arts & Culture",Internship,Limpopo,Internship Programme 2023,Workplace experience posts for unemployed grad...,"South, Marketing, AfricaSkills, Trainee, Gradu...","South, north, west, east, western, southern, s..."
15,Sasol,Graduate programme,Mpumalanga,Engineering Graduate Programme 2023,This role is for a position in the graduate de...,"Geology, Chemical, Trainee, Industrial, Gradua...","Geology, mineralogy, zoology, geography, archa..."
15,Sasol,Graduate programme,Mpumalanga,Engineering Graduate Programme 2023,This role is for a position in the graduate de...,"Geology, Chemical, Trainee, Industrial, Gradua...","Geology, mineralogy, zoology, geography, archa..."
15,ABSA,Internship,Gauteng,Graduate Internship Programme 2023,Position Title: Risk Management and Dispute Re...,"Geology, Chemical, Trainee, Industrial, Gradua...","Geology, mineralogy, zoology, geography, archa..."
84,National Credit Regulator (NCR),Internship,Gauteng,Procurement Internship Programme 2023,Duration: 12 months contract\nStipend: R5391.0...,"Geology, Chemical, Trainee, Industrial, Gradua...","Geology, mineralogy, zoology, geography, archa..."
15,Sasol,Graduate programme,Mpumalanga,Engineering Graduate Programme 2023,This role is for a position in the graduate de...,"Geology, Chemical, Trainee, Industrial, Gradua...","Geology, mineralogy, zoology, geography, archa..."
15,ABSA,Internship,Gauteng,Graduate Internship Programme 2023,Position Title: Risk Management and Dispute Re...,"Geology, Chemical, Trainee, Industrial, Gradua...","Geology, mineralogy, zoology, geography, archa..."
84,National Credit Regulator (NCR),Internship,Gauteng,Procurement Internship Programme 2023,Duration: 12 months contract\nStipend: R5391.0...,"Geology, Chemical, Trainee, Industrial, Gradua...","Geology, mineralogy, zoology, geography, archa..."
15,Sasol,Graduate programme,Mpumalanga,Engineering Graduate Programme 2023,This role is for a position in the graduate de...,"Geology, Chemical, Trainee, Industrial, Gradua...","Geology, mineralogy, zoology, geography, archa..."


In [None]:
#Outputing the final extdf
extjobs.to_csv("synthesized.csv")