# Enriched PIAAC2ESCO 
We enrich the existing validated mapping by adding the alternative labels and hidden labels of each matched skill as synonyms of that skill.

## To do:
- add other ESCO skills that are complementary to those matched and enriched

In [1]:
# Libraries
import zipfile
import pandas as pd
import numpy as np
import ast

Import ESCO and prepare the dataset

In [59]:
# Load the ESCO skills table straight from the zipped classification export.
# Both the archive and the member handle are closed when the block exits.
with zipfile.ZipFile('utils/ESCO dataset - v1.0.3 - classification - en - csv.zip') as z:
    with z.open('skills_en.csv') as skills_file:
        df = pd.read_csv(skills_file)


def _collect_labels(cells):
    """Return every label of one skill as a flat list, preferred label first.

    `cells` holds the preferredLabel, altLabels and hiddenLabels values of a
    single row. Each value is either a newline-separated string or a missing
    value (NaN). Missing values and empty pieces are skipped, so a skill
    without alternative or hidden labels still keeps its preferred label.
    """
    labels = []
    for cell in cells:
        if not isinstance(cell, str):  # NaN: this kind of label is absent
            continue
        labels.extend(piece for piece in cell.split('\n') if piece)
    return labels


# Create a new column that gathers preferred, alternative and hidden labels.
# Built per row instead of by string concatenation, because:
#   * `str + NaN` is NaN, which wiped the whole label list (preferred label
#     included) for every skill without alternative labels;
#   * stripping the regex '\nnan' also ate the start of real labels that
#     begin with "nan" and glued the rest onto the previous label.
label_rows = df[['preferredLabel', 'altLabels', 'hiddenLabels']].itertuples(index=False, name=None)
df['newcol'] = pd.Series([_collect_labels(row) for row in label_rows], index=df.index)

# altLabels / hiddenLabels themselves stay as raw newline-separated strings;
# only newcol is list-valued.

# Select the columns needed for the enrichment (copy, so later edits never touch df)
df_morelabels = df[['preferredLabel', 'newcol', 'conceptUri']].copy()

In [3]:
# # TEST ESCOskill use word processing software
# df_morelabels[df_morelabels['preferredLabel']=='use word processing software'].explode('newcol')

Import the PIAAC-ESCO mapping

In [71]:
# Workbook column -> name used in this notebook. 'Isco_Level_2' keeps its name.
column_renames = {
    'preferredLabel': 'Isco_Level_2_preferredLabel',
    'Question Id': 'PIAAC_QuestionId',
    'Question Description': 'PIAAC_QuestionDescription',
    'ESCO skill': 'ESCOskill',
    'others_skill_rca': 'ESCOskill_others',
}
df_piaac2esco = pd.read_excel(
    'utils/ESCO_PIAAC_results_mapped_v2.xlsx',
    usecols=['Isco_Level_2', *column_renames],
).rename(columns=column_renames)


def _parse_list_cell(cell):
    """Turn a stringified Python list into a real list.

    Non-string cells (e.g. NaN from an empty spreadsheet cell) are returned
    unchanged instead of making `ast.literal_eval` raise.
    """
    return ast.literal_eval(cell) if isinstance(cell, str) else cell


# Convert lists from string to list dtypes.
# Per-column Series.map replaces the deprecated DataFrame.applymap.
for list_column in ['ESCOskill', 'ESCOskill_others']:
    df_piaac2esco[list_column] = df_piaac2esco[list_column].map(_parse_list_cell)

# Explode the dataset: one row per (question, matched ESCO skill), without repeats
df_piaac2esco_xlpd = (
    df_piaac2esco
    .explode('ESCOskill')
    .loc[:, ['PIAAC_QuestionId', 'PIAAC_QuestionDescription', 'ESCOskill']]
    .drop_duplicates()
)

Merge the piaac2esco mapping and the enriched set of labels

In [78]:
# Version label stamped on every exported row.
# NOTE(review): the ESCO skills table is read from an archive whose file name
# says "v1.0.3", while the export is labelled v1.0.8 -- confirm which is right.
ESCO_VERSION = 'v1.0.8'

# Attach the enriched label list and concept URI to every mapped PIAAC question.
# Mapping skills with no identical ESCO preferredLabel drop out of the inner join.
matched = df_morelabels.merge(
    df_piaac2esco_xlpd,
    left_on='preferredLabel',
    right_on='ESCOskill',
    how='inner',
)

# Fall back to the preferred label when a skill has no usable label list, so
# the skill never ends up in the export as a URI with a missing label.
label_lists = pd.Series(
    [
        labels if isinstance(labels, list) and labels else [preferred]
        for labels, preferred in zip(matched['newcol'], matched['preferredLabel'])
    ],
    index=matched.index,
    dtype=object,
)

# Columns are picked by name (not by position), then one row per label.
df_morelabels_xpld = (
    matched
    .assign(newcol=label_lists)
    .loc[:, ['newcol', 'conceptUri', 'PIAAC_QuestionId', 'PIAAC_QuestionDescription']]
    .rename(columns={'newcol': 'ESCOskill'})
    .explode('ESCOskill')
)

# Select and polish.
# The original mapping rows are not concatenated back in: they carry no
# conceptUri and were always removed by the notna() filter below, and the
# preferred label is already part of every label list.
df_final = (
    df_morelabels_xpld
    .loc[:, ['PIAAC_QuestionId', 'PIAAC_QuestionDescription', 'ESCOskill', 'conceptUri']]
    .sort_values(['PIAAC_QuestionId', 'ESCOskill'])
    .loc[lambda frame: frame['conceptUri'].notna()]
    .drop_duplicates()
    .rename(columns={'ESCOskill': 'ESCO_skill_en', 'conceptUri': 'ESCO_skill_conceptUri'})
    .assign(ESCO_version=ESCO_VERSION)
)

In [81]:
# Export the final mapping as CSV, columns in a fixed order, without the index
export_columns = [
    'PIAAC_QuestionId',
    'PIAAC_QuestionDescription',
    'ESCO_skill_conceptUri',
    'ESCO_skill_en',
    'ESCO_version',
]
df_final.loc[:, export_columns].to_csv('output/PIAAC2ESCO.csv', index=False)

In [82]:
# Preview the final table, columns in the same order as the CSV export
df_final.loc[:, [
    'PIAAC_QuestionId',
    'PIAAC_QuestionDescription',
    'ESCO_skill_conceptUri',
    'ESCO_skill_en',
    'ESCO_version',
]]

Unnamed: 0,PIAAC_QuestionId,PIAAC_QuestionDescription,ESCO_skill_conceptUri,ESCO_skill_en,ESCO_version
32,F_Q02b,instructing training or teaching people indivi...,http://data.europa.eu/esco/skill/87439d9b-b537...,coach young people,v1.0.8
32,F_Q02b,instructing training or teaching people indivi...,http://data.europa.eu/esco/skill/87439d9b-b537...,coach youngsters,v1.0.8
20,F_Q02b,instructing training or teaching people indivi...,http://data.europa.eu/esco/skill/699e7c26-6502...,educate others,v1.0.8
32,F_Q02b,instructing training or teaching people indivi...,http://data.europa.eu/esco/skill/87439d9b-b537...,educate young people,v1.0.8
32,F_Q02b,instructing training or teaching people indivi...,http://data.europa.eu/esco/skill/87439d9b-b537...,facilitate young peoples education,v1.0.8
...,...,...,...,...,...
39,I_Q04l,I like to figure out how different ideas fit t...,http://data.europa.eu/esco/skill/c624c6a3-b0ba...,thinking creatively,v1.0.8
45,I_Q04l,I like to figure out how different ideas fit t...,http://data.europa.eu/esco/skill/e186976a-64f0...,thinking proactively,v1.0.8
39,I_Q04l,I like to figure out how different ideas fit t...,http://data.europa.eu/esco/skill/c624c6a3-b0ba...,use imagination,v1.0.8
39,I_Q04l,I like to figure out how different ideas fit t...,http://data.europa.eu/esco/skill/c624c6a3-b0ba...,visualise completed project,v1.0.8
