Map ONET Skills onto Tasks via Occupations Dataset
---

By Paul Duckworth 17th Nov 2017.

Create a skills/abilities vector per (DWA) task from ONET datasets: Skills, Abilities, Occupations, Tasks, DWAs


In [1]:
import os
import numpy as np
import pandas as pd
import getpass
import cPickle as pickle
from random import shuffle
%matplotlib inline

# Root folder that holds the ONET and survey datasets -- edit to match your machine.
datasets = ''.join(['/home/', getpass.getuser(), '/Datasets/'])
print(datasets)

# Expert-survey responses. An alternative export is kept here, currently disabled:
# survey_data_= pd.read_csv(os.path.join(datasets, 'FoEmployment/fow-expert-survey/data/cleaned/counts_data_with_metadata.csv'))
survey_csv = os.path.join(datasets, 'FoEmployment/fow-expert-survey/data/cleaned/2018_1_12_cleaned.csv')
survey_data_ = pd.read_csv(survey_csv)

# Investigate skills: 
# s = pd.read_table(os.path.join(datasets, 'ONET/databases/db2016/Skills.txt'), sep='\t')
# s.rename(columns = {'Title':'Occupation title'}, inplace = True)
# s[s['O*NET-SOC Code'].apply(lambda x: x[-1] != "0")] #['O*NET-SOC Code'].unique()

/home/scpd/Datasets/


# ONET Datasets:

## Occupations and Tasks 

In [2]:
# Occupation reference table, with the generic 'Title' column renamed to 'Occupation title'.
occupation_file = os.path.join(datasets, 'ONET/databases/db2016/Occupation Data.txt')
occupations = (pd.read_table(occupation_file, sep='\t')
               .rename(columns={'Title': 'Occupation title'}))
occupations.head()

Unnamed: 0,O*NET-SOC Code,Occupation title,Description
0,11-1011.00,Chief Executives,Determine and formulate policies and provide o...
1,11-1011.03,Chief Sustainability Officers,"Communicate and coordinate with management, sh..."
2,11-1021.00,General and Operations Managers,"Plan, direct, or coordinate the operations of ..."
3,11-1031.00,Legislators,"Develop, introduce or enact laws and statutes ..."
4,11-2011.00,Advertising and Promotions Managers,"Plan, direct, or coordinate advertising polici..."


In [85]:
# Task statements: keep only the occupation code, the task id and the task text.
task_file = os.path.join(datasets, 'ONET/databases/db2016/Task Statements.txt')
tasks = pd.read_table(task_file, sep='\t')[['O*NET-SOC Code', 'Task ID', 'Task']]

# Occupation codes to keep. All codes are kept for now; slice this array
# (e.g. append [:2]) to work on a reduced task matrix.
reduce_tasks = tasks['O*NET-SOC Code'].unique()
keep_rows = tasks['O*NET-SOC Code'].isin(reduce_tasks)
tasks = tasks[keep_rows]

# If the two numbers printed here match, every task belongs to exactly one occupation.
n_unique_tasks = len(tasks['Task ID'].unique())
print("{} {} {}".format(tasks.shape, "UNIQUE tasks: ", n_unique_tasks))
tasks

# Tasks do not overlap between ONET codes, which seem to be hierarchical. To inspect:
# tasks[tasks['O*NET-SOC Code'].isin(['11-1011.03','11-1011.00'])]



(19566, 3) UNIQUE tasks:  19566


Unnamed: 0,O*NET-SOC Code,Task ID,Task
0,11-1011.00,8823,Direct or coordinate an organization's financi...
1,11-1011.00,8831,Appoint department heads or managers and assig...
2,11-1011.00,8825,Analyze operations to evaluate performance of ...
3,11-1011.00,8826,"Direct, plan, or implement policies, objective..."
4,11-1011.00,8827,"Prepare budgets for approval, including those ..."
5,11-1011.00,8824,"Confer with board members, organization offici..."
6,11-1011.00,8836,Implement corrective action plans to solve org...
7,11-1011.00,8832,"Direct human resources activities, including t..."
8,11-1011.00,8835,Establish departmental responsibilities and co...
9,11-1011.00,8833,"Preside over or serve on boards of directors, ..."


In [4]:
# Task -> DWA (detailed work activity) mapping; only the two id columns are needed.
task_dwa_file = os.path.join(datasets, 'ONET/databases/db2016/Tasks to DWAs.txt')
taskDWA = pd.read_table(task_dwa_file, sep='\t')[['Task ID', 'DWA ID']]

n_unique_dwa = len(taskDWA['DWA ID'].unique())
print("{} {} {}".format(taskDWA.shape, "UNIQUE DWA: ", n_unique_dwa))
taskDWA.head()

(22838, 2) UNIQUE DWA:  2070


Unnamed: 0,Task ID,DWA ID
0,20461,4.A.2.a.4.I09.D03
1,20461,4.A.4.b.6.I08.D04
2,8823,4.A.4.b.4.I09.D02
3,8824,4.A.4.a.2.I03.D14
4,8825,4.A.2.a.4.I07.D09


In [5]:
# Build one row per (task, DWA) pair: attach DWA ids to the task list and drop
# tasks that have no DWA mapping.
df = pd.merge(tasks, taskDWA, how='left', on='Task ID').sort_values(by='Task ID')
# .copy() so the columns added below are written to an independent frame, not to a slice.
df = df[df['DWA ID'].notnull()].copy()

# The ids are hierarchical strings such as '4.A.4.a.8.I03.D05': dropping the
# trailing '.Dxx' (4 chars) gives the IWA id, dropping '.Ixx.Dxx' (8 chars) the WA id.
df['IWA ID'] = df['DWA ID'].str.slice(0, -4)    # create IWA ID
df['WA ID'] = df['DWA ID'].str.slice(0, -8)     # create WA ID

## ADD DWA and IWA titles:
DWAref = pd.read_table(os.path.join(datasets, 'ONET/databases/db2016/DWA Reference.txt'), sep='\t')[['DWA ID', 'DWA Title']]
taskDWA2 = pd.merge(df, DWAref, how='left', on='DWA ID')
# The title lookup must not add rows; this fails loudly if the reference file
# ever lists a DWA ID more than once.
assert len(taskDWA2) == len(df), "DWA Reference has duplicate DWA IDs"

IWAref = pd.read_table(os.path.join(datasets, 'ONET/databases/db2016/IWA Reference.txt'), sep='\t')[['IWA ID', 'IWA Title']]
# Chain from taskDWA2 (not df) so that the DWA titles merged above are still
# available; otherwise the alternative column selection below raises a KeyError.
df2 = pd.merge(taskDWA2, IWAref, how='left', on='IWA ID')

# Alternative selection that keeps the human-readable text columns:
# cols = ['Task ID', 'Task', 'DWA ID', 'DWA Title', 'IWA ID', 'IWA Title', 'WA ID']
cols = ['O*NET-SOC Code', 'Task ID', 'DWA ID', 'IWA ID', 'WA ID']
df2 = df2[cols]

# tasks are many-to-many with DWA, e.g. task id=8826.
print("{} {} {}".format(df2.shape, "UNIQUE DWA: ", len(df2['DWA ID'].unique())))
df2.head()

(22838, 5) UNIQUE DWA:  2070


Unnamed: 0,O*NET-SOC Code,Task ID,DWA ID,IWA ID,WA ID
0,11-2022.00,1,4.A.4.a.8.I03.D05,4.A.4.a.8.I03,4.A.4.a.8
1,11-2022.00,2,4.A.1.a.1.I14.D04,4.A.1.a.1.I14,4.A.1.a.1
2,11-2022.00,3,4.A.4.b.4.I12.D03,4.A.4.b.4.I12,4.A.4.b.4
3,11-2022.00,4,4.A.2.b.4.I01.D06,4.A.2.b.4.I01,4.A.2.b.4
4,11-2022.00,5,4.A.2.a.4.I11.D06,4.A.2.a.4.I11,4.A.2.a.4


# Task Importance to a DWA (weight)

In [6]:
# Task importance ratings (each task is unique to its occupation).
task_rates = pd.read_table(os.path.join(datasets, 'ONET/databases/db2016/Task Ratings.txt'), sep='\t')

# Keep only the Importance ('IM') scale. rename() without inplace returns a new
# frame, so the filtered slice of task_rates is never modified in place (the
# in-place rename is what raised pandas' SettingWithCopyWarning).
task_im = (task_rates[task_rates['Scale ID'] == 'IM']
           .rename(columns={'Data Value': 'Task IM'}))
task_im = task_im[['Task ID', 'Task IM']].sort_values('Task ID')
print(task_im.shape)

# Attach the importance rating to every (task, DWA) row.
df3 = pd.merge(df2, task_im, how='left', on='Task ID')
print(df3.shape)

# Remove rows whose task has no importance rating.
df3 = df3[df3['Task IM'].notnull()]
print(df3.shape)  # recorded run: 22838 -> 22365 rows, i.e. 473 rows without a Task IM
df3.head(10)

(19125, 2)
(22838, 6)
(22365, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  **kwargs)


Unnamed: 0,O*NET-SOC Code,Task ID,DWA ID,IWA ID,WA ID,Task IM
0,11-2022.00,1,4.A.4.a.8.I03.D05,4.A.4.a.8.I03,4.A.4.a.8,4.09
1,11-2022.00,2,4.A.1.a.1.I14.D04,4.A.1.a.1.I14,4.A.1.a.1,3.57
2,11-2022.00,3,4.A.4.b.4.I12.D03,4.A.4.b.4.I12,4.A.4.b.4,4.22
3,11-2022.00,4,4.A.2.b.4.I01.D06,4.A.2.b.4.I01,4.A.2.b.4,3.61
4,11-2022.00,5,4.A.2.a.4.I11.D06,4.A.2.a.4.I11,4.A.2.a.4,3.95
5,11-2022.00,6,4.A.4.b.4.I12.D03,4.A.4.b.4.I12,4.A.4.b.4,3.48
6,11-2022.00,7,4.A.4.a.2.I03.D14,4.A.4.a.2.I03,4.A.4.a.2,3.25
7,11-2022.00,8,4.A.4.b.6.I05.D10,4.A.4.b.6.I05,4.A.4.b.6,3.26
8,11-2022.00,9,4.A.2.b.1.I03.D04,4.A.2.b.1.I03,4.A.2.b.1,3.59
9,11-2022.00,9,4.A.4.b.4.I09.D04,4.A.4.b.4.I09,4.A.4.b.4,3.59


In [7]:
# Weight of each task within its DWA: the task's importance divided by the summed
# importance of all df3 rows that share the DWA, so the weights add up to 1 per DWA.
by = ['DWA ID']
# Aggregate only 'Task IM'; summing the whole frame would also process the
# id/string columns just to throw the result away.
task_im_by_dwa = df3.groupby(by)['Task IM'].sum().reset_index()
task_im_by_dwa = task_im_by_dwa.rename(columns={'Task IM': 'IM per DWA Sum'})

df4_ = pd.merge(df3, task_im_by_dwa, how='left', on='DWA ID')
df4_['Task IM per DWA Weight'] = df4_['Task IM'] / df4_['IM per DWA Sum']

# Sanity check on a single DWA: its weights should sum to 1.
print(df4_[df4_['DWA ID'] == '4.A.4.a.8.I03.D05']['Task IM per DWA Weight'].sum())
df4_.head()


1.0


Unnamed: 0,O*NET-SOC Code,Task ID,DWA ID,IWA ID,WA ID,Task IM,IM per DWA Sum,Task IM per DWA Weight
0,11-2022.00,1,4.A.4.a.8.I03.D05,4.A.4.a.8.I03,4.A.4.a.8,4.09,84.74,0.048265
1,11-2022.00,2,4.A.1.a.1.I14.D04,4.A.1.a.1.I14,4.A.1.a.1,3.57,33.92,0.105248
2,11-2022.00,3,4.A.4.b.4.I12.D03,4.A.4.b.4.I12,4.A.4.b.4,4.22,56.68,0.074453
3,11-2022.00,4,4.A.2.b.4.I01.D06,4.A.2.b.4.I01,4.A.2.b.4,3.61,28.76,0.125522
4,11-2022.00,5,4.A.2.a.4.I11.D06,4.A.2.a.4.I11,4.A.2.a.4,3.95,17.99,0.219566


# Occupation Importance to a Task (weight)

In [54]:
# Weight of each task within its occupation: importance divided by the summed
# importance over that occupation's rows in df3. df3 holds one row per
# (task, DWA) pair, so a task mapped to several DWAs is counted once per mapping.
by = ['O*NET-SOC Code']
# Aggregate only 'Task IM' rather than summing every column of df3.
task_im_by_occu = df3.groupby(by)['Task IM'].sum().reset_index()
task_im_by_occu = task_im_by_occu.rename(columns={'Task IM': 'IM per Occu Sum'})

df4 = pd.merge(df4_, task_im_by_occu, how='left', on='O*NET-SOC Code')
df4['Task IM per Occu Weight'] = df4['Task IM'] / df4['IM per Occu Sum']
# To inspect a single DWA or occupation:
# df4[df4['DWA ID'] == '4.A.4.a.8.I03.D05'] or df4[df4['O*NET-SOC Code'] == '11-9051.00']

# Min-max rescale both weight columns to [0, 1] across the whole table.
I_o = df4['Task IM per Occu Weight']
df4['Task IM per Occu Weight Norm'] = (I_o - I_o.min()) / (I_o.max() - I_o.min())

I_w = df4['Task IM per DWA Weight']
df4['Task IM per DWA Weight Norm'] = (I_w - I_w.min()) / (I_w.max() - I_w.min())

df4.head(10)

Unnamed: 0,O*NET-SOC Code,Task ID,DWA ID,IWA ID,WA ID,Task IM,IM per DWA Sum,Task IM per DWA Weight,IM per Occu Sum,Task IM per Occu Weight,Task IM per Occu Weight Norm,Task IM per DWA Weight Norm
0,11-2022.00,1,4.A.4.a.8.I03.D05,4.A.4.a.8.I03,4.A.4.a.8,4.09,84.74,0.048265,68.03,0.060121,0.258294,0.042615
1,11-2022.00,2,4.A.1.a.1.I14.D04,4.A.1.a.1.I14,4.A.1.a.1,3.57,33.92,0.105248,68.03,0.052477,0.220042,0.099936
2,11-2022.00,3,4.A.4.b.4.I12.D03,4.A.4.b.4.I12,4.A.4.b.4,4.22,56.68,0.074453,68.03,0.062031,0.267856,0.068958
3,11-2022.00,4,4.A.2.b.4.I01.D06,4.A.2.b.4.I01,4.A.2.b.4,3.61,28.76,0.125522,68.03,0.053065,0.222984,0.120330
4,11-2022.00,5,4.A.2.a.4.I11.D06,4.A.2.a.4.I11,4.A.2.a.4,3.95,17.99,0.219566,68.03,0.058063,0.247995,0.214933
5,11-2022.00,6,4.A.4.b.4.I12.D03,4.A.4.b.4.I12,4.A.4.b.4,3.48,56.68,0.061397,68.03,0.051154,0.213422,0.055825
6,11-2022.00,7,4.A.4.a.2.I03.D14,4.A.4.a.2.I03,4.A.4.a.2,3.25,114.84,0.028300,68.03,0.047773,0.196503,0.022532
7,11-2022.00,8,4.A.4.b.6.I05.D10,4.A.4.b.6.I05,4.A.4.b.6,3.26,134.70,0.024202,68.03,0.047920,0.197238,0.018409
8,11-2022.00,9,4.A.2.b.1.I03.D04,4.A.2.b.1.I03,4.A.2.b.1,3.59,53.14,0.067557,68.03,0.052771,0.221513,0.062022
9,11-2022.00,9,4.A.4.b.4.I09.D04,4.A.4.b.4.I09,4.A.4.b.4,3.59,95.99,0.037400,68.03,0.052771,0.221513,0.031685


# Do Not: Weight by Number of Employees

In [9]:
# # Use only the .00 Occupation codes: 

# emp_data = pd.read_csv(os.path.join(datasets, 'ONET/employment_figures_including_doubles.csv'))
# employment_data = emp_data[emp_data['O*NET-SOC Code'].apply(lambda x: ".00" in x)]#['O*NET-SOC Code'].unique()

# df5 = pd.merge(df4, employment_data[["Employment", "O*NET-SOC Code"]],  how='right', left_on=['O*NET-SOC Code'], right_on = ['O*NET-SOC Code'])
# print df4.shape, " vs ", df5.shape
# df5.head()


In [10]:
# emp_dwa = df5.groupby(['DWA ID']).sum().reset_index()[['DWA ID', 'Employment']]
# emp_dwa.rename(columns = {'Employment':'Employment per DWA Sum'}, inplace=True)

# df5_e = pd.merge(df5, emp_dwa,  how='left', left_on=['DWA ID'], right_on = ['DWA ID'])
# df5_e['Employment per DWA Norm'] = df5_e['Employment']/df5_e['Employment per DWA Sum']


## Skills by Occupations

In [88]:
# Occupation x skill matrix built from the importance ('IM') ratings only.
skills_file = os.path.join(datasets, 'ONET/databases/db2016/Skills.txt')
skills = pd.read_table(skills_file, sep='\t', low_memory=False)
skills = skills.loc[skills['Scale ID'] == 'IM']

# One row per occupation, one column per skill; missing (occupation, skill)
# combinations are filled with 0.
skills_pivot = (skills
                .pivot(index='O*NET-SOC Code', columns='Element Name', values='Data Value')
                .fillna(0)
                .reset_index())
print(skills_pivot.shape)
skills_pivot.columns

(964, 36)


Index([u'O*NET-SOC Code', u'Active Learning', u'Active Listening',
       u'Complex Problem Solving', u'Coordination', u'Critical Thinking',
       u'Equipment Maintenance', u'Equipment Selection', u'Installation',
       u'Instructing', u'Judgment and Decision Making', u'Learning Strategies',
       u'Management of Financial Resources',
       u'Management of Material Resources',
       u'Management of Personnel Resources', u'Mathematics', u'Monitoring',
       u'Negotiation', u'Operation Monitoring', u'Operation and Control',
       u'Operations Analysis', u'Persuasion', u'Programming',
       u'Quality Control Analysis', u'Reading Comprehension', u'Repairing',
       u'Science', u'Service Orientation', u'Social Perceptiveness',
       u'Speaking', u'Systems Analysis', u'Systems Evaluation',
       u'Technology Design', u'Time Management', u'Troubleshooting',
       u'Writing'],
      dtype='object', name=u'Element Name')

In [12]:
# WA['Element_pivot'] =  WA['Element Name'] + " " +  WA['Scale ID']
# WA_pivot = WA.pivot(index = 'O*NET-SOC Code', columns='Element_pivot', values='Data Value').fillna(0)
# WA_pivot.reset_index(inplace=True)

# WA_pivot

# occupation_level_skills_wa = pd.merge(df_skills, WA_pivot,  how='left', left_on=['O*NET-SOC Code'], right_on = ['O*NET-SOC Code'])
# occupation_level_skills_wa = occupation_level_skills_wa.sort_values(by = 'Observed Occupation')
# occupation_level_skills_wa

In [87]:
# Occupation x knowledge-area matrix built from the importance ('IM') ratings only.
knowledge_file = os.path.join(datasets, 'ONET/databases/db2016/Knowledge.txt')
know = pd.read_table(knowledge_file, sep='\t', low_memory=False)
know = know.loc[know['Scale ID'] == 'IM']

# One row per occupation, one column per knowledge area; missing combinations become 0.
know_pivot = (know
              .pivot(index='O*NET-SOC Code', columns='Element Name', values='Data Value')
              .fillna(0)
              .reset_index())
print(know_pivot.shape)
know_pivot.columns

(964, 34)


Index([u'O*NET-SOC Code', u'Administration and Management', u'Biology',
       u'Building and Construction', u'Chemistry', u'Clerical',
       u'Communications and Media', u'Computers and Electronics',
       u'Customer and Personal Service', u'Design',
       u'Economics and Accounting', u'Education and Training',
       u'Engineering and Technology', u'English Language', u'Fine Arts',
       u'Food Production', u'Foreign Language', u'Geography',
       u'History and Archeology', u'Law and Government', u'Mathematics',
       u'Mechanical', u'Medicine and Dentistry',
       u'Personnel and Human Resources', u'Philosophy and Theology',
       u'Physics', u'Production and Processing', u'Psychology',
       u'Public Safety and Security', u'Sales and Marketing',
       u'Sociology and Anthropology', u'Telecommunications',
       u'Therapy and Counseling', u'Transportation'],
      dtype='object', name=u'Element Name')

In [86]:
# Occupation x ability matrix built from the importance ('IM') ratings only.
abilities_file = os.path.join(datasets, 'ONET/databases/db2016/Abilities.txt')
abilities = pd.read_table(abilities_file, sep='\t', low_memory=False)
abilities = abilities.loc[abilities['Scale ID'] == 'IM']

# One row per occupation, one column per ability; missing combinations become 0.
abilities_pivot = (abilities
                   .pivot(index='O*NET-SOC Code', columns='Element Name', values='Data Value')
                   .fillna(0)
                   .reset_index())
print(abilities_pivot.shape)
abilities_pivot.columns

(964, 53)


Index([u'O*NET-SOC Code', u'Arm-Hand Steadiness', u'Auditory Attention',
       u'Category Flexibility', u'Control Precision', u'Deductive Reasoning',
       u'Depth Perception', u'Dynamic Flexibility', u'Dynamic Strength',
       u'Explosive Strength', u'Extent Flexibility', u'Far Vision',
       u'Finger Dexterity', u'Flexibility of Closure', u'Fluency of Ideas',
       u'Glare Sensitivity', u'Gross Body Coordination',
       u'Gross Body Equilibrium', u'Hearing Sensitivity',
       u'Inductive Reasoning', u'Information Ordering', u'Manual Dexterity',
       u'Mathematical Reasoning', u'Memorization', u'Multilimb Coordination',
       u'Near Vision', u'Night Vision', u'Number Facility',
       u'Oral Comprehension', u'Oral Expression', u'Originality',
       u'Perceptual Speed', u'Peripheral Vision', u'Problem Sensitivity',
       u'Rate Control', u'Reaction Time', u'Response Orientation',
       u'Selective Attention', u'Sound Localization', u'Spatial Orientation',
       u'Speech C


# Skills by Occupations and Tasks 

In [15]:
#access the occupation skills vector like this:
# skills_pivot[skills_pivot['O*NET-SOC Code'] == '11-1011.00'].values[0][1:]

## Weight Skills by Importance of Task and Frequency of Task

In [16]:
# # Do Not Normalise by Occupation (rather by DWA - above)
# task_im_sum = task_im.groupby('O*NET-SOC Code').sum().reset_index()
# task_im_sum.rename(columns = {'Task IM': 'IM Sum per Occu'}, inplace= True)
# task_im_sum = task_im_sum[['O*NET-SOC Code', 'IM Sum per Occu']]

# task_ims = pd.merge(task_im, task_im_sum,  how='left', left_on=['O*NET-SOC Code'], right_on = ['O*NET-SOC Code'])
# task_ims['Task IM Norm'] = task_ims['Task IM'] / task_ims['IM Sum per Occu'] 
# task_ims

In [17]:
# # Do Not Use Frequency: Bit hacky. 

# task_freq = task_rates[task_rates.loc[:,('Scale ID')] == 'FT']

# # Manually change Frequency Categories into Numeric value per Day ## A bit hacky :) 
# time_categories = {'1' : (1/365.),  # Yearly or less
#                    '2' : (4/365.),  # More than yearly
#                    '3' : (3/12.),   # More than monthly
#                    '4' : (3/7.),    # More than weekly
#                    '5' : 1.,            # Daily
#                    '6' : 3.,            # Several times daily
#                    '7' : 8.}            # Hourly or more

# # frequency is only relative:
# task_freq.loc[:,('Temp')] = [float(time_categories[i]) for i in task_freq.loc[:,('Category')].values]
# task_freq.loc[:,('Freq')] = task_freq.loc[:,('Temp')]*task_freq.loc[:,('Data Value')]

In [18]:
# by = ['O*NET-SOC Code', 'Task ID']
# task_freq = task_freq.groupby(by).mean().reset_index()
# task_freq.rename(columns = {'Freq':'Task Freq'}, inplace = True)
# cols = by + ['Task Freq']
# task_freq = task_freq[cols]

# task_freq_norm = task_freq.groupby('O*NET-SOC Code').sum().reset_index()
# task_freq_norm.rename(columns = {'Task Freq': 'Sum per Occu'}, inplace= True)
# task_freq_norm = task_freq_norm[['O*NET-SOC Code', 'Sum per Occu']]

# task_freqs = pd.merge(task_freq, task_freq_norm,  how='left', left_on=['O*NET-SOC Code'], right_on = ['O*NET-SOC Code'])
# task_freqs.loc[:, ('Task Freq Norm')] = task_freqs.loc[:, ('Task Freq')] / task_freqs.loc[:, ('Sum per Occu')]

# task_freqs.groupby('O*NET-SOC Code').sum() # check they sum to 1 :) 
# task_freqs[['O*NET-SOC Code', 'Task ID', 'Task Freq', 'Task Freq Norm']]

# Merge 120 features together

In [58]:
# Join the skill, knowledge and ability matrices on the occupation code
# (pd.merge defaults to an inner join, so only codes present in all three survive).
# NOTE(review): 'Mathematics' is both a skill and a knowledge area, so pandas'
# default suffixes turn the pair into 'Mathematics_x' (skill) and
# 'Mathematics_y' (knowledge) -- confirm downstream code expects those names.
skills_and_knowledge = pd.merge(skills_pivot, know_pivot, on='O*NET-SOC Code')
all_features = pd.merge(skills_and_knowledge, abilities_pivot, on='O*NET-SOC Code')
print(all_features.shape)
all_features.head()

(964, 121)


Element Name,O*NET-SOC Code,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,...,Speed of Limb Movement,Stamina,Static Strength,Time Sharing,Trunk Strength,Visual Color Discrimination,Visualization,Wrist-Finger Speed,Written Comprehension,Written Expression
0,11-1011.00,4.0,4.12,4.38,4.25,4.38,1.0,1.12,1.0,3.12,...,1.0,1.0,1.0,3.0,1.0,1.88,3.12,1.0,4.25,4.12
1,11-1011.03,3.5,3.88,4.0,3.62,4.0,1.0,1.12,1.0,3.25,...,1.0,1.0,1.0,2.62,1.12,2.0,2.75,1.12,4.0,3.88
2,11-1021.00,3.5,4.0,3.5,4.0,3.88,1.0,1.0,1.0,3.12,...,1.5,2.0,2.0,2.88,2.12,2.0,2.75,1.38,4.0,4.0
3,11-2011.00,3.25,4.0,3.5,3.5,3.75,1.0,1.25,1.0,2.88,...,1.0,1.0,1.0,2.75,1.25,2.88,3.0,1.25,3.88,3.88
4,11-2021.00,3.88,3.88,3.62,3.5,3.88,1.0,1.0,1.0,3.0,...,1.0,1.0,1.0,2.75,1.75,2.88,3.0,1.62,4.0,3.88


# Merge Occupation Features with Weightings

In [71]:
# Identifier and weight columns carried over from df4; the occupation-level
# feature columns are appended to the right of these.
cols = [
    'O*NET-SOC Code',
    'DWA ID',
    'Task IM per DWA Weight',
    'Task IM per Occu Weight',
    'Task IM per DWA Weight Norm',
    'Task IM per Occu Weight Norm',
]  # , 'Employment per DWA Norm']

# Every (task, DWA) row receives the full feature vector of its occupation.
df6 = df4[cols].merge(all_features, how='left', on='O*NET-SOC Code')

# Optional check that the occupation weights sum to 1 within one occupation:
# print(df6[df6['O*NET-SOC Code'] == '11-2022.00']['Task IM per Occu Weight'].sum())
print(df6.shape)
df6

(22365, 126)


Unnamed: 0,O*NET-SOC Code,DWA ID,Task IM per DWA Weight,Task IM per Occu Weight,Task IM per DWA Weight Norm,Task IM per Occu Weight Norm,Active Learning,Active Listening,Complex Problem Solving,Coordination,...,Speed of Limb Movement,Stamina,Static Strength,Time Sharing,Trunk Strength,Visual Color Discrimination,Visualization,Wrist-Finger Speed,Written Comprehension,Written Expression
0,11-2022.00,4.A.4.a.8.I03.D05,0.048265,0.060121,0.042615,0.258294,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
1,11-2022.00,4.A.1.a.1.I14.D04,0.105248,0.052477,0.099936,0.220042,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
2,11-2022.00,4.A.4.b.4.I12.D03,0.074453,0.062031,0.068958,0.267856,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
3,11-2022.00,4.A.2.b.4.I01.D06,0.125522,0.053065,0.120330,0.222984,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
4,11-2022.00,4.A.2.a.4.I11.D06,0.219566,0.058063,0.214933,0.247995,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
5,11-2022.00,4.A.4.b.4.I12.D03,0.061397,0.051154,0.055825,0.213422,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
6,11-2022.00,4.A.4.a.2.I03.D14,0.028300,0.047773,0.022532,0.196503,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
7,11-2022.00,4.A.4.b.6.I05.D10,0.024202,0.047920,0.018409,0.197238,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
8,11-2022.00,4.A.2.b.1.I03.D04,0.067557,0.052771,0.062022,0.221513,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88
9,11-2022.00,4.A.4.b.4.I09.D04,0.037400,0.052771,0.031685,0.221513,3.75,4.00,3.75,3.88,...,1.12,1.00,1.00,2.62,1.75,1.88,2.38,1.50,4.00,3.88


In [63]:
## Weight each skills vector by the Task Importance per DWA:
def func(x):
    """Scale one feature column element-wise by the normalised per-DWA task weight."""
    return np.asarray(x) * np.asarray(df6['Task IM per DWA Weight Norm'])

# Columns from position 6 onwards are taken as the feature columns.
df7 = df6.iloc[:, 6:].apply(func)
df7

Unnamed: 0,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,Judgment and Decision Making,...,Speed of Limb Movement,Stamina,Static Strength,Time Sharing,Trunk Strength,Visual Color Discrimination,Visualization,Wrist-Finger Speed,Written Comprehension,Written Expression
0,0.159807,0.170461,0.159807,0.165347,0.165347,0.042615,0.042615,0.042615,0.154267,0.159807,...,0.047729,0.042615,0.042615,0.111652,0.074577,0.080117,0.101424,0.063923,0.170461,0.165347
1,0.374760,0.399744,0.374760,0.387751,0.387751,0.099936,0.099936,0.099936,0.361768,0.374760,...,0.111928,0.099936,0.099936,0.261832,0.174888,0.187879,0.237847,0.149904,0.399744,0.387751
2,0.258594,0.275834,0.258594,0.267559,0.267559,0.068958,0.068958,0.068958,0.249630,0.258594,...,0.077234,0.068958,0.068958,0.180671,0.120677,0.129642,0.164121,0.103438,0.275834,0.267559
3,0.451238,0.481321,0.451238,0.466881,0.466881,0.120330,0.120330,0.120330,0.435595,0.451238,...,0.134770,0.120330,0.120330,0.315265,0.210578,0.226221,0.286386,0.180495,0.481321,0.466881
4,0.806000,0.859733,0.806000,0.833941,0.833941,0.214933,0.214933,0.214933,0.778059,0.806000,...,0.240725,0.214933,0.214933,0.563125,0.376133,0.404075,0.511541,0.322400,0.859733,0.833941
5,0.209345,0.223301,0.209345,0.216602,0.216602,0.055825,0.055825,0.055825,0.202087,0.209345,...,0.062524,0.055825,0.055825,0.146262,0.097694,0.104951,0.132864,0.083738,0.223301,0.216602
6,0.084494,0.090127,0.084494,0.087423,0.087423,0.022532,0.022532,0.022532,0.081565,0.084494,...,0.025235,0.022532,0.022532,0.059033,0.039430,0.042360,0.053625,0.033798,0.090127,0.087423
7,0.069034,0.073636,0.069034,0.071427,0.071427,0.018409,0.018409,0.018409,0.066641,0.069034,...,0.020618,0.018409,0.018409,0.048232,0.032216,0.034609,0.043814,0.027614,0.073636,0.071427
8,0.232582,0.248088,0.232582,0.240645,0.240645,0.062022,0.062022,0.062022,0.224519,0.232582,...,0.069465,0.062022,0.062022,0.162497,0.108538,0.116601,0.147612,0.093033,0.248088,0.240645
9,0.118819,0.126741,0.118819,0.122939,0.122939,0.031685,0.031685,0.031685,0.114700,0.118819,...,0.035487,0.031685,0.031685,0.083015,0.055449,0.059568,0.075411,0.047528,0.126741,0.122939


In [64]:
## Further Weight each skills vector by the Task - Occupation Weight
def func(x):
    """Scale one feature column element-wise by the normalised per-occupation task weight."""
    return np.asarray(x) * np.asarray(df6['Task IM per Occu Weight Norm'])

# Applied on top of df7, so each row ends up scaled by both weights.
df7_o = df7.apply(func)
df7_o

Unnamed: 0,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,Judgment and Decision Making,...,Speed of Limb Movement,Stamina,Static Strength,Time Sharing,Trunk Strength,Visual Color Discrimination,Visualization,Wrist-Finger Speed,Written Comprehension,Written Expression
0,0.041277,0.044029,0.041277,0.042708,0.042708,0.011007,0.011007,0.011007,0.039846,0.041277,...,0.012328,0.011007,0.011007,0.028839,0.019263,0.020694,0.026197,0.016511,0.044029,0.042708
1,0.082463,0.087960,0.082463,0.085322,0.085322,0.021990,0.021990,0.021990,0.079604,0.082463,...,0.024629,0.021990,0.021990,0.057614,0.038483,0.041341,0.052336,0.032985,0.087960,0.085322
2,0.069266,0.073884,0.069266,0.071667,0.071667,0.018471,0.018471,0.018471,0.066865,0.069266,...,0.020687,0.018471,0.018471,0.048394,0.032324,0.034725,0.043961,0.027706,0.073884,0.071667
3,0.100619,0.107327,0.100619,0.104107,0.104107,0.026832,0.026832,0.026832,0.097131,0.100619,...,0.030052,0.026832,0.026832,0.070299,0.046956,0.050444,0.063860,0.040248,0.107327,0.104107
4,0.199884,0.213210,0.199884,0.206813,0.206813,0.053302,0.053302,0.053302,0.192955,0.199884,...,0.059699,0.053302,0.053302,0.139652,0.093279,0.100209,0.126860,0.079954,0.213210,0.206813
5,0.044679,0.047657,0.044679,0.046228,0.046228,0.011914,0.011914,0.011914,0.043130,0.044679,...,0.013344,0.011914,0.011914,0.031215,0.020850,0.022399,0.028356,0.017871,0.047657,0.046228
6,0.016603,0.017710,0.016603,0.017179,0.017179,0.004428,0.004428,0.004428,0.016028,0.016603,...,0.004959,0.004428,0.004428,0.011600,0.007748,0.008324,0.010538,0.006641,0.017710,0.017179
7,0.013616,0.014524,0.013616,0.014088,0.014088,0.003631,0.003631,0.003631,0.013144,0.013616,...,0.004067,0.003631,0.003631,0.009513,0.006354,0.006826,0.008642,0.005446,0.014524,0.014088
8,0.051520,0.054955,0.051520,0.053306,0.053306,0.013739,0.013739,0.013739,0.049734,0.051520,...,0.015387,0.013739,0.013739,0.035995,0.024043,0.025829,0.032698,0.020608,0.054955,0.053306
9,0.026320,0.028075,0.026320,0.027233,0.027233,0.007019,0.007019,0.007019,0.025408,0.026320,...,0.007861,0.007019,0.007019,0.018389,0.012283,0.013195,0.016704,0.010528,0.028075,0.027233


In [23]:
# ## Further Weight each skills vector by the Employment Norm: 
# func = lambda x: np.asarray(x) * np.asarray(df6['Employment per DWA Norm'])
# df7_e = df7.apply(func)

In [65]:
# Put the DWA id back next to each weighted feature row, matching rows by index.
dwa_ids = df6[['DWA ID']]
df8 = dwa_ids.join(df7, how='left')
df8_o = dwa_ids.join(df7_o, how='left')

In [66]:
# Collapse to a single feature vector per DWA by averaging its weighted rows.
df9 = df8.groupby('DWA ID', as_index=False).mean()
df9_o = df8_o.groupby('DWA ID', as_index=False).mean()
print(df9.shape)
df9.head()

(2067, 121)


Unnamed: 0,DWA ID,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,...,Speed of Limb Movement,Stamina,Static Strength,Time Sharing,Trunk Strength,Visual Color Discrimination,Visualization,Wrist-Finger Speed,Written Comprehension,Written Expression
0,4.A.1.a.1.I01.D01,0.624815,0.742852,0.645804,0.635395,0.673152,0.266106,0.316508,0.206054,0.558404,...,0.211672,0.233278,0.249267,0.493312,0.289242,0.654582,0.689535,0.280644,0.736219,0.646977
1,4.A.1.a.1.I01.D02,0.756793,0.870119,0.76659,0.81617,0.841547,0.3046,0.337659,0.316411,0.74897,...,0.305544,0.353053,0.387294,0.634949,0.466449,0.522932,0.682017,0.465309,0.800036,0.791203
2,4.A.1.a.1.I01.D03,0.462548,0.51971,0.437263,0.504565,0.493712,0.16172,0.269665,0.16172,0.418279,...,0.295402,0.342845,0.334744,0.367708,0.412852,0.532102,0.476528,0.309459,0.485159,0.465752
3,4.A.1.a.1.I01.D04,0.342866,0.412837,0.337737,0.3741,0.384661,0.117392,0.132003,0.113704,0.318195,...,0.153277,0.184791,0.181936,0.293071,0.207983,0.267123,0.322353,0.131383,0.40474,0.364258
4,4.A.1.a.1.I02.D01,0.917932,1.185528,0.903183,1.009968,1.067506,0.329376,0.3562,0.329376,0.750787,...,0.403715,0.449278,0.463667,0.861132,0.555482,0.644727,0.711749,0.632071,1.329938,1.231643


In [67]:
# Preview the per-DWA vectors that carry both the DWA and the occupation weighting.
# To look at a single DWA: df9_o[df9_o['DWA ID']== '4.A.1.a.1.I01.D01']
df9_o.head(5)

Unnamed: 0,DWA ID,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,...,Speed of Limb Movement,Stamina,Static Strength,Time Sharing,Trunk Strength,Visual Color Discrimination,Visualization,Wrist-Finger Speed,Written Comprehension,Written Expression
0,4.A.1.a.1.I01.D01,0.129554,0.153415,0.13366,0.131497,0.140152,0.05454,0.063814,0.042061,0.114689,...,0.042929,0.046264,0.048732,0.101785,0.058404,0.135439,0.142125,0.056916,0.153168,0.135246
1,4.A.1.a.1.I01.D02,0.156145,0.179877,0.154816,0.167123,0.16977,0.054917,0.059346,0.055845,0.15283,...,0.065873,0.072561,0.083113,0.13131,0.10113,0.102404,0.140568,0.103373,0.159813,0.157843
2,4.A.1.a.1.I01.D03,0.089065,0.099715,0.084327,0.096705,0.094823,0.030995,0.052313,0.030995,0.080266,...,0.05713,0.065709,0.06395,0.070789,0.079326,0.102603,0.091844,0.059211,0.092985,0.089266
3,4.A.1.a.1.I01.D04,0.076334,0.092418,0.075086,0.083131,0.085221,0.025643,0.027835,0.024728,0.071255,...,0.036074,0.043833,0.040051,0.065194,0.04715,0.057468,0.06924,0.028762,0.090541,0.080906
4,4.A.1.a.1.I02.D01,0.165505,0.209946,0.163474,0.179205,0.192848,0.059786,0.064802,0.059786,0.133272,...,0.077188,0.087854,0.091222,0.153472,0.106361,0.116722,0.128531,0.116513,0.240791,0.221677


In [27]:
# # # Version 0.1 : 
# A defensible method for weighting Importance and Frequency of Skills vector per Occu 

# weighted_vecs = np.array([])
# weighted_vecs.shape
# problem_onet_codes = {"skill_vec":set([]), "IM/Freq" : set([]) }

# # try and get rid of this horrible loop. 
# for (cnt, row) in df3.iterrows():
#     if cnt%1000 ==0: print cnt,
    
#     #if task_freq['O*NET-SOC Code'].str.contains(row['O*NET-SOC Code']).any() and task_ims['O*NET-SOC Code'].str.contains(row['O*NET-SOC Code']).any() and skills_pivot['O*NET-SOC Code'].str.contains(row['O*NET-SOC Code']).any():
       
#     try: 
#         skill_vec = skills_pivot[skills_pivot['O*NET-SOC Code'] == row['O*NET-SOC Code']].values[0][1:]
#     except IndexError as e:
# #         print cnt, row['O*NET-SOC Code'], skills_pivot[skills_pivot['O*NET-SOC Code'] == row['O*NET-SOC Code']].values
#         problem_onet_codes["skill_vec"].add(row['O*NET-SOC Code'])
#         continue

#     freq = task_freqs[task_freqs['Task ID'] == row['Task ID']]['Task Freq Norm'].values
#     imp =  task_ims[task_ims['Task ID'] == row['Task ID']]['Task IM Norm'].values

#     try:
#         vec = skill_vec*freq*imp
#     except ValueError as e:
# #         print row['O*NET-SOC Code']
#         problem_onet_codes["IM/Freq"].add(row['O*NET-SOC Code'])
#         continue

#     task_info = np.append(row['Task ID'], row['DWA ID'])
#     weigted_skills = np.append(task_info, vec)

#     try: 
#         weighted_vecs = np.append(weighted_vecs, [weigted_skills], axis=0)
#     except:
#         weighted_vecs = np.array([weigted_skills])

# weighted_vecs.shape

In [28]:
# df4 = pd.DataFrame(data=weighted_vecs)
# task_cols = ['Task ID', 'DWA ID']

# skill_column_names = list(skills_pivot.columns[1:])
# df4.columns = np.append(task_cols, skill_column_names)
# skill_column_names
# df4[skill_column_names] = df4[skill_column_names].apply(pd.to_numeric)
# df4.head()

In [29]:
# # Take the mean average over occupations DWAs
# df5 = df4.groupby(['DWA ID'], as_index=False).mean()
# df5.head()

## Ground Truth Survey data:

In [68]:
# Align the survey column names with the ONET tables.
survey_data_ = survey_data_.rename(columns={'title': 'O*NET Occupation title',
                                            'Unnamed: 0': 'Task'})

# Change Ordinal Data to Numeric - bit hacky.
# Each answer option gets the score at the same position in `ratings`; the rating
# of a task is the score-weighted response count divided by the number of responses.
# NOTE(review): 'Unsure' scores 0 yet still counts in 'Number of Responses', so
# unsure answers pull the rating down -- confirm this is intended.
ratings = [4, 3, 2, 1, 0]
answer_columns = ['Completely Automatable Today',
                  'Could be Mostly Automated Today (Human Still Needed)',
                  'Mostly Not Automatable Today (Human Does Most of It)',
                  'Not Automatable Today',
                  'Unsure']
weighted_votes = sum(survey_data_[col] * score for col, score in zip(answer_columns, ratings))
survey_data_['GT Rating'] = weighted_votes / survey_data_['Number of Responses']

survey_data = survey_data_[['Task ID', 'GT Rating']].sort_values(by='Task ID')
print(survey_data['GT Rating'].sum())

# Task DWAs (detailed work activity code), re-read so this cell does not depend
# on an earlier cell's copy.
task_dwa_file = os.path.join(datasets, 'ONET/databases/db2016/Tasks to DWAs.txt')
taskDWA = pd.read_table(task_dwa_file, sep='\t')[['Task ID', 'DWA ID']]
print("{} {} {}".format(taskDWA.shape, "UNIQUE DWA: ", len(taskDWA['DWA ID'].unique())))

# Rating per DWA = mean rating of the tasks mapped to it.
task_dwa_rat = pd.merge(taskDWA, survey_data, how='left', on='Task ID')
DWA_mean_rating = (task_dwa_rat
                   .groupby('DWA ID')['GT Rating']
                   .mean()
                   .reset_index()
                   .rename(columns={'GT Rating': 'DWA GT Rating'}))
DWA_mean_rating = DWA_mean_rating[['DWA ID', 'DWA GT Rating']]

annotated = DWA_mean_rating[DWA_mean_rating['DWA GT Rating'].notnull()]
print("{} {}".format("Unique DWAs Annotated = ", annotated.shape))

# Attach the ground-truth rating to both feature tables (DWA-weighted, and
# DWA- plus occupation-weighted); DWAs without a rating keep NaN.
data = pd.merge(df9, DWA_mean_rating, how='left', on='DWA ID')
data_o = pd.merge(df9_o, DWA_mean_rating, how='left', on='DWA ID')
data_o.head()

922.543188756
(22838, 2) UNIQUE DWA:  2070
Unique DWAs Annotated =  (314, 2)


Unnamed: 0,DWA ID,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,...,Stamina,Static Strength,Time Sharing,Trunk Strength,Visual Color Discrimination,Visualization,Wrist-Finger Speed,Written Comprehension,Written Expression,DWA GT Rating
0,4.A.1.a.1.I01.D01,0.129554,0.153415,0.13366,0.131497,0.140152,0.05454,0.063814,0.042061,0.114689,...,0.046264,0.048732,0.101785,0.058404,0.135439,0.142125,0.056916,0.153168,0.135246,2.3125
1,4.A.1.a.1.I01.D02,0.156145,0.179877,0.154816,0.167123,0.16977,0.054917,0.059346,0.055845,0.15283,...,0.072561,0.083113,0.13131,0.10113,0.102404,0.140568,0.103373,0.159813,0.157843,
2,4.A.1.a.1.I01.D03,0.089065,0.099715,0.084327,0.096705,0.094823,0.030995,0.052313,0.030995,0.080266,...,0.065709,0.06395,0.070789,0.079326,0.102603,0.091844,0.059211,0.092985,0.089266,
3,4.A.1.a.1.I01.D04,0.076334,0.092418,0.075086,0.083131,0.085221,0.025643,0.027835,0.024728,0.071255,...,0.043833,0.040051,0.065194,0.04715,0.057468,0.06924,0.028762,0.090541,0.080906,2.153846
4,4.A.1.a.1.I02.D01,0.165505,0.209946,0.163474,0.179205,0.192848,0.059786,0.064802,0.059786,0.133272,...,0.087854,0.091222,0.153472,0.106361,0.116722,0.128531,0.116513,0.240791,0.221677,


In [69]:
# Column order for the unlabelled frames: the two identifier/target columns
# followed by every column of all_features except its first one.
cols = np.append(['DWA ID', 'DWA GT Rating'], all_features.columns[1:])

# Rows whose DWA has a survey rating form the labelled set (X, y);
# rows without a rating form the unlabelled `test` set.
has_rating = data['DWA GT Rating'].notnull()
X = data[has_rating]
test = data[~has_rating].reset_index()[cols]
y = X['DWA GT Rating']

# Same split for the `_o` variant of the feature table.
has_rating_o = data_o['DWA GT Rating'].notnull()
X_o = data_o[has_rating_o]
test_o = data_o[~has_rating_o].reset_index()[cols]
# Targets are taken from X_o (not X) so that y_o is aligned with X_o's rows.
y_o = X_o['DWA GT Rating']

X

Unnamed: 0,DWA ID,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,...,Stamina,Static Strength,Time Sharing,Trunk Strength,Visual Color Discrimination,Visualization,Wrist-Finger Speed,Written Comprehension,Written Expression,DWA GT Rating
0,4.A.1.a.1.I01.D01,0.624815,0.742852,0.645804,0.635395,0.673152,0.266106,0.316508,0.206054,0.558404,...,0.233278,0.249267,0.493312,0.289242,0.654582,0.689535,0.280644,0.736219,0.646977,2.312500
3,4.A.1.a.1.I01.D04,0.342866,0.412837,0.337737,0.374100,0.384661,0.117392,0.132003,0.113704,0.318195,...,0.184791,0.181936,0.293071,0.207983,0.267123,0.322353,0.131383,0.404740,0.364258,2.153846
11,4.A.1.a.1.I02.D08,0.147576,0.170157,0.160595,0.145807,0.171103,0.078662,0.088381,0.065297,0.132802,...,0.062567,0.068337,0.127265,0.080364,0.133650,0.154792,0.081241,0.180232,0.167259,3.333333
12,4.A.1.a.1.I02.D09,0.102298,0.116928,0.109148,0.116455,0.120613,0.082564,0.090142,0.065835,0.095108,...,0.105023,0.116004,0.099280,0.121839,0.103667,0.122674,0.076496,0.112045,0.097752,2.600000
13,4.A.1.a.1.I02.D10,0.172522,0.203817,0.187713,0.193304,0.205115,0.130413,0.106515,0.074795,0.162698,...,0.141055,0.159476,0.170713,0.156514,0.171995,0.178212,0.125120,0.199509,0.187286,3.059091
16,4.A.1.a.1.I02.D13,0.224354,0.252285,0.250656,0.231056,0.264198,0.264684,0.241676,0.199119,0.195863,...,0.199101,0.231732,0.204147,0.236925,0.242189,0.256610,0.190439,0.242584,0.215443,2.538462
27,4.A.1.a.1.I04.D01,0.663602,0.757573,0.585752,0.682749,0.713199,0.195251,0.195251,0.195251,0.517320,...,0.195251,0.195251,0.444292,0.413932,0.443281,0.478081,0.268122,0.736629,0.669365,3.666667
36,4.A.1.a.1.I05.D02,0.233164,0.280678,0.242742,0.201222,0.272625,0.065916,0.065916,0.065916,0.181055,...,0.071564,0.074696,0.173237,0.081286,0.130020,0.135974,0.074921,0.278879,0.265886,2.461538
38,4.A.1.a.1.I06.D02,0.611487,0.674733,0.631396,0.586710,0.690340,0.643718,0.581215,0.558718,0.557497,...,0.554544,0.602097,0.535110,0.610650,0.642452,0.625036,0.409713,0.630867,0.561742,2.470588
59,4.A.1.a.1.I12.D04,0.478790,0.619836,0.501416,0.493329,0.545674,0.137769,0.140283,0.137769,0.447208,...,0.235546,0.244937,0.379675,0.229312,0.270349,0.298398,0.140077,0.551021,0.540735,1.833333


# Save Data to file

- With and without Employment Weighting

In [70]:
## Uncomment to pickle somewhere: 

save_these = [(X, test, y), (X_o, test_o, y_o)]
file_names = ['tasks_by_skills_2018_v4.p', 'tasks_by_skills_2018_v4_o.p']

for file_name, save in zip(file_names, save_these):
     
    path = os.path.join(datasets, 'FoEmployment/Analysis_of_ONET_Tasks', file_name)
    f = open(path, "w")
    pickle.dump(save, f)
    f.close()
    print "saved here: %s " % path

saved here: /home/scpd/Datasets/FoEmployment/Analysis_of_ONET_Tasks/tasks_by_skills_2018_v4.p 
saved here: /home/scpd/Datasets/FoEmployment/Analysis_of_ONET_Tasks/tasks_by_skills_2018_v4_o.p 
