In [123]:
import pandas as pd
import numpy as np

# Build a long-format table with one row per (occupation, skill) carrying the
# skill's Level and Importance, then attach the per-skill ratings and compute
# a `skill_rating` for every row.
df = pd.read_csv("aggregated-occupation-level-data.csv",index_col=0).reset_index()

# Substring-matched column lists. They are not used further in this cell; kept
# unchanged in case other cells of the notebook read them.
levels_columns = [x for x in list(df.columns) if "_LVL" in x ]
importance_columns = [x for x in list(df.columns) if "_IMP" in x ]

# Columns to melt are selected by *suffix*.
importance_cols = [col for col in df.columns if col.endswith('_IMP')]
level_cols = [col for col in df.columns if col.endswith('_LVL')]

# Wide -> long: one row per occupation and skill column.
melted_imp = df.melt(id_vars=['OCCUPATION_2018_DESC'], value_vars=importance_cols, var_name='Skill', value_name='Importance')
melted_lvl = df.melt(id_vars=['OCCUPATION_2018_DESC'], value_vars=level_cols, var_name='Skill', value_name='Level')

# Strip the suffix by slicing it off the end. Every name in these frames was
# selected with `endswith`, so this removes exactly the trailing marker.
# `str.replace` would also delete the token if it appeared in the middle of a
# name and, on older pandas versions, treats the pattern as a regex by default.
melted_imp['Skill'] = melted_imp['Skill'].str[:-len('_IMP')]
melted_lvl['Skill'] = melted_lvl['Skill'].str[:-len('_LVL')]

# Pair each skill's level with its importance for the same occupation.
df = melted_lvl.merge(melted_imp, on=["OCCUPATION_2018_DESC","Skill"])

# NOTE(review): `skill_ratings` is not defined in this cell. In this file it is
# assigned in a cell that appears further down, so that cell has to be executed
# before this one; on a fresh kernel run top-to-bottom this line raises
# NameError. Consider moving the `skill_ratings` cell above this one.
df = df.merge(skill_ratings,on="Skill")

# Level and Importance are each divided by 5 (presumably a 5-point scale --
# TODO confirm) and weighted by the skill's `percent_not_exposed`.
df["skill_rating"] = df["Level"]/5 * df["Importance"]/5 * df["percent_not_exposed"]


In [124]:
# Collapse the (occupation, skill) rows to one row per occupation:
# `skill_diversity` is the sum of the per-skill `skill_rating` values.
# NOTE(review): `percent_not_exposed` is a per-skill quantity, so "first" only
# keeps the value from whichever skill row comes first in each group (the
# displayed output shows the same number for every occupation) -- confirm this
# column is really wanted here.
rankings = (
    df.groupby("OCCUPATION_2018_DESC")
    .agg(
        percent_not_exposed=("percent_not_exposed", "first"),
        skill_diversity=("skill_rating", "sum"),
    )
    .reset_index()
)
rankings

Unnamed: 0,OCCUPATION_2018_DESC,percent_not_exposed,skill_diversity
0,Accountants and auditors,0.690523,2.092854
1,Actors,0.690523,1.668955
2,Actuaries,0.690523,2.164762
3,Acupuncturists,0.690523,1.721343
4,Administrative services managers,0.690523,2.360015
...,...,...,...
339,"Weighers, measurers, checkers, and samplers, r...",0.690523,1.625094
340,"Wholesale and retail buyers, except farm products",0.690523,2.373526
341,"Woodworking machine setters, operators, and te...",0.690523,1.567662
342,Word processors and typists,0.690523,1.111417


In [125]:
# Attach the occupation-level exposure scores to `rankings`, order by
# `skill_diversity` (highest first) and write the result to disk.
# Note that `df` is re-bound to the raw occupation table here.
df = pd.read_csv("aggregated-occupation-level-data.csv",index_col=0).reset_index()

exposure_cols = ['HUMAN_NUMERIC',
 'GPT4_NUMERIC',
 'GPT4_AUTOMATION_NUMERIC']
exposure = df[["OCCUPATION_2018_DESC", *exposure_cols]]

# `rankings` is overwritten with the merged frame below. Dropping any exposure
# columns it already carries keeps this cell safe to re-run: without it, a
# second execution would merge the same columns again and produce
# `_x` / `_y` suffixed duplicates. On the first run the drop is a no-op.
rankings = (
    exposure
    .merge(rankings.drop(columns=exposure_cols, errors="ignore"), on="OCCUPATION_2018_DESC")
    .sort_values(by="skill_diversity",ascending=False)
)

# The (post-sort) row index is written as the first CSV column.
rankings.to_csv("skill_diversity.csv")

In [81]:
# --- Task level: importance ratings joined with exposure labels -------------
task_labels = pd.read_csv("../../input/gpts_labels/gpts_labels_new.csv")

task_ratings = pd.read_csv("../../input/onet_2023/Task Ratings.csv", index_col=0).reset_index()
is_importance = task_ratings["Scale Name"] == "Importance"
importance_tasks = task_ratings.loc[
    is_importance, ["O*NET-SOC Code", "Title", "Task", "Data Value"]
].rename(columns={"Data Value": "task_importance"})

# Importance is divided by 5; task exposure is the mean of the human label and
# the GPT-4 automation label; the weighted exposure is their product.
exposed_tasks = importance_tasks.merge(task_labels, on=["Title", "Task"]).assign(
    task_importance=lambda t: t["task_importance"] / 5,
    task_exposure=lambda t: (t["human_numeric"] + t["gpt_4_automation_numeric"]) / 2,
    weighted_task_exposure=lambda t: t["task_exposure"] * t["task_importance"],
)

# --- Map tasks to activities, and activities to skills ----------------------
skills = pd.read_csv("../../output/parsed_BLS_data/dwa.csv", index_col=0)

# Only the columns at positions 1 and 3 of the crosswalk file are kept and
# relabelled as skill / activity.
skill_activities = pd.read_csv("../../input/onet_2023/Skills to Work Activities.csv").iloc[:, [1, 3]]
skill_activities.columns = ["skill", "activity"]

exposed_tasks = exposed_tasks.merge(skills, on=["Task"]).merge(skill_activities, on="activity")

# --- Skill level: totals of importance and of importance-weighted exposure --
tasks_by_skill = exposed_tasks.groupby("skill")

skill_importance = (
    tasks_by_skill["task_importance"]
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .rename(columns={"task_importance": "skill_importance_weighting_across_tasks"})
)

skill_exposure = (
    tasks_by_skill["weighted_task_exposure"]
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .rename(columns={"weighted_task_exposure": "skill_exposure_weighting_across_tasks"})
)

# Exposed share = weighted exposure total / importance total, per skill.
# `Skill` is the upper-cased skill name.
skill_ratings = (
    skill_importance.merge(skill_exposure, on="skill")
    .assign(
        percent_exposed=lambda r: r["skill_exposure_weighting_across_tasks"]
        / r["skill_importance_weighting_across_tasks"],
        percent_not_exposed=lambda r: 1 - r["percent_exposed"],
    )
    .sort_values(by="percent_not_exposed", ascending=False)
    .assign(Skill=lambda r: r["skill"].str.upper())
)


In [117]:
# Show the current `rankings` frame as this cell's output.
rankings

Unnamed: 0,OCCUPATION_2018_DESC,HUMAN_NUMERIC,GPT4_NUMERIC,GPT4_AUTOMATION_NUMERIC,skill_diversity
0,Chief executives,0.357143,0.489796,0.418367,3.538302
103,Coaches and scouts,0.160714,0.321429,0.321429,3.265397
56,Bioengineers and biomedical engineers,0.550000,0.600000,0.416667,3.215347
49,Network and computer systems administrators,0.525000,0.700000,0.562500,3.090564
275,"First-line supervisors of mechanics, installer...",0.295455,0.431818,0.386364,3.089184
...,...,...,...,...,...
164,Crossing guards and flaggers,0.250000,0.125000,0.125000,0.946387
234,Postal service mail carriers,0.340909,0.250000,0.397727,0.907347
252,"Graders and sorters, agricultural products",0.083333,0.166667,0.125000,0.758960
311,"Pressers, textile, garment, and related materials",0.017857,0.017857,0.017857,0.735544
