# About

This notebook converts Skills.xlsx into Skills.csv

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.set_option('display.max_columns', None)

In [3]:
import json

# Read the SOC mapping JSON file
with open('../data/soc_mapping.json', 'r') as f:
    soc_mapping = json.load(f)

df = pd.DataFrame.from_dict(soc_mapping, orient='index')

# Reset index to make the SOC codes a column
df = df.reset_index()
df = df.rename(columns={'index': 'SOC Code'})
# First remove the hyphen, then take the appropriate number of digits
df['normalized_SOC_Code'] = df['SOC Code'].str.replace('-', '')
df['normalized_major_code'] = df['SOC Code'].str.replace('-', '').str[:2]
df['normalized_minor_code'] = df['SOC Code'].str.replace('-', '').str[:3]
df['normalized_broad_code'] = df['SOC Code'].str.replace('-', '').str[:4]

print("\nColumns in the DataFrame:")
print(df.columns.tolist())


Columns in the DataFrame:
['SOC Code', 'detailed_title', 'major_code', 'major_title', 'minor_code', 'minor_title', 'broad_code', 'broad_title', 'normalized_SOC_Code', 'normalized_major_code', 'normalized_minor_code', 'normalized_broad_code']


In [4]:
# df.to_csv('../data/soc_mapping.csv', index=False)

In [5]:
skills_df = pd.read_excel('../data/ONET/Skills.xlsx')

In [6]:
skills_df.head()

Unnamed: 0,O*NET-SOC Code,Title,Element ID,Element Name,Scale ID,Scale Name,Data Value,N,Standard Error,Lower CI Bound,Upper CI Bound,Recommend Suppress,Not Relevant,Date,Domain Source
0,11-1011.00,Chief Executives,2.A.1.a,Reading Comprehension,IM,Importance,4.12,8,0.125,3.88,4.37,N,,08/2023,Analyst
1,11-1011.00,Chief Executives,2.A.1.a,Reading Comprehension,LV,Level,4.62,8,0.183,4.2664,4.9836,N,N,08/2023,Analyst
2,11-1011.00,Chief Executives,2.A.1.b,Active Listening,IM,Importance,4.0,8,0.0,4.0,4.0,N,,08/2023,Analyst
3,11-1011.00,Chief Executives,2.A.1.b,Active Listening,LV,Level,4.75,8,0.1637,4.4292,5.0708,N,N,08/2023,Analyst
4,11-1011.00,Chief Executives,2.A.1.c,Writing,IM,Importance,4.12,8,0.125,3.88,4.37,N,,08/2023,Analyst


In [7]:
# print unique values of Element Name
print(f"unique count of Element Name: {len(skills_df['Element Name'].unique())}")
print(f"unique values of Element Name: {skills_df['Element Name'].unique()}")

print(f"unique count of O*NET-SOC Code: {len(skills_df['O*NET-SOC Code'].unique())}")
print(f"unique count of SOC code without .specification: {len(skills_df['O*NET-SOC Code'].unique())}")

unique count of Element Name: 35
unique values of Element Name: ['Reading Comprehension' 'Active Listening' 'Writing' 'Speaking'
 'Mathematics' 'Science' 'Critical Thinking' 'Active Learning'
 'Learning Strategies' 'Monitoring' 'Social Perceptiveness' 'Coordination'
 'Persuasion' 'Negotiation' 'Instructing' 'Service Orientation'
 'Complex Problem Solving' 'Operations Analysis' 'Technology Design'
 'Equipment Selection' 'Installation' 'Programming'
 'Operations Monitoring' 'Operation and Control' 'Equipment Maintenance'
 'Troubleshooting' 'Repairing' 'Quality Control Analysis'
 'Judgment and Decision Making' 'Systems Analysis' 'Systems Evaluation'
 'Time Management' 'Management of Financial Resources'
 'Management of Material Resources' 'Management of Personnel Resources']
unique count of O*NET-SOC Code: 879
unique count of SOC code without .specification: 879


In [8]:
# Update SOC_Code column based on the condition
skills_df['SOC_Code'] = skills_df['O*NET-SOC Code'].apply(lambda x: x.split('.')[0] if '.' in x else x)

In [9]:
# skills_df.to_csv('Skills.csv', index=False)

In [10]:
skills_importance_df = skills_df[skills_df['Scale Name'] == 'Importance']

In [11]:
skills_importance_df  = skills_importance_df.pivot(
    index=['O*NET-SOC Code', 'Title'],
    columns='Element Name',
    values='Data Value'
).reset_index()

In [12]:
skills_importance_df['normalized_major_code'] = skills_importance_df['O*NET-SOC Code'].str.replace('-', '').str[:2]
skills_importance_df['normalized_minor_code'] = skills_importance_df['O*NET-SOC Code'].str.replace('-', '').str[:3]


In [13]:
skills_importance_df

Element Name,O*NET-SOC Code,Title,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,Judgment and Decision Making,Learning Strategies,Management of Financial Resources,Management of Material Resources,Management of Personnel Resources,Mathematics,Monitoring,Negotiation,Operation and Control,Operations Analysis,Operations Monitoring,Persuasion,Programming,Quality Control Analysis,Reading Comprehension,Repairing,Science,Service Orientation,Social Perceptiveness,Speaking,Systems Analysis,Systems Evaluation,Technology Design,Time Management,Troubleshooting,Writing,normalized_major_code,normalized_minor_code
0,11-1011.00,Chief Executives,3.75,4.00,4.38,4.25,4.38,1.00,1.12,1.00,3.38,4.75,3.12,4.25,4.00,4.25,3.25,4.00,4.12,1.88,3.12,2.00,4.00,1.75,1.88,4.12,1.00,1.62,3.12,4.12,4.25,4.12,4.25,1.75,4.00,1.50,4.12,11,111
1,11-1011.03,Chief Sustainability Officers,3.75,4.00,4.00,3.75,4.12,1.00,1.12,1.00,3.25,3.88,3.38,2.88,2.25,3.12,2.88,3.75,3.12,2.00,2.88,2.00,3.88,1.88,1.88,4.00,1.00,2.12,3.25,3.88,4.00,3.88,3.88,1.88,3.38,1.00,4.12,11,111
2,11-1021.00,General and Operations Managers,3.62,4.00,3.62,3.88,3.88,1.00,1.00,1.00,3.00,3.62,3.00,3.00,3.12,3.75,2.62,4.00,3.50,1.88,2.50,2.25,3.62,1.50,2.38,4.00,1.00,1.50,3.25,3.75,4.00,3.12,3.12,1.50,3.62,1.75,3.50,11,111
3,11-2011.00,Advertising and Promotions Managers,3.25,4.12,3.50,3.50,4.00,1.00,1.12,1.00,2.88,3.75,3.00,2.75,2.62,3.12,3.00,3.25,3.12,1.00,2.75,1.62,3.38,1.75,1.62,3.75,1.00,1.62,3.12,4.00,4.00,3.12,3.12,1.75,3.50,1.00,3.75,11,112
4,11-2021.00,Marketing Managers,3.88,3.88,3.62,3.50,3.88,1.00,1.00,1.00,3.00,3.75,3.12,2.88,2.62,3.38,2.75,3.75,3.62,1.00,3.38,1.75,3.75,1.88,1.88,3.88,1.00,1.75,3.12,3.88,3.88,3.25,3.50,1.75,3.50,1.00,3.25,11,112
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874,53-7071.00,Gas Compressor and Gas Pumping Station Operators,2.88,3.12,3.00,3.00,3.62,3.12,2.12,1.00,2.50,3.00,2.25,1.62,2.00,2.25,2.25,3.12,2.25,3.88,1.00,4.00,2.25,1.38,3.00,3.12,3.00,1.88,2.25,2.75,3.00,2.62,2.00,1.88,3.00,3.12,3.00,53,537
875,53-7072.00,"Pump Operators, Except Wellhead Pumpers",2.88,3.12,2.88,2.88,3.12,2.88,2.25,1.12,2.88,3.00,2.12,2.00,2.25,2.62,2.38,3.50,2.25,3.50,1.88,3.88,2.38,1.75,2.88,3.12,2.75,2.00,2.38,2.75,3.12,2.38,2.12,1.88,3.00,3.00,2.88,53,537
876,53-7073.00,Wellhead Pumpers,2.38,2.88,3.00,2.25,3.12,3.12,2.25,1.25,2.12,3.12,1.88,1.38,1.75,2.12,2.25,3.12,2.00,3.88,1.38,4.00,2.00,1.25,2.38,2.75,3.12,1.12,1.88,2.50,3.00,2.00,2.00,1.50,2.75,3.12,2.62,53,537
877,53-7081.00,Refuse and Recyclable Material Collectors,2.25,2.88,2.38,2.62,2.75,2.75,1.75,1.00,1.88,2.38,1.38,1.00,1.00,2.00,1.00,2.50,2.00,3.00,1.12,3.00,2.00,1.00,2.25,2.62,2.50,1.00,2.38,2.50,2.88,1.38,1.38,1.00,2.50,2.50,2.50,53,537


In [14]:
skills_importance_df.sort_values(by='Operations Analysis', ascending=False)[:30][['O*NET-SOC Code', 'Title', 'Operations Analysis']]

Element Name,O*NET-SOC Code,Title,Operations Analysis
123,17-1011.00,"Architects, Except Landscape and Naval",4.0
128,17-2011.00,Aerospace Engineers,3.88
152,17-2161.00,Nuclear Engineers,3.75
326,27-1027.00,Set and Exhibit Designers,3.75
130,17-2031.00,Bioengineers and Biomedical Engineers,3.75
117,15-2031.00,Operations Research Analysts,3.75
150,17-2141.02,Automotive Engineers,3.75
113,15-1299.08,Computer Systems Engineers/Architects,3.75
131,17-2041.00,Chemical Engineers,3.62
34,11-9111.00,Medical and Health Services Managers,3.62


In [15]:
# skills_importance_df.to_csv('../data/skills/skills_importance.csv', index=False)

In [16]:
# Define your skill_categories mapping
skill_categories = {
    'basic_skills': [
        'Reading Comprehension', 'Active Listening', 'Writing', 'Speaking',
    ],
    'cognitive_skills': [
        'Critical Thinking', 'Active Learning', 'Learning Strategies',
        'Monitoring', 'Complex Problem Solving', 'Judgment and Decision Making', 'Operations Analysis'
    ],
    'social_skills': [
        'Social Perceptiveness', 'Coordination', 'Persuasion',
        'Negotiation', 'Instructing', 'Service Orientation'
    ],
    'operations_skills': [
        'Operation and Control', 'Operations Monitoring', 'Quality Control Analysis', 'Troubleshooting'
    ],
    'maintenance_skills': [
        'Equipment Selection', 'Installation', 'Equipment Maintenance', 'Repairing'
    ],
    'technical_skills': [
        'Technology Design', 'Programming', 'Mathematics', 'Science'
    ],
    'management_skills': [
        'Systems Analysis', 'Systems Evaluation', 'Time Management',
        'Management of Financial Resources', 'Management of Material Resources',
        'Management of Personnel Resources'
    ]
}

# Prepare a dict for the new DataFrame
new_data = {
    'O*NET-SOC Code': skills_importance_df['O*NET-SOC Code'],
    'Title': skills_importance_df['Title'],
}

# For each category, compute the mean of the mapped columns and add as a new column
for cat, skills in skill_categories.items():
    present_skills = [s for s in skills if s in skills_importance_df.columns]
    new_data[cat] = skills_importance_df[present_skills].mean(axis=1)

# Create the new DataFrame
skills_combined_df = pd.DataFrame(new_data)

# Display the first few rows to check
display(skills_combined_df.head())

Unnamed: 0,O*NET-SOC Code,Title,basic_skills,cognitive_skills,social_skills,operations_skills,maintenance_skills,technical_skills,management_skills
0,11-1011.00,Chief Executives,4.1225,3.928571,3.831667,1.815,1.03,2.0925,4.145
1,11-1011.03,Chief Sustainability Officers,4.03,3.68,3.521667,1.72,1.03,2.19,3.231667
2,11-1021.00,General and Operations Managers,3.875,3.462857,3.5,2.065,1.0,1.78,3.288333
3,11-2011.00,Advertising and Promotions Managers,3.905,3.357143,3.333333,1.31,1.03,2.03,3.038333
4,11-2021.00,Marketing Managers,3.7225,3.625714,3.478333,1.4075,1.0,2.0325,3.188333


In [17]:
skills_combined_df.columns

Index(['O*NET-SOC Code', 'Title', 'basic_skills', 'cognitive_skills',
       'social_skills', 'operations_skills', 'maintenance_skills',
       'technical_skills', 'management_skills'],
      dtype='object')

In [18]:
# import re
# ------------------------------------------------------------
# 1.  sector‑growth keywords for the WEF “discount” step
# ------------------------------------------------------------
# GROWTH_SECTOR_PATTERNS = re.compile(
#     r"(nurs|therap|counsel|teacher|educat|"      # care & education
#     r"ai\b|ml\b|machine learning|data|cyber|"    # digital / AI / security
#     r"engineer|developer|analyst|"               # generic digital titles
#     r"renewable|solar|wind|green|sustain|"       # green transition
#     r"project manager|operations manager)",      # leadership / project
#     flags=re.I
# )

# ------------------------------------------------------------
# 2.  helper to normalize each 1‑to‑5 skill bucket to 0‑1
# ------------------------------------------------------------
def _norm(series: pd.Series) -> pd.Series:
    return (series - 1.0) / 4.0

# ------------------------------------------------------------
# 3.  main scorer
# ------------------------------------------------------------
def add_automation_risk(df: pd.DataFrame, debug: bool = False) -> pd.DataFrame:
    """Append a 0‑100 'automation_risk_score' column to the skills dataframe.

    Expected numeric columns (1‑5 scale):
      basic_skills, cognitive_skills, social_skills,
      operations_skills, maintenance_skills,
      technical_skills, management_skills
    """
    # 1. normalise buckets
    basic_skills  = _norm(df["basic_skills"])
    cognitive_skills  = _norm(df["cognitive_skills"])
    social_skills  = _norm(df["social_skills"])
    operations_skills  = _norm(df["operations_skills"])
    maintenance_skills  = _norm(df["maintenance_skills"])
    technical_skills  = _norm(df["technical_skills"])
    management_skills  = _norm(df["management_skills"])

    # 2. composite indices
    field_intensity = 0.3 * operations_skills + 0.7 * maintenance_skills
    # human_capital     = (
    #     0.60 * social_skills + 0.40 * management_skills # + 0.20 * cognitive_skills + 0.10 * basic_skills
    # )   # ↑ social/management weight, ↓ basic  #v1 - works for food grader

    human_capital     = (
        np.maximum(social_skills, management_skills) # any such skill heavily required would be a good indicator of human capital
    )   # ↑ social/management weight, ↓ basic
    
    tech_shield       = 0.30 * technical_skills + 0.10 * cognitive_skills 
    # tech shield is the sum of technical and cognitive skills - helps in longer term job creation but not in short term

    tech_composition = (technical_skills) / (technical_skills + management_skills + social_skills + maintenance_skills)

    # 3. raw risk (0‑1) with updated weights
    # Long-term
    # raw = (
    #     0.55 * routine_intensity
    #     + 0.30 * (1 - human_capital)
    #     + 0.15 * (1 - tech_shield)
    # )

    # Modified
    # Short-term
    raw = (
        0.5 * (tech_composition)  # The more technical the job, the higher the risk
        + 0.3 * (1 - field_intensity)
        + 0.2 * (1 - human_capital)
        # + 0.15 * (1 - tech_shield)
    )

    # 4. WEF growth‑sector discount (‑30 %) – stricter threshold
    # mask_growth = df["Title"].str.contains(GROWTH_SECTOR_PATTERNS, na=False) & (
    #     (social_skills >= 0.60) | (technical_skills >= 0.60)   # need ≥3.4 in raw 1‑5 scale
    # )

    # raw = raw.mask(mask_growth, raw * 0.70)

    # 5. scale to 0‑100
    df = df.copy()
    df["automation_risk"] = (raw * 100).round(1)
    df["automation_risk_score"] = (
        100 / (1 + np.exp(-4 * (raw - 0.40)))
    ).round(1)


    if debug:
        print(f"tech_composition: {tech_composition}")
        print(f"1 - field_intensity: {1 - field_intensity}")
        print(f"1 - human_capital: {1 - human_capital}")
        
        print(f"automation_risk: {raw}")
        print(f"automation_risk_score: {df['automation_risk_score']}")

    return df



In [19]:
# ------------------------------------------------------------
# 4.  usage
# ------------------------------------------------------------
skills_combined_df = add_automation_risk(skills_combined_df)

In [20]:
print(f"median: {skills_combined_df['automation_risk_score'].median()}")
print(f"std: {skills_combined_df['automation_risk_score'].std()}")
print(f"min: {skills_combined_df['automation_risk_score'].min()}")
print(f"max: {skills_combined_df['automation_risk_score'].max()}")
print(f"iqr: {skills_combined_df['automation_risk_score'].quantile(0.75) - skills_combined_df['automation_risk_score'].quantile(0.25)}")
print(f"mean: {skills_combined_df['automation_risk_score'].mean()}")

median: 54.2
std: 5.698288675646374
min: 38.3
max: 71.7
iqr: 7.700000000000003
mean: 53.720136518771334


In [21]:
skills_combined_df.sort_values(by='automation_risk_score', ascending=False)[:40][['O*NET-SOC Code', 'Title', 'automation_risk_score', 'automation_risk', 'basic_skills', 'cognitive_skills', 'social_skills', 'operations_skills', 'maintenance_skills', 'technical_skills', 'management_skills']]

Unnamed: 0,O*NET-SOC Code,Title,automation_risk_score,automation_risk,basic_skills,cognitive_skills,social_skills,operations_skills,maintenance_skills,technical_skills,management_skills
116,15-2021.00,Mathematicians,71.7,63.2,3.565,3.535714,2.5,1.28,1.0,3.185,2.435
118,15-2041.00,Statisticians,71.1,62.6,3.8775,3.43,2.543333,1.47,1.0,3.25,2.48
119,15-2041.01,Biostatisticians,69.2,60.3,3.94,3.625714,2.75,1.3725,1.125,3.405,2.581667
601,43-9111.00,Statistical Assistants,68.1,58.9,3.5025,3.142857,2.793333,1.4075,1.0,2.845,2.271667
106,15-1251.00,Computer Programmers,67.9,58.8,3.375,3.195714,2.601667,2.28,1.0625,3.3425,2.706667
122,15-2099.01,Bioinformatics Technicians,66.8,57.5,3.5325,3.16,2.731667,1.7775,1.09,2.935,2.518333
117,15-2031.00,Operations Research Analysts,66.6,57.2,4.0,3.644286,2.791667,1.5325,1.0,3.03,2.876667
205,19-2099.01,Remote Sensing Scientists and Technologists,66.0,56.5,3.91,3.518571,2.813333,2.095,1.125,3.2225,2.771667
196,19-2021.00,Atmospheric and Space Scientists,66.0,56.5,4.0,3.498571,2.896667,1.435,1.0,2.7825,2.563333
111,15-1299.02,Geographic Information Systems Technologists a...,66.0,56.6,3.5,2.945714,2.645,1.53,1.0625,2.47,2.311667


In [22]:
add_automation_risk(df=skills_combined_df[skills_combined_df['O*NET-SOC Code'] == '15-1251.00'], debug=True)

tech_composition: 106    0.410006
dtype: float64
1 - field_intensity: 106    0.893062
dtype: float64
1 - human_capital: 106    0.573333
dtype: float64
automation_risk: 106    0.587588
dtype: float64
automation_risk_score: 106    67.9
Name: automation_risk_score, dtype: float64


Unnamed: 0,O*NET-SOC Code,Title,basic_skills,cognitive_skills,social_skills,operations_skills,maintenance_skills,technical_skills,management_skills,automation_risk,automation_risk_score
106,15-1251.00,Computer Programmers,3.375,3.195714,2.601667,2.28,1.0625,3.3425,2.706667,58.8,67.9


In [23]:
add_automation_risk(df=skills_combined_df[skills_combined_df['O*NET-SOC Code'] == '41-9012.00'], debug=True)

tech_composition: 545    0.105882
dtype: float64
1 - field_intensity: 545    0.99775
dtype: float64
1 - human_capital: 545    0.72375
dtype: float64
automation_risk: 545    0.497016
dtype: float64
automation_risk_score: 545    59.6
Name: automation_risk_score, dtype: float64


Unnamed: 0,O*NET-SOC Code,Title,basic_skills,cognitive_skills,social_skills,operations_skills,maintenance_skills,technical_skills,management_skills,automation_risk,automation_risk_score
545,41-9012.00,Models,2.5,1.84,2.105,1.03,1.0,1.1875,1.478333,49.7,59.6


In [24]:
add_automation_risk(df=skills_combined_df[skills_combined_df['O*NET-SOC Code'] == '31-9011.00'], debug=True)

tech_composition: 443    0.198623
dtype: float64
1 - field_intensity: 443    0.971688
dtype: float64
1 - human_capital: 443    0.619583
dtype: float64
automation_risk: 443    0.514734
dtype: float64
automation_risk_score: 443    61.3
Name: automation_risk_score, dtype: float64


Unnamed: 0,O*NET-SOC Code,Title,basic_skills,cognitive_skills,social_skills,operations_skills,maintenance_skills,technical_skills,management_skills,automation_risk,automation_risk_score
443,31-9011.00,Massage Therapists,3.31,2.784286,2.521667,1.3775,1.0,1.625,2.0,51.5,61.3


In [25]:
skills_importance_df[
    skills_importance_df['O*NET-SOC Code'].isin(['31-9011.00', '29-1022.00', '27-2011.00' , '45-2021.00'])
]

Element Name,O*NET-SOC Code,Title,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,Judgment and Decision Making,Learning Strategies,Management of Financial Resources,Management of Material Resources,Management of Personnel Resources,Mathematics,Monitoring,Negotiation,Operation and Control,Operations Analysis,Operations Monitoring,Persuasion,Programming,Quality Control Analysis,Reading Comprehension,Repairing,Science,Service Orientation,Social Perceptiveness,Speaking,Systems Analysis,Systems Evaluation,Technology Design,Time Management,Troubleshooting,Writing,normalized_major_code,normalized_minor_code
327,27-2011.00,Actors,2.62,3.75,2.88,2.88,3.0,1.0,1.0,1.0,2.75,2.88,2.75,1.0,1.0,2.38,1.0,3.0,2.5,1.0,1.75,1.25,2.5,1.0,1.0,3.88,1.0,1.5,2.12,3.75,3.88,2.0,2.0,1.25,3.0,1.0,2.88,27,272
356,29-1022.00,Oral and Maxillofacial Surgeons,3.88,3.88,4.12,3.62,4.0,1.88,2.12,1.0,2.88,4.12,2.88,2.0,2.0,2.75,2.62,3.88,2.62,2.88,3.12,2.75,2.75,1.5,2.75,4.0,1.88,3.5,3.25,3.75,3.88,3.25,3.0,2.0,3.38,1.88,3.5,29,291
443,31-9011.00,Massage Therapists,3.0,3.62,3.0,2.38,3.0,1.0,1.0,1.0,2.25,3.12,2.75,1.38,1.62,1.75,2.0,3.0,2.0,1.0,1.62,1.38,2.0,1.0,1.75,3.0,1.0,1.75,3.25,3.25,3.62,2.12,2.25,1.75,2.88,1.38,3.0,31,319
604,45-2021.00,Animal Breeders,3.0,3.0,3.0,2.5,3.25,1.0,1.38,1.0,1.88,3.0,2.12,2.12,2.0,2.0,2.12,3.0,1.75,2.62,1.62,2.88,2.38,1.5,2.62,2.88,1.0,3.0,2.38,2.62,3.0,2.38,2.12,1.25,2.75,2.0,2.75,45,452


In [26]:
add_automation_risk(skills_combined_df[skills_combined_df['O*NET-SOC Code'] == '27-2011.00'], debug=True)

tech_composition: 327    0.066157
dtype: float64
1 - field_intensity: 327    0.995313
dtype: float64
1 - human_capital: 327    0.5625
dtype: float64
automation_risk: 327    0.444172
dtype: float64
automation_risk_score: 327    54.4
Name: automation_risk_score, dtype: float64


Unnamed: 0,O*NET-SOC Code,Title,basic_skills,cognitive_skills,social_skills,operations_skills,maintenance_skills,technical_skills,management_skills,automation_risk,automation_risk_score
327,27-2011.00,Actors,3.5975,2.697143,2.75,1.0625,1.0,1.1875,1.896667,44.4,54.4


In [27]:
skills_combined_df[skills_combined_df['Title'] == 'Dancers']

Unnamed: 0,O*NET-SOC Code,Title,basic_skills,cognitive_skills,social_skills,operations_skills,maintenance_skills,technical_skills,management_skills,automation_risk,automation_risk_score
335,27-2031.00,Dancers,2.845,2.535714,2.605,1.095,1.0,1.125,1.835,44.2,54.2


In [28]:
add_automation_risk(skills_combined_df[skills_combined_df['O*NET-SOC Code'] == '27-3023.00'], debug=True)

tech_composition: 340    0.142135
dtype: float64
1 - field_intensity: 340    0.957812
dtype: float64
1 - human_capital: 340    0.489583
dtype: float64
automation_risk: 340    0.456328
dtype: float64
automation_risk_score: 340    55.6
Name: automation_risk_score, dtype: float64


Unnamed: 0,O*NET-SOC Code,Title,basic_skills,cognitive_skills,social_skills,operations_skills,maintenance_skills,technical_skills,management_skills,automation_risk,automation_risk_score
340,27-3023.00,"News Analysts, Reporters, and Journalists",4.0,2.858571,3.041667,1.5625,1.0,1.5625,2.353333,45.6,55.6


In [29]:
skills_combined_df.to_csv('../data/skills/skills_based_risk.csv', index=False)