In [3]:
import pandas as pd
import os

# --- 1. Load All Three CSV Files ---
print("--- Loading Data Files ---")
onet_skills_path = os.path.join('..', 'data', 'raw', 'onet_skills.csv')
onet_occ_path = os.path.join('..', 'data', 'raw', 'onet_occupations.csv')
esco_skills_path = os.path.join('..', 'data', 'raw', 'esco_skills.csv')

try:
    onet_skills_df = pd.read_csv(onet_skills_path)
    onet_occ_df = pd.read_csv(onet_occ_path)
    esco_skills_df = pd.read_csv(esco_skills_path)
    print("Successfully loaded all three CSV files.")
except FileNotFoundError as e:
    print(f"Error: Could not find a file. {e}")
    print("Please make sure files are in the data/raw/ directory.")


# --- 2. Prepare the O*NET Data for Merging ---
print("\n--- Processing O*NET Data ---")
# Rename the columns for clarity and consistency
onet_skills_df.rename(columns={
    'O*NET-SOC Code': 'job_code',
    'Element Name': 'skill_name',
    'Data Value': 'importance_score'
}, inplace=True)

onet_occ_df.rename(columns={
    'O*NET-SOC Code': 'job_code',
    'Title': 'job_title'
}, inplace=True)

# Select only the columns we need
onet_skills_subset = onet_skills_df[['job_code', 'skill_name', 'importance_score']]
onet_occ_subset = onet_occ_df[['job_code', 'job_title']]


# --- 3. Merge the O*NET DataFrames ---
# Combine the two O*NET tables using their common column, 'job_code'
onet_merged_df = pd.merge(onet_skills_subset, onet_occ_subset, on='job_code')
print("Successfully merged O*NET skills and occupations data.")


# --- 4. Display the Final Merged O*NET DataFrame ---
print("\n--- Displaying Merged O*NET Data ---")
print("First 5 rows of the merged O*NET data:")
display(onet_merged_df.head())

print("\nExample: Skills for 'Software Developers' (Job Code: 15-1253.00)")
software_dev_skills = onet_merged_df[onet_merged_df['job_code'] == '15-1253.00']
display(software_dev_skills.head(10))

# --- 5. Display ESCO Data ---
print("\n--- Displaying ESCO Data ---")
print("First 5 rows of the ESCO skills data:")
display(esco_skills_df.head())

--- Loading Data Files ---
Successfully loaded all three CSV files.

--- Processing O*NET Data ---
Successfully merged O*NET skills and occupations data.

--- Displaying Merged O*NET Data ---
First 5 rows of the merged O*NET data:


Unnamed: 0,job_code,skill_name,importance_score,job_title
0,11-1011.00,Reading Comprehension,4.12,Chief Executives
1,11-1011.00,Reading Comprehension,4.62,Chief Executives
2,11-1011.00,Active Listening,4.0,Chief Executives
3,11-1011.00,Active Listening,4.75,Chief Executives
4,11-1011.00,Writing,4.12,Chief Executives



Example: Skills for 'Software Developers' (Job Code: 15-1253.00)


Unnamed: 0,job_code,skill_name,importance_score,job_title
7490,15-1253.00,Reading Comprehension,4.0,Software Quality Assurance Analysts and Testers
7491,15-1253.00,Reading Comprehension,4.12,Software Quality Assurance Analysts and Testers
7492,15-1253.00,Active Listening,3.88,Software Quality Assurance Analysts and Testers
7493,15-1253.00,Active Listening,4.0,Software Quality Assurance Analysts and Testers
7494,15-1253.00,Writing,3.75,Software Quality Assurance Analysts and Testers
7495,15-1253.00,Writing,3.88,Software Quality Assurance Analysts and Testers
7496,15-1253.00,Speaking,3.88,Software Quality Assurance Analysts and Testers
7497,15-1253.00,Speaking,4.0,Software Quality Assurance Analysts and Testers
7498,15-1253.00,Mathematics,2.75,Software Quality Assurance Analysts and Testers
7499,15-1253.00,Mathematics,2.75,Software Quality Assurance Analysts and Testers



--- Displaying ESCO Data ---
First 5 rows of the ESCO skills data:


Unnamed: 0,conceptType,conceptUri,skillType,reuseLevel,preferredLabel,altLabels,hiddenLabels,status,modifiedDate,scopeNote,definition,inScheme,description
0,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/0005c151-5b5a...,skill/competence,sector-specific,manage musical staff,manage staff of music\ncoordinate duties of mu...,,released,2023-11-30T15:53:37.136Z,,,http://data.europa.eu/esco/concept-scheme/skil...,Assign and manage staff tasks in areas such as...
1,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/00064735-8fad...,skill/competence,occupation-specific,supervise correctional procedures,oversee prison procedures\nmanage correctional...,,released,2023-11-30T15:04:00.689Z,,,http://data.europa.eu/esco/concept-scheme/memb...,Supervise the operations of a correctional fac...
2,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/000709ed-2be5...,skill/competence,sector-specific,apply anti-oppressive practices,apply non-oppressive practices\napply an anti-...,,released,2023-11-28T10:45:53.54Z,,,http://data.europa.eu/esco/concept-scheme/skil...,"Identify oppression in societies, economies, c..."
3,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/0007bdc2-dd15...,skill/competence,sector-specific,control compliance of railway vehicles regulat...,monitoring of compliance with railway vehicles...,,released,2023-11-30T16:29:18.273Z,,,http://data.europa.eu/esco/concept-scheme/skil...,"Inspect rolling stock, components and systems ..."
4,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/00090cc1-1f27...,skill/competence,cross-sector,identify available services,establish available services\ndetermine rehabi...,,released,2023-11-28T10:38:49.206Z,,,http://data.europa.eu/esco/concept-scheme/memb...,Identify the different services available for ...
