# Add NACE Code Information

Extract and enrich NACE (Statistical Classification of Economic Activities) code information from RDF data to enhance occupation matches.

## 0. Setup

### 0.01 Import Required Libraries

In [102]:
import pandas as pd
import re
from functools import lru_cache
from pathlib import Path
from rdflib import Graph, Namespace
from rdflib.namespace import RDF, RDFS, SKOS

# Import our config
import sys
sys.path.append(str(Path.cwd().parent))
from src.config import load_config

### 0.02 Load Configuration

In [103]:
# Get project root
project_root = Path.cwd().parent

# Load project config
config = load_config()

print("✓ Configuration loaded")

✓ Configuration loaded


### 0.03 Set Up Paths

In [104]:
# Set project ID
project_id = "P075941"

# Get paths
nace_rdf_path = project_root / "data" / "bronze" / "nace" / "NACE_Rev.2.1.rdf"

# ESCO data paths
esco_dir = project_root / "data" / "bronze" / "esco"
occupations_file = esco_dir / "occupations_en.csv"

print(f"ESCO file exists: {occupations_file.exists()}")

print(f"Project ID: {project_id}")
print(f"ESCO occupations file: {occupations_file}")

print(f"NACE RDF file: {nace_rdf_path}")
print(f"NACE RDF exists: {nace_rdf_path.exists()}")

ESCO file exists: True
Project ID: P075941
ESCO occupations file: /Users/lauren/repos/PAD2Skills/data/bronze/esco/occupations_en.csv
NACE RDF file: /Users/lauren/repos/PAD2Skills/data/bronze/nace/NACE_Rev.2.1.rdf
NACE RDF exists: True


## 1. Inspect NACE RDF Data

### 1.01 Load NACE RDF File

In [105]:
# Create RDF graph
g = Graph()

# Load the NACE RDF file (RDF/XML format)
print(f"Loading NACE RDF data from: {nace_rdf_path}")
g.parse(str(nace_rdf_path), format="xml")

print(f"\n✓ Loaded NACE RDF data")
print(f"  Total triples: {len(g):,}")

Loading NACE RDF data from: /Users/lauren/repos/PAD2Skills/data/bronze/nace/NACE_Rev.2.1.rdf

✓ Loaded NACE RDF data
  Total triples: 71,472


## 2. Extract NACE Groups and Descriptions


### 2.01 Query Sections, Divisions, and Groups

In [106]:
# Extract:
# - sections: URI tail is single letter A-Z
# - divisions: URI tail is 2 digits
# - groups: URI tail is 3 digits
# Group links to its parent division via skos:broader, and division links to section via skos:broader.

SECTION_RE = re.compile(r"/[A-Z]$")
DIV_RE = re.compile(r"/[0-9]{2}$")
GROUP_RE = re.compile(r"/[0-9]{3}$")

@lru_cache(maxsize=200_000)
def label_en(uri):
    """Return the English skos:prefLabel for a URI, if present."""
    for lbl in g.objects(uri, SKOS.prefLabel):
        if getattr(lbl, "language", None) == "en":
            return str(lbl)
    return None

print("Extracting sections, divisions, and groups (fast traversal)...")

rows = []
seen_groups = set()

# Iterate only Concepts once, filter to group URIs by string tail.
for subj in g.subjects(RDF.type, SKOS.Concept):
    u = str(subj)
    if not GROUP_RE.search(u):
        continue

    if subj in seen_groups:
        continue
    seen_groups.add(subj)

    # group -> division
    division = next(g.objects(subj, SKOS.broader), None)
    if division is None:
        continue
    div_u = str(division)
    if not DIV_RE.search(div_u):
        # If the dataset has any oddities (e.g., group broader not exactly division),
        # you can climb broader until you hit a division.
        cur = division
        for _ in range(5):
            if cur is None:
                break
            cur_u = str(cur)
            if DIV_RE.search(cur_u):
                division = cur
                div_u = cur_u
                break
            cur = next(g.objects(cur, SKOS.broader), None)
        else:
            continue

    # division -> section
    section = next(g.objects(division, SKOS.broader), None)
    if section is None:
        continue
    sec_u = str(section)
    if not SECTION_RE.search(sec_u):
        # climb broader until section
        cur = section
        for _ in range(5):
            if cur is None:
                break
            cur_u = str(cur)
            if SECTION_RE.search(cur_u):
                section = cur
                sec_u = cur_u
                break
            cur = next(g.objects(cur, SKOS.broader), None)
        else:
            continue

    rows.append({
        "section_uri": sec_u,
        "section_code": sec_u.rsplit("/", 1)[-1],
        "section_label_en": label_en(section),

        "division_uri": div_u,
        "division_code": div_u.rsplit("/", 1)[-1],
        "division_label_en": label_en(division),

        "group_uri": u,
        "group_code": u.rsplit("/", 1)[-1],
        "group_label_en": label_en(subj),
    })

sections_divisions_groups_df = pd.DataFrame(rows).drop_duplicates(subset=["group_uri"])

print(f"✓ Extracted {len(sections_divisions_groups_df)} groups")
print(f"  Unique sections:  {sections_divisions_groups_df['section_code'].nunique()}")
print(f"  Unique divisions: {sections_divisions_groups_df['division_code'].nunique()}")
print(f"  Unique groups:    {sections_divisions_groups_df['group_code'].nunique()}")
sections_divisions_groups_df.head()

Extracting sections, divisions, and groups (fast traversal)...
✓ Extracted 287 groups
  Unique sections:  22
  Unique divisions: 87
  Unique groups:    287


Unnamed: 0,section_uri,section_code,section_label_en,division_uri,division_code,division_label_en,group_uri,group_code,group_label_en
0,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/011,11,01.1 Growing of non-perennial crops
1,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/012,12,01.2 Growing of perennial crops
2,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/013,13,01.3 Plant propagation
3,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/014,14,01.4 Animal production
4,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/015,15,01.5 Mixed farming


### 2.02 Query Group Notes (Descriptions)

In [107]:
# Extract detailed notes/descriptions for GROUPS using xkos vocabulary
# These provide rich context about what each group includes/excludes

q_group_notes = """
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xkos: <http://rdf-vocabulary.ddialliance.org/xkos#>

SELECT ?group ?core ?addl ?excl
WHERE {
  ?group a skos:Concept .
  FILTER(regex(str(?group), "/[0-9]{3}$"))

  OPTIONAL { ?group xkos:coreContentNote ?core . }
  OPTIONAL { ?group xkos:additionalContentNote ?addl . }
  OPTIONAL { ?group xkos:exclusionNote ?excl . }
}
"""

print("Extracting group notes (descriptions)...")
note_rows = []
for r in g.query(q_group_notes):
    note_rows.append({
        "group_uri": str(r.group),
        "core_content_note": str(r.core) if r.core else None,
        "additional_content_note": str(r.addl) if r.addl else None,
        "exclusion_note": str(r.excl) if r.excl else None,
    })

group_notes_df = pd.DataFrame(note_rows).drop_duplicates(subset=["group_uri"])

print(f"\n✓ Extracted notes for {len(group_notes_df)} groups")
print(f"\nNote availability:")
print(f"  Core content notes: {group_notes_df['core_content_note'].notna().sum()} ({group_notes_df['core_content_note'].notna().mean():.1%})")
print(f"  Additional content notes: {group_notes_df['additional_content_note'].notna().sum()} ({group_notes_df['additional_content_note'].notna().mean():.1%})")
print(f"  Exclusion notes: {group_notes_df['exclusion_note'].notna().sum()} ({group_notes_df['exclusion_note'].notna().mean():.1%})")

group_notes_df.head()

Extracting group notes (descriptions)...

✓ Extracted notes for 287 groups

Note availability:
  Core content notes: 99 (34.5%)
  Additional content notes: 21 (7.3%)
  Exclusion notes: 44 (15.3%)


Unnamed: 0,group_uri,core_content_note,additional_content_note,exclusion_note
0,http://data.europa.eu/ux2/nace2.1/011,This group includes the growing of non-perenni...,,
1,http://data.europa.eu/ux2/nace2.1/012,This group includes the growing of perennial c...,,This group excludes:\n- growing of perennial f...
2,http://data.europa.eu/ux2/nace2.1/013,,,
3,http://data.europa.eu/ux2/nace2.1/014,"This group includes:\n- farming (husbandry, ra...",,This group excludes:\n- farm animal boarding a...
4,http://data.europa.eu/ux2/nace2.1/015,,,


### 2.03 Merge Groups with Notes

In [108]:
# Merge group metadata with group descriptive notes (xkos)
# - sections_divisions_groups_df: one row per group with section/division/group fields
# - group_notes_df: one row per group_uri with core/additional/exclusion notes

print("Merging groups with group notes...")

# Optional: ensure group_notes_df is unique on group_uri before merging
group_notes_df = group_notes_df.drop_duplicates(subset=["group_uri"])

# Left merge on group_uri
sections_divisions_groups_df = sections_divisions_groups_df.merge(
    group_notes_df,
    on="group_uri",
    how="left",
    validate="m:1"  # many groups in left, one note row per group_uri on right
)

print("✓ Merged groups with notes")
print(f"  Total rows: {len(sections_divisions_groups_df):,}")
print(f"  Non-missing group_label_en: {sections_divisions_groups_df['group_label_en'].notna().sum():,}")
print(f"  Groups with any note: {sections_divisions_groups_df[['core_content_note','additional_content_note','exclusion_note']].notna().any(axis=1).sum():,}")
print(f"  Non-missing core_content_note: {sections_divisions_groups_df['core_content_note'].notna().sum():,}")
print(f"  Non-missing additional_content_note: {sections_divisions_groups_df['additional_content_note'].notna().sum():,}")
print(f"  Non-missing exclusion_note: {sections_divisions_groups_df['exclusion_note'].notna().sum():,}")

sections_divisions_groups_df.head()

Merging groups with group notes...
✓ Merged groups with notes
  Total rows: 287
  Non-missing group_label_en: 287
  Groups with any note: 108
  Non-missing core_content_note: 99
  Non-missing additional_content_note: 21
  Non-missing exclusion_note: 44


Unnamed: 0,section_uri,section_code,section_label_en,division_uri,division_code,division_label_en,group_uri,group_code,group_label_en,core_content_note,additional_content_note,exclusion_note
0,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/011,11,01.1 Growing of non-perennial crops,This group includes the growing of non-perenni...,,
1,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/012,12,01.2 Growing of perennial crops,This group includes the growing of perennial c...,,This group excludes:\n- growing of perennial f...
2,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/013,13,01.3 Plant propagation,,,
3,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/014,14,01.4 Animal production,"This group includes:\n- farming (husbandry, ra...",,This group excludes:\n- farm animal boarding a...
4,http://data.europa.eu/ux2/nace2.1/A,A,"A AGRICULTURE, FORESTRY AND FISHING",http://data.europa.eu/ux2/nace2.1/01,1,"01 Crop and animal production, hunting and rel...",http://data.europa.eu/ux2/nace2.1/015,15,01.5 Mixed farming,,,


### 2.04 Sanity Checks

In [109]:
# Verify the extracted data makes sense
print("NACE Group Extraction Summary")
print("=" * 80)
print(f"Total groups:    {len(sections_divisions_groups_df)}")
print(f"Unique sections: {sections_divisions_groups_df['section_code'].nunique()}")
print(f"Unique divisions:{sections_divisions_groups_df['division_code'].nunique()}")
print(f"Unique groups:   {sections_divisions_groups_df['group_code'].nunique()}")
print()
print("Sections (A-Z):")
section_counts = (
    sections_divisions_groups_df
    .groupby(['section_code', 'section_label_en'])
    .size()
    .reset_index(name='num_groups')
    .sort_values(['section_code'])
)
for _, row in section_counts.iterrows():
    print(f"  {row['section_code']:2s} — {row['section_label_en']:60s} ({row['num_groups']:3d} groups)")
print()
print("Note availability:")
print(f"  Core content notes:       {sections_divisions_groups_df['core_content_note'].notna().sum():4d} / {len(sections_divisions_groups_df):4d} ({sections_divisions_groups_df['core_content_note'].notna().mean():.1%})")
print(f"  Additional content notes: {sections_divisions_groups_df['additional_content_note'].notna().sum():4d} / {len(sections_divisions_groups_df):4d} ({sections_divisions_groups_df['additional_content_note'].notna().mean():.1%})")
print(f"  Exclusion notes:          {sections_divisions_groups_df['exclusion_note'].notna().sum():4d} / {len(sections_divisions_groups_df):4d} ({sections_divisions_groups_df['exclusion_note'].notna().mean():.1%})")
print()
print("Sample groups by section:")
print(sections_divisions_groups_df.groupby('section_code').head(2)[['section_code', 'division_code', 'group_code', 'group_label_en']])

NACE Group Extraction Summary
Total groups:    287
Unique sections: 22
Unique divisions:87
Unique groups:   287

Sections (A-Z):
  A  — A AGRICULTURE, FORESTRY AND FISHING                          ( 14 groups)
  B  — B MINING AND QUARRYING                                       ( 10 groups)
  C  — C MANUFACTURING                                              ( 92 groups)
  D  — D ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY        (  4 groups)
  E  — E WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES (  6 groups)
  F  — F CONSTRUCTION                                               ( 11 groups)
  G  — G WHOLESALE AND RETAIL TRADE                                 ( 18 groups)
  H  — H TRANSPORTATION AND STORAGE                                 ( 17 groups)
  I  — I ACCOMMODATION AND FOOD SERVICE ACTIVITIES                  (  9 groups)
  J  — J PUBLISHING, BROADCASTING, AND CONTENT PRODUCTION AND DISTRIBUTION ACTIVITIES (  7 groups)
  K  — K TELECOMMUNICATION, COMPUT

### 2.05 Create Embedding Description

In [110]:
# Create a combined embedding description field for semantic matching
def create_embedding_description(row):
    """Combine section, division, group labels and notes for embedding."""
    parts = []

    if pd.notna(row.get("section_label_en")):
        parts.append(row["section_label_en"])

    if pd.notna(row.get("division_label_en")):
        parts.append(row["division_label_en"])

    if pd.notna(row.get("group_label_en")):
        parts.append(row["group_label_en"])

    if pd.notna(row.get("core_content_note")):
        parts.append(row["core_content_note"])

    if pd.notna(row.get("additional_content_note")):
        parts.append(row["additional_content_note"])

    return " ".join(parts) if parts else None

sections_divisions_groups_df["embedding_description"] = sections_divisions_groups_df.apply(
    create_embedding_description, axis=1
)

print(f"✓ Created embedding_description column")
print(f"  Groups with embedding descriptions: {sections_divisions_groups_df['embedding_description'].notna().sum()} ({sections_divisions_groups_df['embedding_description'].notna().mean():.1%})")
print(f"  Average length: {sections_divisions_groups_df['embedding_description'].str.len().mean():.0f} characters")
print(f"\nSample embedding description:")
print("=" * 80)

sample = sections_divisions_groups_df[sections_divisions_groups_df['embedding_description'].notna()].iloc[0]
print(f"Group: {sample['group_code']} — {sample['group_label_en']}")
print(f"\nEmbedding description ({len(sample['embedding_description'])} chars):")
print(sample['embedding_description'][:500] + "..." if len(sample['embedding_description']) > 500 else sample['embedding_description'])

✓ Created embedding_description column
  Groups with embedding descriptions: 287 (100.0%)
  Average length: 231 characters

Sample embedding description:
Group: 011 — 01.1 Growing of non-perennial crops

Embedding description (356 chars):
A AGRICULTURE, FORESTRY AND FISHING 01 Crop and animal production, hunting and related service activities 01.1 Growing of non-perennial crops This group includes the growing of non-perennial crops, in other words, plants that do not last for more than two growing seasons. Further included is the growing of these plants for the purpose of seed production.


## 3. Prepare ESCO Data

### 3.01 Read and prepare raw ESCO Data

In [111]:
# Read the raw ESCO occupations data
print(f"Reading ESCO occupations file: {occupations_file}")
print(f"File exists: {occupations_file.exists()}")

esco_df = pd.read_csv(occupations_file)

print(f"\n✓ Loaded {len(esco_df)} ESCO occupations")
print(f"\nColumns: {list(esco_df.columns)}")
print(f"\nFirst few rows:")
esco_df.head()

Reading ESCO occupations file: /Users/lauren/repos/PAD2Skills/data/bronze/esco/occupations_en.csv
File exists: True

✓ Loaded 3043 ESCO occupations

Columns: ['conceptType', 'conceptUri', 'iscoGroup', 'preferredLabel', 'altLabels', 'hiddenLabels', 'status', 'modifiedDate', 'regulatedProfessionNote', 'scopeNote', 'definition', 'inScheme', 'description', 'code', 'naceCode']

First few rows:


Unnamed: 0,conceptType,conceptUri,iscoGroup,preferredLabel,altLabels,hiddenLabels,status,modifiedDate,regulatedProfessionNote,scopeNote,definition,inScheme,description,code,naceCode
0,Occupation,http://data.europa.eu/esco/occupation/00030d09...,2654,technical director,director of technical arts\ntechnical supervis...,,released,2024-01-25T11:28:50.295Z,http://data.europa.eu/esco/regulated-professio...,,,http://data.europa.eu/esco/concept-scheme/memb...,Technical directors realise the artistic visio...,2654.1.7,http://data.europa.eu/ux2/nace2.1/9031
1,Occupation,http://data.europa.eu/esco/occupation/000e93a3...,8121,metal drawing machine operator,wire drawer\nforming machine operative\ndraw m...,,released,2024-01-23T10:09:32.099Z,http://data.europa.eu/esco/regulated-professio...,,,http://data.europa.eu/esco/concept-scheme/memb...,Metal drawing machine operators set up and ope...,8121.4,http://data.europa.eu/ux2/nace2.1/242
2,Occupation,http://data.europa.eu/esco/occupation/0019b951...,7543,precision device inspector,precision device quality control supervisor\np...,,released,2024-01-25T15:00:12.188Z,http://data.europa.eu/esco/regulated-professio...,,,http://data.europa.eu/esco/concept-scheme/memb...,Precision device inspectors make sure precisio...,7543.10.3,http://data.europa.eu/ux2/nace2.1/2651
3,Occupation,http://data.europa.eu/esco/occupation/0022f466...,3155,air traffic safety technician,air traffic safety electronics hardware specia...,,released,2024-01-29T16:01:13.998Z,http://data.europa.eu/esco/regulated-professio...,,,http://data.europa.eu/esco/concept-scheme/memb...,Air traffic safety technicians provide technic...,3155.1,http://data.europa.eu/ux2/nace2.1/5223
4,Occupation,http://data.europa.eu/esco/occupation/002da35b...,2431,hospitality revenue manager,yield manager\nhospitality yields manager\nhos...,,released,2024-01-11T10:28:45.871Z,http://data.europa.eu/esco/regulated-professio...,,,http://data.europa.eu/esco/concept-scheme/memb...,Hospitality revenue managers maximise revenue ...,2431.9,"http://data.europa.eu/ux2/nace2.1/701,\nhttp:/..."


In [112]:
# Extract esco_id from conceptUri and rename naceCode
esco_df['esco_id'] = esco_df['conceptUri'].str.split('/').str[-1]
esco_df = esco_df.rename(columns={'naceCode': 'nace_code_raw'})

# Keep only the required columns
esco_df = esco_df[['esco_id', 'nace_code_raw']]

print(f"Extracted esco_id and renamed naceCode to nace_code_raw")
print(f"Shape: {esco_df.shape}")
print(f"\nFirst few rows:")
esco_df.head()

Extracted esco_id and renamed naceCode to nace_code_raw
Shape: (3043, 2)

First few rows:


Unnamed: 0,esco_id,nace_code_raw
0,00030d09-2b3a-4efd-87cc-c4ea39d27c34,http://data.europa.eu/ux2/nace2.1/9031
1,000e93a3-d956-4e45-aacb-f12c83fedf84,http://data.europa.eu/ux2/nace2.1/242
2,0019b951-c699-4191-8208-9822882d150c,http://data.europa.eu/ux2/nace2.1/2651
3,0022f466-426c-41a4-ac96-a235c945cf97,http://data.europa.eu/ux2/nace2.1/5223
4,002da35b-7808-43f3-83bf-63596b8b351f,"http://data.europa.eu/ux2/nace2.1/701,\nhttp:/..."


### 3.02 Explode NACE codes

In [113]:
# Split comma-separated NACE codes and create one row per code
# The nace_code_raw column contains comma-separated lists like "http://data.europa.eu/ux2/nace2.1/01,http://data.europa.eu/ux2/nace2.1/02"

print(f"Before explode: {len(esco_df)} rows")

# Split the comma-separated values and explode
esco_df['nace_code_raw'] = esco_df['nace_code_raw'].str.split(',')
esco_df = esco_df.explode('nace_code_raw').reset_index(drop=True)

print(f"After explode: {len(esco_df)} rows")
print(f"\nFirst few rows:")
esco_df.head(10)

Before explode: 3043 rows
After explode: 4570 rows

First few rows:


Unnamed: 0,esco_id,nace_code_raw
0,00030d09-2b3a-4efd-87cc-c4ea39d27c34,http://data.europa.eu/ux2/nace2.1/9031
1,000e93a3-d956-4e45-aacb-f12c83fedf84,http://data.europa.eu/ux2/nace2.1/242
2,0019b951-c699-4191-8208-9822882d150c,http://data.europa.eu/ux2/nace2.1/2651
3,0022f466-426c-41a4-ac96-a235c945cf97,http://data.europa.eu/ux2/nace2.1/5223
4,002da35b-7808-43f3-83bf-63596b8b351f,http://data.europa.eu/ux2/nace2.1/701
5,002da35b-7808-43f3-83bf-63596b8b351f,\nhttp://data.europa.eu/ux2/nace2.1/55
6,0044c991-c26f-4261-a213-4bd1c0564a4c,http://data.europa.eu/ux2/nace2.1/861
7,00634fc4-802a-461b-8af0-499273756f99,http://data.europa.eu/ux2/nace2.1/712
8,00634fc4-802a-461b-8af0-499273756f99,\nhttp://data.europa.eu/ux2/nace2.1/2399
9,00674f21-2f8f-4a41-9896-133f7cbe2a6e,http://data.europa.eu/ux2/nace2.1/852


In [114]:
# Extract nace_code from nace_code_raw (the part after the last slash)
esco_df['nace_code'] = esco_df['nace_code_raw'].str.split('/').str[-1]

print(f"Extracted nace_code from nace_code_raw")
print(f"\nFirst few rows:")
esco_df.head(10)

Extracted nace_code from nace_code_raw

First few rows:


Unnamed: 0,esco_id,nace_code_raw,nace_code
0,00030d09-2b3a-4efd-87cc-c4ea39d27c34,http://data.europa.eu/ux2/nace2.1/9031,9031
1,000e93a3-d956-4e45-aacb-f12c83fedf84,http://data.europa.eu/ux2/nace2.1/242,242
2,0019b951-c699-4191-8208-9822882d150c,http://data.europa.eu/ux2/nace2.1/2651,2651
3,0022f466-426c-41a4-ac96-a235c945cf97,http://data.europa.eu/ux2/nace2.1/5223,5223
4,002da35b-7808-43f3-83bf-63596b8b351f,http://data.europa.eu/ux2/nace2.1/701,701
5,002da35b-7808-43f3-83bf-63596b8b351f,\nhttp://data.europa.eu/ux2/nace2.1/55,55
6,0044c991-c26f-4261-a213-4bd1c0564a4c,http://data.europa.eu/ux2/nace2.1/861,861
7,00634fc4-802a-461b-8af0-499273756f99,http://data.europa.eu/ux2/nace2.1/712,712
8,00634fc4-802a-461b-8af0-499273756f99,\nhttp://data.europa.eu/ux2/nace2.1/2399,2399
9,00674f21-2f8f-4a41-9896-133f7cbe2a6e,http://data.europa.eu/ux2/nace2.1/852,852


In [115]:
# Drop the raw NACE code column (we only need the extracted code)
esco_df = esco_df.drop(columns=['nace_code_raw'])

print(f"Dropped nace_code_raw column")
print(f"Remaining columns: {list(esco_df.columns)}")
print(f"Shape: {esco_df.shape}")
print(f"\nFirst few rows:")
esco_df.head(10)

Dropped nace_code_raw column
Remaining columns: ['esco_id', 'nace_code']
Shape: (4570, 2)

First few rows:


Unnamed: 0,esco_id,nace_code
0,00030d09-2b3a-4efd-87cc-c4ea39d27c34,9031
1,000e93a3-d956-4e45-aacb-f12c83fedf84,242
2,0019b951-c699-4191-8208-9822882d150c,2651
3,0022f466-426c-41a4-ac96-a235c945cf97,5223
4,002da35b-7808-43f3-83bf-63596b8b351f,701
5,002da35b-7808-43f3-83bf-63596b8b351f,55
6,0044c991-c26f-4261-a213-4bd1c0564a4c,861
7,00634fc4-802a-461b-8af0-499273756f99,712
8,00634fc4-802a-461b-8af0-499273756f99,2399
9,00674f21-2f8f-4a41-9896-133f7cbe2a6e,852


## 4. Merge Groups to Section Codes

### 4.01 Expand group codes to all divisions

In [116]:
# Some NACE codes in esco_df are section codes (single letters like "A", "B", etc.)
# We need to expand these to include all groups within each section
# Keep track of the original code in raw_nace_code

print(f"Before expansion: {len(esco_df)} rows")

# Create raw_nace_code to store the original code before expansion
esco_df['raw_nace_code'] = esco_df['nace_code']

Before expansion: 4570 rows


In [117]:
# Identify the three types of NACE codes:
# - section codes: single uppercase letters (A, B, C, etc.)
# - division codes: exactly 2 digits (01, 02, ..., 99)
# - group codes: 3 or 4 digits (010, 011, ..., 0111, etc.)

esco_df['is_section'] = esco_df['nace_code'].str.match(r'^[A-Z]$')
esco_df['is_division'] = esco_df['nace_code'].str.match(r'^\d{2}$')
esco_df['is_group'] = esco_df['nace_code'].str.match(r'^\d{3,4}$')

print(f"\nBreakdown by code type:")
print(f"  Section codes (letters): {esco_df['is_section'].sum()}")
print(f"  Division codes (2 digits): {esco_df['is_division'].sum()}")
print(f"  Group codes (3-4 digits): {esco_df['is_group'].sum()}")
print(f"  Other/unmatched: {(~(esco_df['is_section'] | esco_df['is_division'] | esco_df['is_group'])).sum()}")


Breakdown by code type:
  Section codes (letters): 62
  Division codes (2 digits): 337
  Group codes (3-4 digits): 4171
  Other/unmatched: 0


In [118]:
# Split into three groups based on code type
section_codes = esco_df[esco_df['is_section']].copy()
division_codes = esco_df[esco_df['is_division']].copy()
group_codes = esco_df[esco_df['is_group']].copy()

print(f"Split into three groups:")
print(f"  Section codes: {len(section_codes)} rows")
print(f"  Division codes: {len(division_codes)} rows")
print(f"  Group codes: {len(group_codes)} rows")

Split into three groups:
  Section codes: 62 rows
  Division codes: 337 rows
  Group codes: 4171 rows


In [119]:
# For section codes, merge with all groups in that section
# This is a many-to-many join that expands each section into multiple groups
section_expanded = section_codes.merge(
    sections_divisions_groups_df[['section_code', 'division_code', 'group_code']],
    left_on='nace_code',
    right_on='section_code',
    how='left'
)

print(f"Section codes expanded:")
print(f"  Before: {len(section_codes)} rows")
print(f"  After: {len(section_expanded)} rows")

# For division codes, merge with all groups in that division
# This is also a many-to-many join that expands each division into multiple groups
division_expanded = division_codes.merge(
    sections_divisions_groups_df[['division_code', 'group_code']],
    left_on='nace_code',
    right_on='division_code',
    how='left'
)

print(f"\nDivision codes expanded:")
print(f"  Before: {len(division_codes)} rows")
print(f"  After: {len(division_expanded)} rows")

Section codes expanded:
  Before: 62 rows
  After: 2963 rows

Division codes expanded:
  Before: 337 rows
  After: 1296 rows


In [120]:
# Update nace_code with group_code for both expanded dataframes
# (raw_nace_code stays as the original section or division code)
section_expanded['nace_code'] = section_expanded['group_code']
division_expanded['nace_code'] = division_expanded['group_code']

# Keep only the columns we need from expanded dataframes
section_expanded = section_expanded[['esco_id', 'raw_nace_code', 'nace_code']]
division_expanded = division_expanded[['esco_id', 'raw_nace_code', 'nace_code']]

# Combine all three groups back together
esco_df = pd.concat([
    group_codes[['esco_id', 'raw_nace_code', 'nace_code']], 
    section_expanded, 
    division_expanded
], ignore_index=True)

print(f"\nAfter combining all groups:")
print(f"  Total rows: {len(esco_df)}")
print(f"  Net increase from original: {len(esco_df) - (len(section_codes) + len(division_codes) + len(group_codes))} rows")
print(f"\nColumns: {list(esco_df.columns)}")
print(f"\nFirst few rows:")
esco_df.head(10)


After combining all groups:
  Total rows: 8430
  Net increase from original: 3860 rows

Columns: ['esco_id', 'raw_nace_code', 'nace_code']

First few rows:


Unnamed: 0,esco_id,raw_nace_code,nace_code
0,00030d09-2b3a-4efd-87cc-c4ea39d27c34,9031,9031
1,000e93a3-d956-4e45-aacb-f12c83fedf84,242,242
2,0019b951-c699-4191-8208-9822882d150c,2651,2651
3,0022f466-426c-41a4-ac96-a235c945cf97,5223,5223
4,002da35b-7808-43f3-83bf-63596b8b351f,701,701
5,0044c991-c26f-4261-a213-4bd1c0564a4c,861,861
6,00634fc4-802a-461b-8af0-499273756f99,712,712
7,00634fc4-802a-461b-8af0-499273756f99,2399,2399
8,00674f21-2f8f-4a41-9896-133f7cbe2a6e,852,852
9,006cc1f9-2841-41c3-991a-dc3f2f3bd533,8695,8695


### 4.02 Extract group code from nace_code

In [121]:
# Extract the group code (first 3 digits) from nace_code
# NACE codes can be more granular at the class level but groups are always 3 digits

esco_df['group_code'] = esco_df['nace_code'].str[:3]

print(f"Created group_code column")
print(f"\nSample rows showing nace_code vs group_code:")
print(esco_df[['esco_id', 'raw_nace_code', 'nace_code', 'group_code']].head(10))

# Show breakdown
print(f"\nUnique nace_codes: {esco_df['nace_code'].nunique()}")
print(f"Unique group_codes: {esco_df['group_code'].nunique()}")

Created group_code column

Sample rows showing nace_code vs group_code:
                                esco_id raw_nace_code nace_code group_code
0  00030d09-2b3a-4efd-87cc-c4ea39d27c34          9031      9031        903
1  000e93a3-d956-4e45-aacb-f12c83fedf84           242       242        242
2  0019b951-c699-4191-8208-9822882d150c          2651      2651        265
3  0022f466-426c-41a4-ac96-a235c945cf97          5223      5223        522
4  002da35b-7808-43f3-83bf-63596b8b351f           701       701        701
5  0044c991-c26f-4261-a213-4bd1c0564a4c           861       861        861
6  00634fc4-802a-461b-8af0-499273756f99           712       712        712
7  00634fc4-802a-461b-8af0-499273756f99          2399      2399        239
8  00674f21-2f8f-4a41-9896-133f7cbe2a6e           852       852        852
9  006cc1f9-2841-41c3-991a-dc3f2f3bd533          8695      8695        869

Unique nace_codes: 695
Unique group_codes: 284


### 4.03 Merge group metadata

In [122]:
# Merge group metadata from sections_divisions_groups_df
# This adds section_code, section_label_en, division_code, division_label_en, group_label_en, and embedding_description

print(f"Before merge: {len(esco_df)} rows")
print(f"Columns: {list(esco_df.columns)}")

# Left merge on group_code
esco_df = esco_df.merge(
    sections_divisions_groups_df[['group_code', 'section_code', 'section_label_en', 'division_code', 'division_label_en', 'group_label_en', 'embedding_description']],
    on='group_code',
    how='left'
)

print(f"\nAfter merge: {len(esco_df)} rows")
print(f"Columns: {list(esco_df.columns)}")

# Check for any unmatched groups
unmatched = esco_df['section_code'].isna().sum()
print(f"\nUnmatched groups (no metadata): {unmatched} ({unmatched/len(esco_df):.1%})")

print(f"\nFirst few rows:")
esco_df.head(10)

Before merge: 8430 rows
Columns: ['esco_id', 'raw_nace_code', 'nace_code', 'group_code']

After merge: 8430 rows
Columns: ['esco_id', 'raw_nace_code', 'nace_code', 'group_code', 'section_code', 'section_label_en', 'division_code', 'division_label_en', 'group_label_en', 'embedding_description']

Unmatched groups (no metadata): 0 (0.0%)

First few rows:


Unnamed: 0,esco_id,raw_nace_code,nace_code,group_code,section_code,section_label_en,division_code,division_label_en,group_label_en,embedding_description
0,00030d09-2b3a-4efd-87cc-c4ea39d27c34,9031,9031,903,S,"S ARTS, SPORTS AND RECREATION",90,90 Arts creation and performing arts activities,90.3 Support activities to arts creation and p...,"S ARTS, SPORTS AND RECREATION 90 Arts creation..."
1,000e93a3-d956-4e45-aacb-f12c83fedf84,242,242,242,C,C MANUFACTURING,24,24 Manufacture of basic metals,"24.2 Manufacture of tubes, pipes, hollow profi...",C MANUFACTURING 24 Manufacture of basic metals...
2,0019b951-c699-4191-8208-9822882d150c,2651,2651,265,C,C MANUFACTURING,26,"26 Manufacture of computer, electronic and opt...",26.5 Manufacture of measuring testing instrume...,"C MANUFACTURING 26 Manufacture of computer, el..."
3,0022f466-426c-41a4-ac96-a235c945cf97,5223,5223,522,H,H TRANSPORTATION AND STORAGE,52,"52 Warehousing, storage and support activities...",52.2 Support activities for transportation,"H TRANSPORTATION AND STORAGE 52 Warehousing, s..."
4,002da35b-7808-43f3-83bf-63596b8b351f,701,701,701,N,"N PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIV...",70,70 Activities of head offices and management c...,70.1 Activities of head offices,"N PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIV..."
5,0044c991-c26f-4261-a213-4bd1c0564a4c,861,861,861,R,R HUMAN HEALTH AND SOCIAL WORK ACTIVITIES,86,86 Human health activities,86.1 Hospital activities,R HUMAN HEALTH AND SOCIAL WORK ACTIVITIES 86 H...
6,00634fc4-802a-461b-8af0-499273756f99,712,712,712,N,"N PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIV...",71,71 Architectural and engineering activities; t...,71.2 Technical testing and analysis,"N PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIV..."
7,00634fc4-802a-461b-8af0-499273756f99,2399,2399,239,C,C MANUFACTURING,23,23 Manufacture of other non-metallic mineral p...,23.9 Manufacture of abrasive products and non-...,C MANUFACTURING 23 Manufacture of other non-me...
8,00674f21-2f8f-4a41-9896-133f7cbe2a6e,852,852,852,Q,Q EDUCATION,85,85 Education,85.2 Primary education,Q EDUCATION 85 Education 85.2 Primary education
9,006cc1f9-2841-41c3-991a-dc3f2f3bd533,8695,8695,869,R,R HUMAN HEALTH AND SOCIAL WORK ACTIVITIES,86,86 Human health activities,86.9 Other human health activities,R HUMAN HEALTH AND SOCIAL WORK ACTIVITIES 86 H...


### 4.04 Quality check: Section code expansions

In [123]:
# Quality check 1: Section code expansions
# Show sample of rows where raw_nace_code is a section letter (single character)
# This helps verify that section codes were properly expanded to groups

section_letter_mask = esco_df['raw_nace_code'].str.match(r'^[A-Z]$', na=False)
section_letter_rows = esco_df[section_letter_mask]

print("=" * 100)
print("QUALITY CHECK 1: Section Code Expansions")
print("=" * 100)
print(f"Total rows with section letter in raw_nace_code: {len(section_letter_rows):,}")
print(f"Unique ESCO IDs with section codes: {section_letter_rows['esco_id'].nunique():,}")
print(f"\nShowing 20 sample rows for quality inspection:")

# Select relevant columns and show 20 rows
inspection_cols = ['esco_id', 'raw_nace_code', 'nace_code', 'group_code', 
                   'section_code', 'division_code', 'section_label_en', 'division_label_en', 'group_label_en']
display(section_letter_rows[inspection_cols].head(20))

QUALITY CHECK 1: Section Code Expansions
Total rows with section letter in raw_nace_code: 2,963
Unique ESCO IDs with section codes: 62

Showing 20 sample rows for quality inspection:


Unnamed: 0,esco_id,raw_nace_code,nace_code,group_code,section_code,division_code,section_label_en,division_label_en,group_label_en
4171,0cca32c2-9308-4927-adb2-14771ab787f0,F,410,410,F,41,F CONSTRUCTION,41 Construction of residential and non-residen...,41.0 Construction of residential and non-resid...
4172,0cca32c2-9308-4927-adb2-14771ab787f0,F,421,421,F,42,F CONSTRUCTION,42 Civil engineering,42.1 Construction of roads and railways
4173,0cca32c2-9308-4927-adb2-14771ab787f0,F,422,422,F,42,F CONSTRUCTION,42 Civil engineering,42.2 Construction of utility projects
4174,0cca32c2-9308-4927-adb2-14771ab787f0,F,429,429,F,42,F CONSTRUCTION,42 Civil engineering,42.9 Construction of other civil engineering p...
4175,0cca32c2-9308-4927-adb2-14771ab787f0,F,431,431,F,43,F CONSTRUCTION,43 Specialised construction activities,43.1 Demolition and site preparation
4176,0cca32c2-9308-4927-adb2-14771ab787f0,F,432,432,F,43,F CONSTRUCTION,43 Specialised construction activities,"43.2 Electrical, plumbing and other constructi..."
4177,0cca32c2-9308-4927-adb2-14771ab787f0,F,433,433,F,43,F CONSTRUCTION,43 Specialised construction activities,43.3 Building completion and finishing
4178,0cca32c2-9308-4927-adb2-14771ab787f0,F,434,434,F,43,F CONSTRUCTION,43 Specialised construction activities,43.4 Specialised construction activities in co...
4179,0cca32c2-9308-4927-adb2-14771ab787f0,F,435,435,F,43,F CONSTRUCTION,43 Specialised construction activities,43.5 Specialised construction activities in ci...
4180,0cca32c2-9308-4927-adb2-14771ab787f0,F,436,436,F,43,F CONSTRUCTION,43 Specialised construction activities,43.6 Intermediation service activities for spe...


In [124]:
# Quality check 2: Division code expansions
# Show sample of rows where raw_nace_code is a 2-digit division
# This helps verify that division codes were properly expanded to groups

division_mask = esco_df['raw_nace_code'].str.match(r'^\d{2}$', na=False)
division_rows = esco_df[division_mask]

print("\n" + "=" * 100)
print("QUALITY CHECK 2: Division Code Expansions")
print("=" * 100)
print(f"Total rows with 2-digit division in raw_nace_code: {len(division_rows):,}")
print(f"Unique ESCO IDs with division codes: {division_rows['esco_id'].nunique():,}")
print(f"\nShowing 20 sample rows for quality inspection:")

display(division_rows[inspection_cols].head(20))


QUALITY CHECK 2: Division Code Expansions
Total rows with 2-digit division in raw_nace_code: 1,296
Unique ESCO IDs with division codes: 225

Showing 20 sample rows for quality inspection:


Unnamed: 0,esco_id,raw_nace_code,nace_code,group_code,section_code,division_code,section_label_en,division_label_en,group_label_en
7134,002da35b-7808-43f3-83bf-63596b8b351f,55,551,551,I,55,I ACCOMMODATION AND FOOD SERVICE ACTIVITIES,55 Accommodation,55.1 Hotels and similar accommodation
7135,002da35b-7808-43f3-83bf-63596b8b351f,55,552,552,I,55,I ACCOMMODATION AND FOOD SERVICE ACTIVITIES,55 Accommodation,55.2 Holiday and other short-stay accommodation
7136,002da35b-7808-43f3-83bf-63596b8b351f,55,553,553,I,55,I ACCOMMODATION AND FOOD SERVICE ACTIVITIES,55 Accommodation,55.3 Camping grounds and recreational vehicle ...
7137,002da35b-7808-43f3-83bf-63596b8b351f,55,554,554,I,55,I ACCOMMODATION AND FOOD SERVICE ACTIVITIES,55 Accommodation,55.4 Intermediation service activities for acc...
7138,002da35b-7808-43f3-83bf-63596b8b351f,55,559,559,I,55,I ACCOMMODATION AND FOOD SERVICE ACTIVITIES,55 Accommodation,55.9 Other accommodation
7139,00ab29a3-6d4a-4df9-b46d-de31069e36e8,7,71,71,B,7,B MINING AND QUARRYING,07 Mining of metal ores,07.1 Mining of iron ores
7140,00ab29a3-6d4a-4df9-b46d-de31069e36e8,7,72,72,B,7,B MINING AND QUARRYING,07 Mining of metal ores,07.2 Mining of non-ferrous metal ores
7141,00ab5610-e715-428f-99f6-b1e5e469dbcd,10,101,101,C,10,C MANUFACTURING,10 Manufacture of food products,10.1 Processing and preserving of meat and pro...
7142,00ab5610-e715-428f-99f6-b1e5e469dbcd,10,102,102,C,10,C MANUFACTURING,10 Manufacture of food products,"10.2 Processing and preserving of fish, crusta..."
7143,00ab5610-e715-428f-99f6-b1e5e469dbcd,10,103,103,C,10,C MANUFACTURING,10 Manufacture of food products,10.3 Processing and preserving of fruit and ve...


### 4.05 Drop intermediate columns and save

In [125]:
# Drop intermediate columns that were used for processing
esco_df = esco_df.drop(columns=['nace_code', 'raw_nace_code'])

In [126]:
# Drop any duplicate rows
before_dedup = len(esco_df)
print(f"Before deduplication: {before_dedup} rows")
esco_df = esco_df.drop_duplicates()
after_dedup = len(esco_df)
print(f"After deduplication: {after_dedup} rows")
print(f"Duplicates removed: {before_dedup - after_dedup}")

Before deduplication: 8430 rows
After deduplication: 8235 rows
Duplicates removed: 195


In [127]:
# Reorder columns for clarity
esco_df = esco_df[['esco_id', 'section_code', 'section_label_en', 'division_code', 'division_label_en','group_code', 'group_label_en', 'embedding_description']]

# Create output directory
output_dir = project_root / "data" / "silver" / "esco_nace_csv"
output_dir.mkdir(parents=True, exist_ok=True)

# Save to CSV
output_file = output_dir / "esco_nace_groups.csv"
esco_df.to_csv(output_file, index=False)

print(f"✓ Dropped intermediate columns: nace_code, raw_nace_code")
print(f"✓ Reordered columns")
print(f"✓ Saved {len(esco_df)} rows to: {output_file}")
print(f"\nFinal columns: {list(esco_df.columns)}")
print(f"\nFinal shape: {esco_df.shape}")
print(f"\nFirst few rows:")
esco_df.head()

✓ Dropped intermediate columns: nace_code, raw_nace_code
✓ Reordered columns
✓ Saved 8235 rows to: /Users/lauren/repos/PAD2Skills/data/silver/esco_nace_csv/esco_nace_groups.csv

Final columns: ['esco_id', 'section_code', 'section_label_en', 'division_code', 'division_label_en', 'group_code', 'group_label_en', 'embedding_description']

Final shape: (8235, 8)

First few rows:


Unnamed: 0,esco_id,section_code,section_label_en,division_code,division_label_en,group_code,group_label_en,embedding_description
0,00030d09-2b3a-4efd-87cc-c4ea39d27c34,S,"S ARTS, SPORTS AND RECREATION",90,90 Arts creation and performing arts activities,903,90.3 Support activities to arts creation and p...,"S ARTS, SPORTS AND RECREATION 90 Arts creation..."
1,000e93a3-d956-4e45-aacb-f12c83fedf84,C,C MANUFACTURING,24,24 Manufacture of basic metals,242,"24.2 Manufacture of tubes, pipes, hollow profi...",C MANUFACTURING 24 Manufacture of basic metals...
2,0019b951-c699-4191-8208-9822882d150c,C,C MANUFACTURING,26,"26 Manufacture of computer, electronic and opt...",265,26.5 Manufacture of measuring testing instrume...,"C MANUFACTURING 26 Manufacture of computer, el..."
3,0022f466-426c-41a4-ac96-a235c945cf97,H,H TRANSPORTATION AND STORAGE,52,"52 Warehousing, storage and support activities...",522,52.2 Support activities for transportation,"H TRANSPORTATION AND STORAGE 52 Warehousing, s..."
4,002da35b-7808-43f3-83bf-63596b8b351f,N,"N PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIV...",70,70 Activities of head offices and management c...,701,70.1 Activities of head offices,"N PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIV..."


### 4.06 Create inspection file

In [128]:
# Create inspection dataset without esco_id to see unique NACE groups
inspect_df = esco_df.drop(columns=['esco_id'])

# Drop duplicates
print(f"Before deduplication: {len(inspect_df)} rows")
inspect_df = inspect_df.drop_duplicates()
print(f"After deduplication: {len(inspect_df)} rows")

# Save to CSV
inspect_output_file = output_dir / "inspect_esco_nace_groups.csv"
inspect_df.to_csv(inspect_output_file, index=False)

print(f"\n✓ Saved {len(inspect_df)} unique NACE groups to: {inspect_output_file}")
print(f"\nColumns: {list(inspect_df.columns)}")
print(f"\nFirst few rows:")
inspect_df.head()

Before deduplication: 8235 rows
After deduplication: 284 rows

✓ Saved 284 unique NACE groups to: /Users/lauren/repos/PAD2Skills/data/silver/esco_nace_csv/inspect_esco_nace_groups.csv

Columns: ['section_code', 'section_label_en', 'division_code', 'division_label_en', 'group_code', 'group_label_en', 'embedding_description']

First few rows:


Unnamed: 0,section_code,section_label_en,division_code,division_label_en,group_code,group_label_en,embedding_description
0,S,"S ARTS, SPORTS AND RECREATION",90,90 Arts creation and performing arts activities,903,90.3 Support activities to arts creation and p...,"S ARTS, SPORTS AND RECREATION 90 Arts creation..."
1,C,C MANUFACTURING,24,24 Manufacture of basic metals,242,"24.2 Manufacture of tubes, pipes, hollow profi...",C MANUFACTURING 24 Manufacture of basic metals...
2,C,C MANUFACTURING,26,"26 Manufacture of computer, electronic and opt...",265,26.5 Manufacture of measuring testing instrume...,"C MANUFACTURING 26 Manufacture of computer, el..."
3,H,H TRANSPORTATION AND STORAGE,52,"52 Warehousing, storage and support activities...",522,52.2 Support activities for transportation,"H TRANSPORTATION AND STORAGE 52 Warehousing, s..."
4,N,"N PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIV...",70,70 Activities of head offices and management c...,701,70.1 Activities of head offices,"N PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIV..."
