Create an output file for every country, containing the number of CMCs for each Individual Service (all CMC combinations). \
This takes about 30 minutes (for ~615 Individual Services).

In [None]:
from __future__ import annotations

from pathlib import Path

from msl.kcdb import Physics

# Walk the KCDB Physics hierarchy from the metrology areas down to the
# Individual Services, reporting the number of items found at each level
physics = Physics()
print(f"Get all Individual Services for {physics.DOMAIN.name}...")

metrology_areas = physics.metrology_areas()
print(f"  There are {len(metrology_areas)} metrology areas")

# Branches are kept in the order in which they are returned
branches = []
for area in metrology_areas:
    branches.extend(physics.branches(area))
print(f"  There are {len(branches)} branches")

# From here on, the children of each parent are sorted before being appended
services = []
for branch in branches:
    services.extend(sorted(physics.services(branch)))
print(f"  There are a total of {len(services)} Services")

sub_services = []
for service in services:
    sub_services.extend(sorted(physics.sub_services(service)))
print(f"  There are a total of {len(sub_services)} Sub Services")

individual_services = []
for sub_service in sub_services:
    individual_services.extend(sorted(physics.individual_services(sub_service)))
print(f"  There are a total of {len(individual_services)} Individual Services")

# Directory that receives one ".cmc" file per country
root_path = Path().resolve().parent / "input" / "physics-data"
root_path.mkdir(parents=True, exist_ok=True)

# The countries for which a file is created
countries = physics.countries()

# (Re)create the file of every country so that it contains only the header line;
# the data rows are appended afterwards
header = "Country;Metrology Area;Branch;Service;Sub Service;Individual Service;Physics Code;Number of CMCs\n"
for country in countries:
    _ = (root_path / f"{country.value}.cmc").write_text(header)

# Query the KCDB once per Individual Service and tally the returned CMCs by country
n = len(individual_services)
for i, item in enumerate(individual_services, start=1):
    # Climb back up the hierarchy to recover the parents of this Individual Service
    sub_service = item.sub_service
    service = sub_service.service
    branch = service.branch
    area = branch.metrology_area
    print(f"[{i:3d} of {n}] Branch={branch.label} Code={item.physics_code}")

    # Every country starts at zero so that each file receives a row for every service
    counts = dict.fromkeys((c.value for c in countries), 0)

    # NOTE(review): only the data of this single request is counted -- confirm that
    # MAX_PAGE_SIZE is always large enough to return all CMCs of one Individual Service
    result = physics.search(area, branch=branch, physics_code=item, page_size=physics.MAX_PAGE_SIZE)
    for cmc in result.data:
        counts[cmc.country_value] += 1

    # Everything between the country name and the count is the same for all countries
    metadata = f"{area.value};{branch.value};{service.value};{sub_service.value};{item.value};{item.physics_code}"

    # Append one row to the file of every country, including countries with zero CMCs
    for country_value, num_cmcs in counts.items():
        with (root_path / f"{country_value}.cmc").open("a") as f:
            _ = f.write(f"{country_value};{metadata};{num_cmcs}\n")

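Generate the "non-zero" files, which keep only the Individual Services for which at least one country has a CMC, together with a summary file that lists those countries for each service. This takes about 2 minutes.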

In [None]:
from pathlib import Path

import numpy as np

from msl.kcdb import Physics

physics = Physics()
countries = physics.countries()

# Load the ".cmc" file of every country into a structured array, keyed by country name
root_path = Path().resolve().parent / "input" / "physics-data"
data: dict[str, np.ndarray] = {}
for country in countries:
    print(f"Loading data for {country.value}")
    array = np.genfromtxt(
        root_path / f"{country.value}.cmc",
        delimiter=";",
        names=True,  # the first line of each file is the column header
        dtype=None,  # infer the type of every column individually
        # Decode text columns to str using the system default encoding, which is also
        # what open() uses when the files are written without an explicit encoding.
        # Older NumPy releases default to encoding="bytes", which returns bytes
        # objects that cannot be joined or formatted as text later on.
        encoding=None,
    )
    # A file with a single data row is returned as a 0-d array, which cannot be
    # indexed by row number; always store a 1-d array
    data[country.value] = np.atleast_1d(array)

# Output locations: the per-country "non-zero" files and, one level below, the summary
root_path = Path().resolve().parent / "input" / "physics-data-Non-zero"
root_path.mkdir(exist_ok=True)

summary_path = root_path / "summary"
summary_path.mkdir(exist_ok=True)

# (Re)create every per-country file so that it contains only the header line
country_header = "Country;Metrology Area;Branch;Service;Sub Service;Individual Service;Physics Code;Number of CMCs\n"
for country in countries:
    _ = (root_path / f"{country.value}-Non-zero.cmc").write_text(country_header)

# The summary file has no Country column; it ends with the number and names of the countries
summary_header = "Metrology Area;Branch;Service;Sub Service;Individual Service;Physics Code;Number of Countries;Countries\n"
_ = (summary_path / "physics-Non-zero-Summary.cmc").write_text(summary_header)

# Row N is read from every country's array and treated as the same Individual Service,
# so all arrays are expected to have the same number of rows in the same order
num_rows = next(iter(data.values())).size

# Lines are collected in memory and written once at the end, instead of reopening
# every output file for every row
summary_lines: list[str] = []
country_lines: dict[str, list[str]] = {c.value: [] for c in countries}

for row in range(num_rows):
    print(f"Processing CMC {row + 1:5d} of {num_rows}")
    names: list[str] = []  # countries that have at least one CMC for this row
    middle: list[str] = []  # the metadata columns between the country and the count
    for country in countries:
        first, *fields, num_cmc = data[country.value][row]
        # Convert explicitly to str so that the join below also works for a column
        # that was inferred as a numeric type when the file was loaded
        middle = [str(v) for v in fields]
        if num_cmc > 0:
            names.append(str(first))

    # Skip services for which no country has a CMC
    if not names:
        continue

    summary_lines.append(f"{';'.join(middle)};{len(names)};{'|'.join(names)}\n")

    # The row is kept for every country, including those with zero CMCs
    for country in countries:
        line = ";".join(str(v) for v in data[country.value][row])
        country_lines[country.value].append(f"{line}\n")

# Append, so that a header line that is already in a file is retained
with (summary_path / "physics-Non-zero-Summary.cmc").open("a") as f:
    f.writelines(summary_lines)

for country_value, lines in country_lines.items():
    with (root_path / f"{country_value}-Non-zero.cmc").open("a") as f:
        f.writelines(lines)