Create the output files for every country and all CMC combinations. \
This takes about 6 minutes (for about 88,500 combinations).

In [None]:
from __future__ import annotations

from pathlib import Path

from msl.kcdb import Radiation

# KCDB client for the ionizing-radiation CMCs (timeout=100 is forwarded to the client)
radiation = Radiation(timeout=100)

# Every country known to the service
countries = radiation.countries()

# Every nuclide known to the service (only used for the "RAD" branch below)
nuclides = radiation.nuclides()

# Map "country;branch;quantity;medium;source;nuclide;" -> number of CMCs.
# Every possible combination starts at zero; insertion order determines the
# row order of the files written later.
results: dict[str, int] = {}
for area in radiation.metrology_areas():
    for branch in radiation.branches(area):
        print(f"Preparing {branch.value}")
        quantities = radiation.quantities(branch)
        mediums = radiation.mediums(branch)
        sources = radiation.sources(branch)
        has_nuclides = branch.label == "RAD"
        for quantity in quantities:
            for medium in mediums:
                for source in sources:
                    # Part of the key shared by all countries for this combination
                    combo = f"{branch.value};{quantity.value};{medium.value};{source.value};"

                    # First the entries without a nuclide (empty nuclide field) ...
                    results.update({f"{country.value};{combo};": 0 for country in countries})

                    # ... then, for the "RAD" branch only, one entry per country and nuclide
                    if has_nuclides:
                        results.update(
                            {
                                f"{country.value};{combo}{nuclide.value};": 0
                                for country in countries
                                for nuclide in nuclides
                            }
                        )

# Count the CMCs for each country
for area in radiation.metrology_areas():
    for branch in radiation.branches(area):
        print(f"Counting CMCs for {branch.value}")
        result = radiation.search(branch=branch, page_size=3000)

        # Only the first page is read below, so all CMCs of the branch must fit
        # on it. Raise explicitly (instead of using `assert`, which is removed
        # when Python runs with -O) so that truncated results are never counted.
        if result.total_pages != 1:
            msg = (
                f"Expected a single page of results for {branch.value}, "
                f"got {result.total_pages} pages; increase page_size"
            )
            raise RuntimeError(msg)

        for data in result.data:
            # Same key layout as the prepared `results` dict; a KeyError here
            # means the search returned a combination that was not prepared.
            key = f"{data.country_value};{data.branch_value};{data.quantity_value};{data.medium_value};{data.source_value};{data.nuclide_value};"
            results[key] += 1

# All files are saved to this directory
root_path = Path().resolve().parent / "input" / "radiation-data"
root_path.mkdir(parents=True, exist_ok=True)

# Create an output file for each country with a header line
for country in countries:
    print(f"Saving data for {country.value}")

    # Keys have the form "country;branch;...". Match on "country;" (including
    # the delimiter) so that a country whose name is a prefix of another
    # country's name does not also pick up the other country's rows.
    prefix = f"{country.value};"

    with (root_path / f"{country.value}.cmc").open("w") as f:
        _ = f.write("Country;Branch;Quantity;Medium;Source;Nuclide;Number of CMCs\n")
        # Each key already ends with ";", so the count is appended directly
        f.writelines(f"{k}{v}\n" for k, v in results.items() if k.startswith(prefix))

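Generate the non-zero files: keep only the combinations for which at least one country has a non-zero number of CMCs, and write a summary listing those countries for each combination. This takes about 6 minutes.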

In [None]:
from pathlib import Path

import numpy as np

from msl.kcdb import Radiation

# The KCDB client is only needed here to obtain the list of countries
radiation = Radiation()
countries = radiation.countries()

# Load all data
# One structured array per country, keyed by the country value. The field
# names come from the header line of each file (names=True) and the column
# types are inferred by genfromtxt (dtype=None).
root_path = Path().resolve().parent.joinpath("input", "radiation-data")
data: dict[str, np.ndarray] = {}
for country in countries:
    print(f"Loading data for {country.value}")
    cmc_file = root_path / f"{country.value}.cmc"
    data[country.value] = np.genfromtxt(cmc_file, delimiter=";", names=True, dtype=None)

# Generate the "non-zero" files
root_path = Path().resolve().parent / "input" / "radiation-data-Non-zero"
root_path.mkdir(parents=True, exist_ok=True)

summary_path = root_path / "summary"
summary_path.mkdir(exist_ok=True)

# Lines are collected in memory and every file is written exactly once at the
# end, instead of re-opening each file in append mode for every row.
# Each list starts with the header line of its file.
country_lines: dict[str, list[str]] = {
    country.value: ["Country;Branch;Quantity;Medium;Source;Nuclide;Number of CMCs\n"] for country in countries
}
summary_lines: list[str] = ["Branch;Quantity;Medium;Source;Nuclide;Number of Countries;Countries\n"]

# Row `i` is read from every country's array, so all arrays are expected to
# hold the same combinations in the same order. No countries -> nothing to do.
num_rows = next(iter(data.values())).size if data else 0
for row in range(num_rows):
    print(f"Processing CMC {row + 1:5d} of {num_rows}")
    counts = 0
    names: list[str] = []
    middle: list[str] = []
    for country in countries:
        # Record layout: country, <branch .. nuclide>, number of CMCs
        first, *middle, num_cmc = data[country.value][row]
        if num_cmc > 0:
            counts += num_cmc
            names.append(str(first))

    # Keep the combination only if at least one country has a CMC for it
    if counts > 0:
        combination = ";".join(str(v) for v in middle)
        summary_lines.append(f"{combination};{len(names)};{'|'.join(names)}\n")

        # The row is written for every country (including those with zero
        # CMCs) so that all non-zero files keep the same rows.
        for country in countries:
            line = ";".join(str(v) for v in data[country.value][row])
            country_lines[country.value].append(f"{line}\n")

# The summary header and the summary rows go to the same file inside the
# "summary" directory (previously the header was written to a different file
# in `root_path`, leaving the real summary without a header).
with (summary_path / "radiation-Non-zero-Summary.cmc").open("w") as f:
    f.writelines(summary_lines)

for country in countries:
    with (root_path / f"{country.value}-Non-zero.cmc").open("w") as f:
        f.writelines(country_lines[country.value])