In [None]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
# Global matplotlib setting: default font family for text in every figure created below.
plt.rcParams['font.family'] = 'Times New Roman'

In [None]:
def create_box_plot(data, all_data, field_names, y_text, x_axis, y_limit, columns, x_labels, country, country_index, year, path):
    """Draw a box plot for a range of fields, overlay one country's values, and save it as a PNG.

    Parameters
    ----------
    data : sequence of array-like
        One entry per field; each entry holds the values summarised by one box.
    all_data : sequence of sequences
        One entry per field, each indexable by ``country_index``; supplies the
        highlighted country's value for every plotted field.
    field_names : sequence of str
        Tick labels, parallel to ``data`` and ``all_data``.
    y_text : str
        Legend text drawn next to the miniature box plot.
    x_axis : str
        Label of the x axis.
    y_limit : float
        Upper limit of the y axis (the lower limit is fixed at 0).
    columns : sequence of two ints
        1-based, inclusive ``(first, last)`` positions of the fields to plot.
    x_labels : sequence of str
        Kept for interface compatibility only; it is ignored. Tick labels are
        always taken from ``field_names``.
    country : str
        Name of the highlighted country; used as legend text and as the file name.
    country_index : int
        Position of the highlighted country within each entry of ``all_data``.
    year : object
        Shown as the plot title via ``str(year)``.
    path : str
        Prefix of the output file. The figure is written to
        ``path + country + ".png"``, so a directory prefix must end with a
        path separator.
    """
    # Convert the 1-based inclusive column range into a Python slice.
    start_idx, end_idx = columns[0] - 1, columns[1]
    data_to_plot = data[start_idx:end_idx]
    country_data_to_plot = [row[country_index] for row in all_data[start_idx:end_idx]]
    tick_labels = field_names[start_idx:end_idx]

    fig, ax = plt.subplots(figsize=(10, 7))
    try:
        # whis=(0, 100): whiskers run from the minimum to the maximum value.
        ax.boxplot(data_to_plot, whis=(0, 100), medianprops=dict(color='blue'))

        # add data from one country - identified by country index - to the boxplot
        # (boxes are drawn at x = 1, 2, ..., one per plotted field)
        x_positions = np.arange(1, end_idx - start_idx + 1)
        ax.plot(x_positions, country_data_to_plot, 'b.', markersize=15)

        ax.set_xticks(x_positions)
        ax.set_xticklabels(tick_labels, rotation=25)
        ax.tick_params(axis='both', which='major', labelsize='x-large')
        ax.set_ylim(0, y_limit)
        ax.set_title(str(year), fontsize='xx-large')

        # Hand-made legend, part 1: a miniature box plot labelled with y_text.
        inset_ax = ax.inset_axes((0.45, 0.78, .1, .1))
        inset_ax.boxplot([1, 0.75, 0.5, 0.25, 0], whis=(0, 100), medianprops=dict(color='blue'))
        inset_ax.set_axis_off()
        ax.text(0.55, 0.8, y_text, size='xx-large', transform=ax.transAxes)

        # Hand-made legend, part 2: the country marker labelled with the country name.
        ax.plot(0.50, 0.72, 'b.', markersize=15, transform=ax.transAxes)
        ax.text(0.55, .7, country, size='xx-large', transform=ax.transAxes)

        ax.set_ylabel("Number of service categories", size='xx-large', wrap=True)
        ax.set_xlabel(x_axis, size='xx-large')
        # Leave room at the bottom for the rotated tick labels.
        fig.subplots_adjust(bottom=0.2)

        # Save through the figure object rather than pyplot's implicit current figure.
        fig.savefig(path + country + ".png")
    finally:
        # Always release the figure: this function is called in a long loop and
        # pyplot keeps every unclosed figure alive.
        plt.close(fig)


In [None]:
# Create datasets and box plots for sets of countries - all participants, or only the
# 'broad range' participants - overlaying one country at a time.
# Plots are made for the physics areas as a whole and for the branches of each physics area,
# for every year from 2001 to 2024.

path = str(Path().resolve().parent / "output")
input_dir = path + "/services-by-branch-all-countries/"

# Tick labels, in the same order as the data columns that follow the leading "Country" column.
field_names_for_plot = ["Sound in Air", "Sound in Water", "Vibration", "AC", "DC", "Fields", "High V and A", "Impedance",
                        "Materials","Other DC", "RF", "Dimensional", "Laser", "Density", "Flow", "Force", "Gravity", "Hardness",
                        "Mass", "Pressure", "Torque", "Viscosity", "Fibre Optics", "Photometry", "Detectors and Source", "Materials",
                        "Humidity", "Temperature","Frequency", "Time interval", "Time scale difference", "AUV", "EM","L","M","PR","T","TF"]

# np.genfromtxt(names=True) consumes the header line, so row 0 of every loaded array is the
# totals row and the country rows start at index 1.
FIRST_COUNTRY_ROW = 1

# general info for plotting: the two sub-types of plot (legend text and output folder name)
x_labels = ('All MRA Participants',"'Broad range' Participants")
dataset_names = ("All","Broad")

# specific info for plotting: there are 8 different types of plot - one covering all the
# physics areas, and then one covering each area. The four lists below are parallel.
x_axis_labels = ['Physics Area','Electricity and Magnetism Branch','Acoustics and Vibration Branch','Length Branch','Mass and Related Quantities Branch','Photometry and Radiometry Branch', 'Thermometry Branch', 'Time and Frequency Branch']
plot_types = ['physics','EM','AUV','L','M','PR','T','TF']
# 1-based inclusive column ranges. Because column 0 of each row is "Country", these numbers
# are also the field positions within a raw data row.
columns_to_use = [[32,38],[4,11],[1,3],[12,13],[14,22],[23,26],[27,28],[29,31]]
y_limits = [115,30,15,42,5.5,20.5,15.5,20.5]

# create list of countries who have a broad range of capabilities
# defined as those who have claims in at least 6 areas of General Physics
area_first, area_last = columns_to_use[0]

# get all Dec 2024 data
data2024 = np.genfromtxt(input_dir+"2024.txt", delimiter=";", names=True, dtype=None, encoding='latin-1')

# Identify broad-range countries (skip only the totals row; the header is not part of the array)
broad_range_countries = [
    row["Country"] for row in data2024[FIRST_COUNTRY_ROW:]
    if sum(row[k] > 0 for k in range(area_first, area_last + 1)) >= 6
]
print("There are ", len(broad_range_countries)," countries which have claims in at least 6 areas of General Physics in Dec 2024")
print("The countries are ",broad_range_countries)

for year in range(2001,2025):
    print(year)
    # get all data
    data = np.genfromtxt(input_dir+str(year)+".txt", delimiter=";", names=True, dtype=None, encoding='latin-1')

    # create subset of data for broad range countries only
    # (the totals row drops out here because its name is not in the list)
    broad_range_data = [row for row in data if row["Country"] in broad_range_countries]

    # create dataset for all countries for plotting - removing first column of country names,
    # final empty column, and first row of totals
    dataset = [data[field][FIRST_COUNTRY_ROW:] for field in data.dtype.names[1:-1]]
    # create dataset for broad range countries for plotting - removing first column of country
    # names and final empty column (totals already removed)
    broad_range_dataset = [np.array([row[i] for row in broad_range_data]) for i in range(1, len(data.dtype.names) - 1)]

    data_sets_for_use = (dataset,broad_range_dataset)

    # overlay each country in turn; country_index is its position within every entry of `dataset`
    for country_index, country_to_plot in enumerate(data["Country"][FIRST_COUNTRY_ROW:]):
        for plot, x_axis_label, y_limit, columns in zip(plot_types, x_axis_labels, y_limits, columns_to_use):
            # there are two sub-types of plot - one showing the results against all participants,
            # and one against only 'broad range' participants
            for y_text, dataset_to_use, dataset_name in zip(x_labels, data_sets_for_use, dataset_names):
                # trailing "/" is required: create_box_plot appends the country name directly
                outpath = path+"/box_plots/"+plot+"/"+dataset_name+"/"+str(year)+"/"
                os.makedirs(outpath, exist_ok=True)
                # the country marker always comes from the full dataset, whichever set the boxes show
                create_box_plot(dataset_to_use, dataset, field_names_for_plot, y_text, x_axis_label, y_limit, columns,
                                field_names_for_plot, country_to_plot, country_index, year, outpath)
    del data, broad_range_data, dataset, broad_range_dataset