(C) Crown Copyright, Met Office. All rights reserved.

## file_density.ipynb

Look at the density of data in files to see how many bytes are required to store each data point.

In [1]:
import os
import warnings

import iris

# Ignore warnings displayed when loading data.
# NOTE: no category or module is given, so this silences every Python
# warning for the rest of the session, not only those raised during loading.
warnings.filterwarnings("ignore")

In [2]:
# One sample `ta` (temperature) file per HighResMIP model; the table and the
# time span covered differ between models. Each entry is written as two
# adjacent string literals (directory, then file name), which Python joins
# into a single path string.
filepaths = [
    '/gws/nopw/j04/primavera5/stream1/CMIP6/HighResMIP/AWI/AWI-CM-1-1-HR/hist-1950/r1i1p1f2/Eday/ta/gn/v20171119/'
    'ta_Eday_AWI-CM-1-1-HR_hist-1950_r1i1p1f2_gn_19510101-19511231.nc',
    '/badc/cmip6/data/CMIP6/HighResMIP/CMCC/CMCC-CM2-VHR4/hist-1950/r1i1p1f1/6hrPlevPt/ta/gn/v20180705/'
    'ta_6hrPlevPt_CMCC-CM2-VHR4_hist-1950_r1i1p1f1_gn_195001010000-195001311800.nc',
    '/badc/cmip6/data/CMIP6/HighResMIP/CNRM-CERFACS/CNRM-CM6-1-HR/hist-1950/r1i1p1f2/6hrPlevPt/ta/gr/v20190221/'
    'ta_6hrPlevPt_CNRM-CM6-1-HR_hist-1950_r1i1p1f2_gr_195001010600-195004010000.nc',
    '/badc/cmip6/data/CMIP6/HighResMIP/EC-Earth-Consortium/EC-Earth3P-HR/hist-1950/r1i1p2f1/6hrPlevPt/ta/gr/v20181212/'
    'ta_6hrPlevPt_EC-Earth3P-HR_hist-1950_r1i1p2f1_gr_195001010000-195012311800.nc',
    '/badc/cmip6/data/CMIP6/HighResMIP/ECMWF/ECMWF-IFS-HR/hist-1950/r1i1p1f1/6hrPlevPt/ta/gr/v20170915/'
    'ta_6hrPlevPt_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gr_195001010000-195001311800.nc',
    '/gws/nopw/j04/primavera2/stream1/CMIP6/HighResMIP/MPI-M/MPI-ESM1-2-XR/hist-1950/r1i1p1f1/6hrPlevPt/ta/gn/v20180606/'
    'ta_6hrPlevPt_MPI-ESM1-2-XR_hist-1950_r1i1p1f1_gn_195001010558-195012312358.nc',
    '/badc/cmip6/data/CMIP6/HighResMIP/MOHC/HadGEM3-GC31-HM/hist-1950/r1i1p1f1/E3hrPt/ta/gn/v20180730/'
    'ta_E3hrPt_HadGEM3-GC31-HM_hist-1950_r1i1p1f1_gn_195001010300-195001302100.nc',
]

In [3]:
# Report the storage density (bytes on disk per data point) of each file.
for filepath in filepaths:
    # Size of the whole file in bytes, so anything stored alongside the
    # data variable is counted too.
    size = os.stat(filepath).st_size
    cube = iris.load_cube(filepath)
    # Count points from the cube's shape rather than from cube.dim_coords:
    # a dimension without a dimension coordinate does not appear in
    # dim_coords, so it would be left out of the product and the density
    # would be overstated.
    num_points = 1
    for dim_length in cube.shape:
        num_points *= dim_length
    density = size / num_points
    print(f'{os.path.basename(filepath)} {density:.1f} bytes/point')

ta_Eday_AWI-CM-1-1-HR_hist-1950_r1i1p1f2_gn_19510101-19511231.nc 3.0 bytes/point
ta_6hrPlevPt_CMCC-CM2-VHR4_hist-1950_r1i1p1f1_gn_195001010000-195001311800.nc 2.1 bytes/point
ta_6hrPlevPt_CNRM-CM6-1-HR_hist-1950_r1i1p1f2_gr_195001010600-195004010000.nc 1.9 bytes/point
ta_6hrPlevPt_EC-Earth3P-HR_hist-1950_r1i1p2f1_gr_195001010000-195012311800.nc 2.2 bytes/point
ta_6hrPlevPt_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gr_195001010000-195001311800.nc 1.5 bytes/point
ta_6hrPlevPt_MPI-ESM1-2-XR_hist-1950_r1i1p1f1_gn_195001010558-195012312358.nc 1.5 bytes/point
ta_E3hrPt_HadGEM3-GC31-HM_hist-1950_r1i1p1f1_gn_195001010300-195001302100.nc 1.7 bytes/point


## Precip ##

Precipitation is expected to compress less well than temperature, i.e. to need more bytes per data point.

In [4]:
# One sample `pr` (precipitation) file per model, taken from the 3hr or
# Prim6hr table. Each entry is written as two adjacent string literals
# (directory, then file name), which Python joins into a single path string.
# NOTE: this rebinds `filepaths`, replacing the list defined in cell 2.
filepaths = [
    '/gws/nopw/j04/primavera5/stream1/CMIP6/HighResMIP/AWI/AWI-CM-1-1-HR/hist-1950/r1i1p1f2/3hr/pr/gn/v20171119/'
    'pr_3hr_AWI-CM-1-1-HR_hist-1950_r1i1p1f2_gn_195101010130-195112312230.nc',
    '/gws/nopw/j04/primavera5/stream1/PRIMAVERA/HighResMIP/CMCC/CMCC-CM2-VHR4/hist-1950/r1i1p1f1/Prim6hr/pr/gn/v20180705/'
    'pr_Prim6hr_CMCC-CM2-VHR4_hist-1950_r1i1p1f1_gn_195001010000-195001311800.nc',
    '/badc/cmip6/data/CMIP6/HighResMIP/CNRM-CERFACS/CNRM-CM6-1-HR/hist-1950/r1i1p1f2/3hr/pr/gr/v20190221/'
    'pr_3hr_CNRM-CM6-1-HR_hist-1950_r1i1p1f2_gr_195001010130-195012312230.nc',
    '/badc/cmip6/data/CMIP6/HighResMIP/EC-Earth-Consortium/EC-Earth3P-HR/hist-1950/r1i1p2f1/3hr/pr/gr/v20181212/'
    'pr_3hr_EC-Earth3P-HR_hist-1950_r1i1p2f1_gr_195001010000-195012312100.nc',
    '/gws/nopw/j04/primavera5/stream1/PRIMAVERA/HighResMIP/ECMWF/ECMWF-IFS-HR/hist-1950/r1i1p1f1/Prim6hr/pr/gr/v20170915/'
    'pr_Prim6hr_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gr_195001010300-195012312100.nc',
    '/gws/nopw/j04/primavera4/stream1/PRIMAVERA/HighResMIP/MPI-M/MPI-ESM1-2-XR/hist-1950/r1i1p1f1/Prim6hr/pr/gn/v20180606/'
    'pr_Prim6hr_MPI-ESM1-2-XR_hist-1950_r1i1p1f1_gn_195001010558-195012312358.nc',
    '/badc/cmip6/data/CMIP6/HighResMIP/MOHC/HadGEM3-GC31-HM/hist-1950/r1i1p1f1/3hr/pr/gn/v20180730/'
    'pr_3hr_HadGEM3-GC31-HM_hist-1950_r1i1p1f1_gn_195001010130-195006302230.nc'
]

In [5]:
# Report the storage density (bytes on disk per data point) of each file
# currently listed in `filepaths`.
for filepath in filepaths:
    # Size of the whole file in bytes, so anything stored alongside the
    # data variable is counted too.
    size = os.stat(filepath).st_size
    cube = iris.load_cube(filepath)
    # The number of data points is the product of the cube's dimension
    # lengths. Use cube.shape for this, not cube.dim_coords: a dimension
    # with no dimension coordinate is missing from dim_coords and would
    # otherwise be dropped from the count, inflating bytes/point.
    num_points = 1
    for dim_length in cube.shape:
        num_points *= dim_length
    density = size / num_points
    print(f'{os.path.basename(filepath)} {density:.1f} bytes/point')

pr_3hr_AWI-CM-1-1-HR_hist-1950_r1i1p1f2_gn_195101010130-195112312230.nc 3.8 bytes/point
pr_Prim6hr_CMCC-CM2-VHR4_hist-1950_r1i1p1f1_gn_195001010000-195001311800.nc 3.2 bytes/point
pr_3hr_CNRM-CM6-1-HR_hist-1950_r1i1p1f2_gr_195001010130-195012312230.nc 3.1 bytes/point
pr_3hr_EC-Earth3P-HR_hist-1950_r1i1p2f1_gr_195001010000-195012312100.nc 2.2 bytes/point
pr_Prim6hr_ECMWF-IFS-HR_hist-1950_r1i1p1f1_gr_195001010300-195012312100.nc 1.6 bytes/point
pr_Prim6hr_MPI-ESM1-2-XR_hist-1950_r1i1p1f1_gn_195001010558-195012312358.nc 2.1 bytes/point
pr_3hr_HadGEM3-GC31-HM_hist-1950_r1i1p1f1_gn_195001010130-195006302230.nc 3.1 bytes/point
