# add_month_coordinate_2GISready_nc

Date: 12 June, 2024

Author = {"name": "Thomas Moore", "affiliation": "CSIRO", "email": "thomas.moore@csiro.au", "orcid": "0000-0003-3930-1946"}

### BRAN2020 is on the order of 50TB of float data over nearly 9000 `netcdf` file assests in total.

#### required packages

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

from dask.distributed import Client, LocalCluster
import dask
import datetime
import zarr

import gc
import sys
import subprocess
from tabulate import tabulate
import os
import glob
import streamjoy
import pickle
import json

In [2]:
# Append the directory of the module to sys.path - import functions
sys.path.append('/g/data/es60/users/thomas_moore/code/Climatology-generator-demo/src/')
import bran2020_demo_functions as my_tools
from bran2020_demo_functions import keep_only_selected_vars, load_rechunker_config, print_chunks, rechunk_each_st_ocean, remove_zarr_encoding, version_table, concatinate_st_ocean_zarrs

#### start a local Dask client

In [3]:
# Set configuration options
dask.config.set({
    'distributed.comm.timeouts.connect': '90s',  # Timeout for connecting to a worker
    'distributed.comm.timeouts.tcp': '90s',  # Timeout for TCP communications
})

cluster = LocalCluster(
    n_workers=28,          # Number of workers
    threads_per_worker=1#,
    #memory_limit='8GB' # Memory limit per each worker
)
client = Client(cluster)



In [5]:
results_path = '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/'

## coordinate nomeclature

In [None]:
coordinate_names = {
    "lat_name_dict": {
        "temp": "yt_ocean",
        "salt": "yt_ocean",
        "u": "yu_ocean",
        "v": "yu_ocean",
        "mld": "yt_ocean",
        "eta_t": "yt_ocean"
    },
    "lon_name_dict": {
        "temp": "xt_ocean",
        "salt": "xt_ocean",
        "u": "xu_ocean",
        "v": "xu_ocean",
        "mld": "xt_ocean",
        "eta_t": "xt_ocean"
    },
    "depth_name_dict": {
        "temp": "st_ocean",
        "salt": "st_ocean",
        "u": "st_ocean",
        "v": "st_ocean"
    }
}

In [21]:
root_directory = results_path
# List of NetCDF file paths
file_paths = []
for root, dirs, files in os.walk(root_directory):
    for file in files:
        if file.endswith(".nc"):
            file_paths.append(os.path.join(root, file))
file_paths.sort()
file_paths

['/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_01.nc',
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_02.nc',
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_03.nc',
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_04.nc',
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_05.nc',
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_06.nc',
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_07.nc',
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_08.nc',
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_09.nc',
 '/g/data/xv83/users/tm4888/

In [22]:
# Process each file
file_dict = {}
for file_path in file_paths:
    # Extract the month value from the file name
    file_name = os.path.basename(file_path)
    month_value = int(file_name[-5:-3])  # Assuming the month value is always the last two characters before ".nc"
    
    # Build a dictionary with file path as key and month value as value
    file_dict[file_path] = month_value
    



In [23]:
file_dict

{'/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_01.nc': 1,
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_02.nc': 2,
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_03.nc': 3,
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_04.nc': 4,
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_05.nc': 5,
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_06.nc': 6,
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_07.nc': 7,
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_08.nc': 8,
 '/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_09.nc': 9,
 

In [24]:
# Define a function to process each NetCDF file
def add_month_coordinate(file_path, month_int):
    # Open the NetCDF file
    ds = xr.open_dataset(file_path)
    
    # Add a new coordinate for the month
    ds = ds.expand_dims(dim={'month':month_int})
    
    # Save the modified dataset to a new NetCDF file
    new_file_path = f"{os.path.splitext(file_path)[0]}_with_month.nc"
    ds.to_netcdf(new_file_path)
    print(f"Saved {new_file_path}")

In [25]:
%%time
# Process each file
for file_path, month_value in file_dict.items():
    add_month_coordinate(file_path, month_value)

Saved /g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/alltime/eta_t/BRAN2020_eta_t_mth_01_with_month.nc


ValueError: conflicting sizes for dimension 'month': length 2 on 'mean_eta_t_alltime' and length 1 on {'xt_ocean': 'xt_ocean', 'yt_ocean': 'yt_ocean', 'month': 'month'}

In [27]:
ds = xr.open_dataset('/g/data/xv83/users/tm4888/PROJECTS/BRAN2020/BRAN2020_climatology/neutral/v/BRAN2020_v_mth_12.nc')

In [30]:
ds

In [29]:
ds.month.values

array(12)

# $The$ $End$

In [None]:
client.shutdown()