# Calculating area weighted time series from GFDL data (3D fields)
**Author:** Denisse Fierro Arcos  
**Date:** 2024-09-11  
  
Calculating the area weighted mean for each time step and depth bin using the GFDL-MOM6-COBALT2 data that has been clipped to match the FishMIP regional model boundaries. Only variables that include a depth dimension are processed here. Results from this script are shown as time series in the shiny app.

## Loading libraries

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import os
from glob import glob
import re
from dask.distributed import Client

## Starting cluster

In [2]:
#Start a dask cluster where each worker runs a single thread
client = Client(threads_per_worker = 1)
#Show cluster summary (dashboard link, number of workers, memory)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 14
Total threads: 14,Total memory: 63.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:36223,Workers: 14
Dashboard: /proxy/8787/status,Total threads: 14
Started: Just now,Total memory: 63.00 GiB

0,1
Comm: tcp://127.0.0.1:37251,Total threads: 1
Dashboard: /proxy/46019/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:38127,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-3gt66ct0,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-3gt66ct0

0,1
Comm: tcp://127.0.0.1:34051,Total threads: 1
Dashboard: /proxy/42781/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:45657,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-svsjbfzm,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-svsjbfzm

0,1
Comm: tcp://127.0.0.1:38641,Total threads: 1
Dashboard: /proxy/41667/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:44707,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-_702x8tv,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-_702x8tv

0,1
Comm: tcp://127.0.0.1:40665,Total threads: 1
Dashboard: /proxy/38607/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:43397,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-sqhdwz3v,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-sqhdwz3v

0,1
Comm: tcp://127.0.0.1:40711,Total threads: 1
Dashboard: /proxy/36879/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:44597,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-30rw4wcg,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-30rw4wcg

0,1
Comm: tcp://127.0.0.1:33067,Total threads: 1
Dashboard: /proxy/39241/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:38999,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-tm5ce59z,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-tm5ce59z

0,1
Comm: tcp://127.0.0.1:41827,Total threads: 1
Dashboard: /proxy/34801/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:38363,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-paih_col,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-paih_col

0,1
Comm: tcp://127.0.0.1:35005,Total threads: 1
Dashboard: /proxy/37729/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:44803,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-dzoe3shz,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-dzoe3shz

0,1
Comm: tcp://127.0.0.1:41611,Total threads: 1
Dashboard: /proxy/43197/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:42897,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-l6_il56y,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-l6_il56y

0,1
Comm: tcp://127.0.0.1:38799,Total threads: 1
Dashboard: /proxy/46659/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:46663,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-j9h1heph,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-j9h1heph

0,1
Comm: tcp://127.0.0.1:35385,Total threads: 1
Dashboard: /proxy/42885/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:40853,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-zdbk7995,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-zdbk7995

0,1
Comm: tcp://127.0.0.1:36873,Total threads: 1
Dashboard: /proxy/44255/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:36201,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-706_pnk3,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-706_pnk3

0,1
Comm: tcp://127.0.0.1:38969,Total threads: 1
Dashboard: /proxy/45277/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:39843,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-_7wgdntk,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-_7wgdntk

0,1
Comm: tcp://127.0.0.1:37207,Total threads: 1
Dashboard: /proxy/36609/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:40189,
Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-5k6co2q7,Local directory: /jobfs/124866223.gadi-pbs/dask-scratch-space/worker-5k6co2q7


## Defining basic variables

In [3]:
#Base folder containing the zarr files, area grids and outputs
base_dir = '/g/data/vf71/fishmip_inputs/ISIMIP3a/regional_inputs/obsclim/025deg'

#Get list of zarr files (any entry ending in 'zarr' inside 'download_data')
zarr_list = glob(os.path.join(base_dir, 'download_data/*zarr'))

#Getting list of area grid files per regional model
area_list = glob(os.path.join(base_dir, 'area_grid/*'))

#Get a list of regional model names from the area file names. The name is the
#text between 'arcmin' and 'fixed' (underscores on both ends are kept). An 
#IndexError is raised if an area file name does not follow this pattern
reg = [re.findall('arcmin(_.*_)fixed', f)[0] for f in area_list]

#Folder where area weighted time series will be saved (created if missing)
base_out_ts = os.path.join(base_dir, 'ts_data')
os.makedirs(base_out_ts, exist_ok = True)

## Defining function to calculate weighted time series

In [24]:
def calc_ts(region, area_list, zarr_list, folder_out, comp_woa = False):
    '''
    Calculate area weighted mean time series for a single FishMIP regional 
    model. One parquet file is written per zarr file matching the region.
    
    Inputs:
    region (character): Name of FishMIP regional model as included in file 
    names
    area_list (list): Full paths to grid cell area files. Exactly one path must
    contain the region name
    zarr_list (list): Full paths to zarr files used in weighted mean 
    calculations. Each file must contain a single data variable
    folder_out (character): Full path to folder where results will be stored. 
    This folder must already exist
    comp_woa (boolean): Default is False. If set to True, data is sliced to the
    1981-2010 period (to match WOA) and a monthly climatology is calculated 
    before the weighted mean is computed

    Outputs:
    None. Results are saved to disk as parquet files.
    '''

    #Find the grid cell area file for the region. Unpacking into a single 
    #element list raises a ValueError if zero or several files match
    [area_path] = [path for path in area_list if region in path]
    cell_area = xr.open_zarr(area_path)['cellareao']
    #Replace NA with zeros so these grid cells carry no weight
    cell_area = xr.where(np.isnan(cell_area), 0, cell_area)

    #Process every zarr file that belongs to the region of interest
    for zarr_path in (path for path in zarr_list if region in path):
        ds = xr.open_zarr(zarr_path)
        #A single data variable is expected per file (ValueError otherwise)
        [var_name] = list(ds.data_vars)
        da = ds[var_name]

        #Keep original attributes as a one row data frame
        attrs_df = pd.DataFrame([da.attrs])

        #If comp_woa is True, calculate monthly climatology for 1981-2010
        if comp_woa:
            clim_period = slice('1981-01-01', '2010-12-31')
            da = da.sel(time = clim_period).groupby('time.month').mean('time')

        #Area weighted mean across the horizontal dimensions
        area_mean = da.weighted(cell_area).mean(('lon', 'lat'))

        #Transform to data frame and remove rows with no data
        ts = area_mean.to_series().to_frame().reset_index().dropna()
        ts = ts.rename(columns = {da.name: 'vals', 'depth_bin_m': 'depth'})
        ts = ts.reset_index(drop = True)
        #Attach original attributes as extra columns. They are aligned by 
        #index, so values are only present in the first row
        ts = pd.concat([ts, attrs_df], axis = 1)

        #Build output file name from the input file name and save
        out_name = os.path.basename(zarr_path)
        out_name = out_name.replace('monthly', 'weighted_mean_ts')
        out_name = out_name.replace('zarr', 'parquet')
        ts.to_parquet(os.path.join(folder_out, out_name))

In [8]:
#Calculate and save weighted time series for every regional model
for r in reg:
    calc_ts(r, area_list, zarr_list, base_out_ts)

In [None]:
#Folder where weighted means based on the 1981-2010 monthly climatology 
#(comp_woa = True) will be saved
base_out_comp = os.path.join(base_out_ts, 'comp_clim')
#Create the 'comp_clim' folder itself, otherwise parquet files cannot be 
#written inside it
os.makedirs(base_out_comp, exist_ok = True)

#Calculate and save weighted monthly climatology for every regional model
for r in reg:
    calc_ts(r, area_list, zarr_list, base_out_comp, comp_woa = True)

INFO:flox:Entering _validate_reindex: reindex is None
INFO:flox:Leaving _validate_reindex: method = None, returning None
INFO:flox:_choose_engine: Choosing 'numpy'
INFO:flox:_choose_method: method is None
INFO:flox:_choose_method: choosing preferred_method=blockwise
INFO:flox:Entering _validate_reindex: reindex is None
INFO:flox:Leaving _validate_reindex: reindex is False
INFO:flox:Entering _validate_reindex: reindex is None
INFO:flox:Leaving _validate_reindex: method = None, returning None
INFO:flox:_choose_engine: Choosing 'numpy'
INFO:flox:_choose_method: method is None
INFO:flox:_choose_method: choosing preferred_method=blockwise
INFO:flox:Entering _validate_reindex: reindex is None
INFO:flox:Leaving _validate_reindex: reindex is False
INFO:flox:Entering _validate_reindex: reindex is None
INFO:flox:Leaving _validate_reindex: method = None, returning None
INFO:flox:_choose_engine: Choosing 'numpy'
INFO:flox:_choose_method: method is None
INFO:flox:_choose_method: choosing preferred_