# Calculating area weighted time series from GFDL data (3D fields)
**Author:** Denisse Fierro Arcos  
**Date:** 2024-09-11  
  
Calculating the area weighted mean for each time step and depth bin using the GFDL-MOM6-COBALT2 data that has been clipped to match the FishMIP regional model boundaries. Only variables that include a depth dimension are processed here. Results from this script are shown as time series in the shiny app.

## Loading libraries

In [11]:
import xarray as xr
import numpy as np
import pandas as pd
import os
from glob import glob
import re
from dask.distributed import Client

## Starting cluster

In [2]:
#Start a local dask cluster where each worker runs a single thread
client = Client(threads_per_worker = 1)
#Leaving the client as the last expression shows the cluster summary below
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 14
Total threads: 14,Total memory: 63.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:39913,Workers: 14
Dashboard: /proxy/8787/status,Total threads: 14
Started: Just now,Total memory: 63.00 GiB

0,1
Comm: tcp://127.0.0.1:38745,Total threads: 1
Dashboard: /proxy/41071/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:36135,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-r32jwhm9,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-r32jwhm9

0,1
Comm: tcp://127.0.0.1:34721,Total threads: 1
Dashboard: /proxy/43781/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:43011,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-ddiejhpg,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-ddiejhpg

0,1
Comm: tcp://127.0.0.1:46843,Total threads: 1
Dashboard: /proxy/40139/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:43813,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-i5beqpwq,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-i5beqpwq

0,1
Comm: tcp://127.0.0.1:41035,Total threads: 1
Dashboard: /proxy/37125/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:40731,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-vij4baj7,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-vij4baj7

0,1
Comm: tcp://127.0.0.1:39227,Total threads: 1
Dashboard: /proxy/34441/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:44147,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-rsfkb6nt,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-rsfkb6nt

0,1
Comm: tcp://127.0.0.1:43779,Total threads: 1
Dashboard: /proxy/45253/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:43435,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-el1btowo,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-el1btowo

0,1
Comm: tcp://127.0.0.1:43483,Total threads: 1
Dashboard: /proxy/38313/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:46797,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-lhkeazlk,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-lhkeazlk

0,1
Comm: tcp://127.0.0.1:44221,Total threads: 1
Dashboard: /proxy/35535/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:43549,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-u4vmvtwj,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-u4vmvtwj

0,1
Comm: tcp://127.0.0.1:36455,Total threads: 1
Dashboard: /proxy/41537/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:36757,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-eom_5s9i,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-eom_5s9i

0,1
Comm: tcp://127.0.0.1:34885,Total threads: 1
Dashboard: /proxy/45019/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:34355,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-ap7a6j4o,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-ap7a6j4o

0,1
Comm: tcp://127.0.0.1:34767,Total threads: 1
Dashboard: /proxy/42261/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:45863,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-ab6ru7yt,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-ab6ru7yt

0,1
Comm: tcp://127.0.0.1:39999,Total threads: 1
Dashboard: /proxy/35115/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:37995,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-lwsz2g93,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-lwsz2g93

0,1
Comm: tcp://127.0.0.1:36849,Total threads: 1
Dashboard: /proxy/36697/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:40105,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-f3lncehe,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-f3lncehe

0,1
Comm: tcp://127.0.0.1:40655,Total threads: 1
Dashboard: /proxy/43301/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:39361,
Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-yh5g4fht,Local directory: /jobfs/124618452.gadi-pbs/dask-scratch-space/worker-yh5g4fht


## Defining basic variables

In [15]:
#Base folder containing the regional GFDL inputs
base_dir = '/g/data/vf71/fishmip_inputs/ISIMIP3a/regional_inputs/obsclim/025deg'

#Full paths to all zarr files available in the download folder
zarr_list = glob(os.path.join(base_dir, 'download_data/*zarr'))

#Full paths to the grid cell area files (one or more per regional model)
area_list = glob(os.path.join(base_dir, 'area_grid/*'))

#Regional model names are the text between 'arcmin' and 'fixed' in the area 
#file names. Leading and trailing underscores are kept as part of the name
reg_pattern = re.compile('arcmin(_.*_)fixed')
reg = [reg_pattern.findall(area_path)[0] for area_path in area_list]

#Folder where area weighted time series will be saved (created if missing)
base_out_ts = os.path.join(base_dir, 'ts_data')
os.makedirs(base_out_ts, exist_ok = True)

## Defining function to calculate weighted time series

In [7]:
def calc_ts(region, area_list, zarr_list, folder_out):
    '''
    Calculate area weighted means over 'lon' and 'lat' for every file of a
    regional model and save each result as a parquet file.
    
    Inputs:
    region (character): Name of FishMIP regional model as included in file 
    names. It is used as a substring filter on the paths in area_list and 
    zarr_list
    area_list (list): Full paths to grid cell area files. Exactly one path 
    must contain region, otherwise unpacking raises a ValueError
    zarr_list (list): Full paths to zarr files used in weighted mean 
    calculations. Each file must contain a single data variable
    folder_out (character): Full path to folder where results will be stored

    Outputs:
    None. One parquet file per matching zarr file is written to folder_out. 
    The file name is the input base name with 'monthly' replaced by 
    'weighted_mean_ts'.
    '''

    #Find the single grid cell area file for the region of interest
    (area_path,) = (path for path in area_list if region in path)
    cell_area = xr.open_zarr(area_path)['cellareao']
    #Grid cells with NA area are given a weight of zero
    weights = xr.where(np.isnan(cell_area), 0, cell_area)

    #Calculate weighted time series for each file
    for zarr_path in zarr_list:
        #Files from other regions are ignored
        if region not in zarr_path:
            continue

        #Load file and select the only variable it contains
        ds = xr.open_zarr(zarr_path)
        (var_name,) = ds.data_vars
        field = ds[var_name]

        #Keep attributes as a one-row data frame
        attrs_df = pd.DataFrame([field.attrs])

        #Area weighted mean across horizontal dimensions
        ts = field.weighted(weights).mean(('lon', 'lat'))

        #Transform to a data frame with one row per remaining coordinate
        #combination, removing rows with NA values
        ts_df = ts.to_series().to_frame()
        ts_df = ts_df.reset_index().dropna()
        new_names = {field.name: 'vals', 'depth_bin_m': 'depth'}
        ts_df = ts_df.rename(columns = new_names)
        ts_df = ts_df.reset_index(drop = True)

        #Attributes are added as extra columns. Data frames are aligned by row
        #index, so attribute values are only present in the first row
        out_df = pd.concat([ts_df, attrs_df], axis = 1)

        #NOTE(review): only 'monthly' is replaced in the file name, so the 
        #output keeps the extension of the input path although its content is
        #parquet - confirm this is what downstream readers expect
        out_name = os.path.basename(zarr_path).replace('monthly', 
                                                       'weighted_mean_ts')
        out_df.to_parquet(os.path.join(folder_out, out_name))

In [None]:
#Apply function to every regional model, saving results in the same folder
for r in reg:
    calc_ts(region = r, area_list = area_list, zarr_list = zarr_list, 
            folder_out = base_out_ts)