# Compute Global Mean Surface Temperature from monthly data

In [2]:
import numpy as np
import scipy as sp
import xarray as xr
import matplotlib as plt
import glob
import re

In [3]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [4]:
# # File paths
# rda_scratch = '/gpfs/csfs1/collections/rda/scratch/harshah'
# rda_data    = '/gpfs/csfs1/collections/rda/data/'
# era5_path   = rda_data + 'ds633.0/e5.oper.an.sfc/'
# zarr_path   = rda_scratch + '/tas_zarr/'

In [5]:
# #Pelican test path
# pelican_test = '/glade/campaign/collections/rda/transfer/harshah/pelican_test'
# zarr_path   = pelican_test + '/tas_zarr/'

In [6]:
# Root URL for HTTPS access; the zarr stores are addressed relative to it
rda_url = 'https://request.rda.ucar.edu/'
zarr_path = f'{rda_url}harshah/pelican_test/tas_zarr/'

In [7]:
# Describe the PBS-backed Dask cluster. Each PBS job is configured for a
# single worker process with one core and 8GiB of memory.
cluster = PBSCluster(
    # --- PBS submission settings ---
    queue='casper',
    job_name='dask-wk24-hpc',
    walltime='1:30:00',
    resource_spec='select=1:ncpus=1:mem=8GB',
    # --- Dask worker settings (per job) ---
    processes=1,
    cores=1,
    memory='8GiB',
    # NOTE(review): this is an absolute path at the filesystem root --
    # confirm the worker nodes can create/write it.
    local_directory='/dask/spill',
    # --- Network interface used by Dask ('ib0' kept as the alternative) ---
    # interface='ib0',
    interface='ext',
)

In [8]:
# GMST function ###
# calculate global means

def get_lat_name(ds):
    """Return the name of the latitude coordinate of ``ds``.

    The candidate names ``'lat'`` and ``'latitude'`` are tried in that order
    and the first one found in ``ds.coords`` is returned.

    Raises
    ------
    RuntimeError
        If neither candidate name is a coordinate of ``ds``.
    """
    candidates = ('lat', 'latitude')
    found = next((name for name in candidates if name in ds.coords), None)
    if found is None:
        raise RuntimeError("Couldn't find a latitude coordinate")
    return found

def global_mean(ds):
    """Latitude-weighted mean of ``ds`` over every dimension except ``'time'``.

    Each grid cell is weighted by the cosine of its latitude. The reduction
    uses xarray's weighted mean, which divides by the sum of the weights of
    the valid (non-NaN) cells only. Multiplying by weights normalised with
    ``weight.mean()`` and then calling ``.mean()`` is only equivalent when
    the data has no missing values; with NaNs it gives a biased result.

    Parameters
    ----------
    ds : xarray.DataArray or xarray.Dataset
        Object with a ``'lat'`` or ``'latitude'`` coordinate, in degrees.

    Returns
    -------
    Same type as ``ds``
        The weighted mean, reduced over all dimensions other than ``'time'``.

    Raises
    ------
    RuntimeError
        If no latitude coordinate can be found (raised by ``get_lat_name``).
    """
    lat = ds[get_lat_name(ds)]
    # cos(latitude) as the per-cell weight on the lat/lon grid
    weight = np.cos(np.deg2rad(lat))
    # An ordered list (rather than a set) keeps the reduction deterministic
    other_dims = [dim for dim in ds.dims if dim != 'time']
    return ds.weighted(weight).mean(other_dims)

In [9]:
# Attach a distributed client to the PBS cluster; the bare name on the last
# line renders the client summary in the notebook.
client = Client(address=cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.97:32977,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [8]:
# Ask the cluster for three workers, then display the cluster summary
cluster.scale(n=3)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.97:43567,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


### Compare GMST calculation

In [13]:
# Open the monthly zarr store below `zarr_path` and select the VAR_2T
# variable; the bare name on the last line displays its summary.
tas_monthly_zarr = xr.open_zarr(
    f'{zarr_path}e5_tas2m_monthly_1940_2023.zarr'
)['VAR_2T']
tas_monthly_zarr

Unnamed: 0,Array,Chunk
Bytes,3.90 GiB,495.07 MiB
Shape,"(1009, 721, 1440)","(1000, 721, 180)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.90 GiB 495.07 MiB Shape (1009, 721, 1440) (1000, 721, 180) Dask graph 16 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  1009,

Unnamed: 0,Array,Chunk
Bytes,3.90 GiB,495.07 MiB
Shape,"(1009, 721, 1440)","(1000, 721, 180)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [12]:
# %%time
# tas_monthly_nc  = xr.open_dataset(zarr_path +'e5_tas2m_monthly_1940_2023.nc',engine='netcdf4',chunks='auto').VAR_2T
# tas_monthly_nc

#### Now compute (spatially weighted) Global Mean

In [11]:
# %%time
# gmst_nc = global_mean(tas_monthly_nc)

CPU times: user 10.3 ms, sys: 0 ns, total: 10.3 ms
Wall time: 10.3 ms


In [14]:
%%time
gmst_zarr = global_mean(tas_monthly_zarr)

CPU times: user 10.9 ms, sys: 4.26 ms, total: 15.2 ms
Wall time: 16.8 ms


In [None]:
%%time
## Generate performance report
with performance_report(filename ='e5_zarr_gmst.html'):
     gmst_zarr0 = gmst_zarr.compute()

In [None]:
# %%time
# ## Generate performance report
# with performance_report(filename ='e5_nc_gmst.html'):
#      gmst_nc0 = gmst_nc.compute()

In [None]:
# Plot the computed global-mean result with xarray's default plot method
gmst_zarr0.plot()

In [None]:
# gmst_nc0.plot()