# Compute Global Mean Surface Temperature from monthly data

In [1]:
import glob
import re

import matplotlib.pyplot as plt  # pyplot, not the bare package: `plt.*` plotting calls need this module
import nest_asyncio
import numpy as np
import scipy as sp
import xarray as xr

# Patch asyncio before any async-backed client is created.
# NOTE(review): presumably required so async file-system calls can run inside
# Jupyter's already-running event loop -- confirm it is still needed.
nest_asyncio.apply()

In [2]:
import os

import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report
from pelicanfs.core import PelicanFileSystem, PelicanMap
import fsspec.implementations.http as fshttp

In [3]:
# # File paths
# rda_scratch = '/gpfs/csfs1/collections/rda/scratch/harshah'
# rda_data    = '/gpfs/csfs1/collections/rda/data/'
# era5_path   = rda_data + 'ds633.0/e5.oper.an.sfc/'
# zarr_path   = rda_scratch + '/tas_zarr/'

In [4]:
# Pelican test location: director URL plus the namespace path holding the Zarr stores
pelican_director = 'https://osdf-director.osg-htc.org/'
pelican_test = '/ncar/rda/harshah/pelican_test'
zarr_path = f'{pelican_test}/tas_zarr/'

# File-system client constructed from the director URL; used below to map the Zarr store
pelfs = PelicanFileSystem(pelican_director)

In [5]:
#httpfs = fshttp.HTTPFileSystem()

In [6]:
# Use this path for https access
#rda_url      =  'https://request.rda.ucar.edu/'
#zarr_path    = rda_url + 'harshah/pelican_test/tas_zarr/'

In [29]:
# Create a PBS cluster object.
# The worker spill directory used to be a hard-coded, user-specific absolute path.
# It can now be overridden with the DASK_LOCAL_DIRECTORY environment variable;
# when the variable is unset the original location is used, so behaviour is unchanged.
spill_dir = os.environ.get(
    'DASK_LOCAL_DIRECTORY',
    '/Users/useradmin/pelican/jupyter notebooks/zarr_experiments/dask/spill',
)

cluster = PBSCluster(
    job_name='dask-wk24-hpc',
    cores=1,
    memory='8GiB',
    processes=1,
    local_directory=spill_dir,
    resource_spec='select=1:ncpus=1:mem=8GB',
    queue='casper',
    walltime='1:30:00',
    # interface='ib0',  # alternative interface, currently disabled
    interface='ext',
)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 39589 instead


In [8]:
### GMST function ###
# calculate global means

def get_lat_name(ds):
    """Return the name of the latitude coordinate of ``ds``.

    The names ``'lat'`` and ``'latitude'`` are tried in that order and the
    first one present in ``ds.coords`` is returned.

    Raises
    ------
    RuntimeError
        If neither name is found in ``ds.coords``.
    """
    candidates = ('lat', 'latitude')
    found = next((name for name in candidates if name in ds.coords), None)
    if found is None:
        raise RuntimeError("Couldn't find a latitude coordinate")
    return found

def global_mean(ds):
    """Return the cos(latitude)-weighted mean of ``ds`` over all dims except ``'time'``.

    Parameters
    ----------
    ds : xarray.DataArray or xarray.Dataset
        Must carry a latitude coordinate named ``'lat'`` or ``'latitude'``
        (looked up with ``get_lat_name``); values are converted with
        ``np.deg2rad``, i.e. treated as degrees.

    Returns
    -------
    Same kind of object as ``ds``, reduced over every dimension other than
    ``'time'``.

    Raises
    ------
    RuntimeError
        Propagated from ``get_lat_name`` when no latitude coordinate exists.
    """
    lat = ds[get_lat_name(ds)]
    weights = np.cos(np.deg2rad(lat))
    # A list (rather than a set) keeps the reduction dimensions in a
    # deterministic order.
    reduce_dims = [dim for dim in ds.dims if dim != 'time']
    # xarray's weighted reduction divides by the sum of the weights of the
    # valid (non-NaN) points, so missing values no longer bias the average the
    # way `(ds * w / w.mean()).mean()` does. For NaN-free input the result is
    # the same weighted mean as before.
    return ds.weighted(weights).mean(reduce_dims)

In [9]:
# client = Client(cluster)
# client

In [10]:
#cluster.scale(3)
#cluster

### Compare GMST calculation

In [20]:
%%time
# Open the monthly 2 m temperature Zarr store through Pelican.
zarr_store = zarr_path + 'e5_tas2m_monthly_1940_2023.zarr'
pel_zarr = PelicanMap(zarr_store, pelfs)

# The director URL ends with '/' and zarr_path starts with '/'; drop the
# trailing slash so the printed URL does not contain '//' in its path.
print(pelican_director.rstrip('/') + zarr_store)

# Select the VAR_2T variable from the opened store and display it
tas_monthly_zarr = xr.open_zarr(pel_zarr).VAR_2T
tas_monthly_zarr

https://osdf-director.osg-htc.org//ncar/rda/harshah/pelican_test/tas_zarr/e5_tas2m_monthly_1940_2023.zarr


Unnamed: 0,Array,Chunk
Bytes,3.90 GiB,495.07 MiB
Shape,"(1009, 721, 1440)","(1000, 721, 180)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.90 GiB 495.07 MiB Shape (1009, 721, 1440) (1000, 721, 180) Dask graph 16 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  1009,

Unnamed: 0,Array,Chunk
Bytes,3.90 GiB,495.07 MiB
Shape,"(1009, 721, 1440)","(1000, 721, 180)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


### Use disk access to check if the dataset can be opened properly

In [12]:
# tas_monthly_zarr1 = xr.open_zarr('/gpfs/csfs1/collections/rda/data/harshah/pelican_test/tas_zarr/' + 'e5_tas2m_monthly_1940_2023.zarr')
# tas_monthly_zarr1

Unnamed: 0,Array,Chunk
Bytes,3.90 GiB,495.07 MiB
Shape,"(1009, 721, 1440)","(1000, 721, 180)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.90 GiB 495.07 MiB Shape (1009, 721, 1440) (1000, 721, 180) Dask graph 16 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  1009,

Unnamed: 0,Array,Chunk
Bytes,3.90 GiB,495.07 MiB
Shape,"(1009, 721, 1440)","(1000, 721, 180)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [13]:
# %%time
#tas_monthly_nc  = xr.open_dataset(zarr_path +'e5_tas2m_monthly_1940_2023.nc',engine='netcdf4',chunks='auto').VAR_2T
#tas_monthly_nc

#### Now compute (spatially weighted) Global Mean

In [14]:
# %%time
# gmst_nc = global_mean(tas_monthly_nc)

In [15]:
%%time
# Area-weighted global mean of the monthly field, one value per time step.
# NOTE(review): the input is dask-backed (see its repr above), so this presumably
# only builds the lazy computation; values are materialised by .compute().
gmst_zarr = global_mean(tas_monthly_zarr)

CPU times: user 12.1 ms, sys: 0 ns, total: 12.1 ms
Wall time: 21.8 ms


In [16]:
%%time
## Generate performance report
#with performance_report(filename ='e5_zarr_gmst.html'):
#     gmst_zarr0 = gmst_zarr.compute()

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 3.34 µs


In [17]:
# %%time
# ## Generate performance report
# with performance_report(filename ='e5_nc_gmst.html'):
#    gmst_nc0 = gmst_nc.compute()

In [18]:
#gmst_zarr0.plot()

In [19]:
#gmst_nc0.plot()