# Compute Global Mean Surface Temperature from monthly data

In [2]:
import numpy as np
import scipy as sp
import xarray as xr
import matplotlib as plt
import glob
import re
import nest_asyncio
nest_asyncio.apply()

In [1]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report
from pelicanfs.core import PelicanFileSystem, PelicanMap
import fsspec.implementations.http as fshttp

In [3]:
# # File paths
# rda_scratch = '/gpfs/csfs1/collections/rda/scratch/harshah'
# rda_data    = '/gpfs/csfs1/collections/rda/data/'
# era5_path   = rda_data + 'ds633.0/e5.oper.an.sfc/'
# zarr_path   = rda_scratch + '/tas_zarr/'

In [3]:
# #Pelican test path
pelican_test = '/ncar/rda/harshah/pelican_test'
pelican_director = 'https://osdf-director.osg-htc.org/'
zarr_path   = pelican_test + '/tas_zarr/'

pelfs = PelicanFileSystem(pelican_director)

In [5]:
#httpfs = fshttp.HTTPFileSystem()

In [6]:
# Use this path for https access
#rda_url      =  'https://request.rda.ucar.edu/'
#zarr_path    = rda_url + 'harshah/pelican_test/tas_zarr/'

In [7]:
# Create a PBS cluster object
#cluster = PBSCluster(
#    job_name = 'dask-wk24-hpc',
#    cores = 1,
#    memory = '8GiB',
#    processes = 1,
#    local_directory = '/Users/useradmin/pelican/jupyter notebooks/zarr_experiments/dask/spill',
#    resource_spec = 'select=1:ncpus=1:mem=8GB',
#    queue = 'casper',
#    walltime = '1:30:00',
    #interface = 'ib0'
#    interface = 'en0'
#)

In [4]:
# GMST function ###
# calculate global means

def get_lat_name(ds):
    for lat_name in ['lat', 'latitude']:
        if lat_name in ds.coords:
            return lat_name
    raise RuntimeError("Couldn't find a latitude coordinate")

def global_mean(ds):
    lat = ds[get_lat_name(ds)]
    weight = np.cos(np.deg2rad(lat))
    weight /= weight.mean()
    other_dims = set(ds.dims) - {'time'}
    return (ds * weight).mean(other_dims)

In [9]:
#client = Client(cluster)
#client

In [10]:
#cluster.scale(3)
#cluster

### Comapare GMST calculation

In [12]:
pel_zarr = PelicanMap(zarr_path + 'e5_tas2m_monthly_1940_2023.zarr', pelfs)
print(pelican_director + zarr_path + 'e5_tas2m_monthly_1940_2023.zarr')
tas_monthly_zarr      = xr.open_zarr(pel_zarr)
tas_monthly_zarr

https://osdf-director.osg-htc.org//ncar/rda/harshah/pelican_test/tas_zarr/e5_tas2m_monthly_1940_2023.zarr


1. Consolidating metadata in this existing store with zarr.consolidate_metadata().
2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or
3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.
  tas_monthly_zarr      = xr.open_zarr(pel_zarr)


### Use disk access to check if the dataset can be opened properly

In [13]:
tas_montly_zarr1 =         xr.open_zarr('/gpfs/csfs1/collections/rda/data/harshah/pelican_test/tas_zarr/' + 'e5_tas2m_monthly_1940_2023.zarr')
tas_montly_zarr1

Unnamed: 0,Array,Chunk
Bytes,3.90 GiB,495.07 MiB
Shape,"(1009, 721, 1440)","(1000, 721, 180)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.90 GiB 495.07 MiB Shape (1009, 721, 1440) (1000, 721, 180) Dask graph 16 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  1009,

Unnamed: 0,Array,Chunk
Bytes,3.90 GiB,495.07 MiB
Shape,"(1009, 721, 1440)","(1000, 721, 180)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [12]:
# %%time
#tas_monthly_nc  = xr.open_dataset(zarr_path +'e5_tas2m_monthly_1940_2023.nc',engine='netcdf4',chunks='auto').VAR_2T
#tas_monthly_nc

#### Now compute (spatially weighted) Global Mean

In [13]:
# %%time
# gmst_nc = global_mean(tas_monthly_nc)

In [14]:
%%time
gmst_zarr = global_mean(tas_monthly_zarr)

CPU times: user 5.45 ms, sys: 1.82 ms, total: 7.27 ms
Wall time: 6.73 ms


In [15]:
%%time
## Generate performance report
#with performance_report(filename ='e5_zarr_gmst.html'):
#     gmst_zarr0 = gmst_zarr.compute()

CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns
Wall time: 1.91 µs


In [16]:
# %%time
# ## Generate performance report
# with performance_report(filename ='e5_nc_gmst.html'):
#    gmst_nc0 = gmst_nc.compute()

In [17]:
#gmst_zarr0.plot()

In [18]:
#gmst_nc0.plot()