In [6]:
from dask_jobqueue import SLURMCluster 
from dask.distributed import Client 
  
# Launch dask workers through SLURM. Each submitted job is configured with
# 28 cores, 120GB of memory and a 30-minute walltime, requests one exclusive
# HSW24 node, and communicates over the 'ib0' network interface.
cluster = SLURMCluster(
    cores=28,
    name='make_profiles',
    walltime='00:30:00',
    job_extra=['--constraint=HSW24', '--exclusive', '--nodes=1'],
    memory='120GB',
    interface='ib0',
)
cluster.scale(196)

# Connect a client to the cluster's scheduler. `Client` is already imported
# at the top of this cell, so it is not imported a second time here.
client = Client(cluster)
client



0,1
Client  Scheduler: tcp://172.30.100.1:42290  Dashboard: http://172.30.100.1:35907/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [25]:
# Ask the running cluster for more workers. The existing `client` is already
# connected to this cluster's scheduler, so it is reused rather than replaced
# by a second Client (which would leave the first connection open).
cluster.scale(224)
client


0,1
Client  Scheduler: tcp://172.30.100.1:42290  Dashboard: http://172.30.100.1:35907/status,Cluster  Workers: 196  Cores: 196  Memory: 840.84 GB


In [7]:
!squeue -u albert7a

import time

# Block until the scheduler reports at least this many registered workers,
# polling once per second, then print the count that was observed.
min_workers = 2
nb_workers = len(client.scheduler_info()["workers"])
while nb_workers < min_workers:
    time.sleep(1)
    nb_workers = len(client.scheduler_info()["workers"])
print(nb_workers)

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           8192105     hsw24 make_pro albert7a PD       0:00      1 (Priority)
           8192101     hsw24 make_pro albert7a PD       0:00      1 (Priority)
           8192102     hsw24 make_pro albert7a PD       0:00      1 (Priority)
           8192103     hsw24 make_pro albert7a PD       0:00      1 (Priority)
           8192104     hsw24 make_pro albert7a PD       0:00      1 (Priority)
           8192100     hsw24 make_pro albert7a PD       0:00      1 (Priority)
           8192099     hsw24 make_pro albert7a  R       0:02      1 n2833
4


## 0. Modules and libraries

In [8]:
import warnings
warnings.filterwarnings("ignore")
import dask 
import numpy as np
import xarray as xr
import time
import numpy.ma as ma
import matplotlib.cm as mplcm
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import matplotlib.ticker as mticker
import os 
import time 
import glob

import zarr

%matplotlib inline


## 1. Data 

### eNATL60

In [9]:
# Variables that are never wanted from the input files; they are excluded
# at open time through `drop_variables`.
drop_vars = ['nav_lat', 'nav_lon', 'somxl010', 'sosaline', 'sosstsst']
extra_coord_vars = []

# One dask chunk per time step.
chunks = {'time_counter': 1}

# Keyword arguments shared by the multi-file open call below:
# decode CF metadata and times, and concatenate files along 'time_counter'.
open_kwargs = {
    'drop_variables': drop_vars + extra_coord_vars,
    'chunks': chunks,
    'decode_cf': True,
    'decode_times': True,
    'concat_dim': "time_counter",
}


In [11]:
# Gather every gridT-2D NetCDF file matching the pattern, in sorted path
# order, and open them together as one dask-backed dataset.
gridT_2D_pattern = '/store/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB002*-S/*/eNATL60*gridT-2D*nc'
files = sorted(glob.glob(gridT_2D_pattern))
ds = xr.open_mfdataset(files, parallel=True, **open_kwargs)


In [12]:
# 'nav_lat' and 'nav_lon' are listed in `drop_vars`, so they are absent from
# the multi-file dataset. Read them once from the first file (without CF or
# time decoding) and attach them to `ds`.
template = xr.open_dataset(files[0], decode_cf=False, decode_times=False)
for nav_name in ("nav_lat", "nav_lon"):
    ds[nav_name] = template[nav_name]
del template


In [13]:
# Display the dataset summary (dimensions, coordinates, variables, attributes).
ds

<xarray.Dataset>
Dimensions:               (axis_nbounds: 2, time_counter: 9528, x: 8354, y: 4729)
Coordinates:
    time_centered         (time_counter) datetime64[ns] dask.array<shape=(9528,), chunksize=(1,)>
  * time_counter          (time_counter) datetime64[ns] 2009-06-30T00:30:00 ... 2010-07-31T23:30:00
Dimensions without coordinates: axis_nbounds, x, y
Data variables:
    time_centered_bounds  (time_counter, axis_nbounds) datetime64[ns] dask.array<shape=(9528, 2), chunksize=(1, 2)>
    time_counter_bounds   (time_counter, axis_nbounds) datetime64[ns] dask.array<shape=(9528, 2), chunksize=(1, 2)>
    sossheig              (time_counter, y, x) float32 dask.array<shape=(9528, 4729, 8354), chunksize=(1, 4729, 8354)>
    nav_lat               (y, x) float32 ...
    nav_lon               (y, x) float32 ...
Attributes:
    name:         /scratch/tmp/3735876/eNATL60-BLB002_1h_20090630_20090704_gr...
    description:  ocean T grid variables
    title:        ocean T grid variables
    Con

In [14]:
# Select the 'sossheig' variable (sea surface height) from the dataset.
ssh_eNATL60=ds.sossheig

In [15]:
# Display the DataArray summary (shape, chunking, coordinates, attributes).
ssh_eNATL60

<xarray.DataArray 'sossheig' (time_counter: 9528, y: 4729, x: 8354)>
dask.array<shape=(9528, 4729, 8354), dtype=float32, chunksize=(1, 4729, 8354)>
Coordinates:
    time_centered  (time_counter) datetime64[ns] dask.array<shape=(9528,), chunksize=(1,)>
  * time_counter   (time_counter) datetime64[ns] 2009-06-30T00:30:00 ... 2010-07-31T23:30:00
Dimensions without coordinates: y, x
Attributes:
    standard_name:       sea_surface_height_above_geoid
    long_name:           sea surface height
    units:               m
    online_operation:    average
    interval_operation:  40 s
    interval_write:      1 h
    cell_methods:        time: mean (interval: 40 s)

In [16]:
def _mean_by_dayofyear(ssh, first_day, last_day):
    """Return `ssh` restricted to [first_day, last_day], averaged per day of year.

    The selection is made on 'time_counter'; the result is indexed by
    'dayofyear' in place of 'time_counter'.
    """
    window = ssh.sel(time_counter=slice(first_day, last_day))
    return window.groupby('time_counter.dayofyear').mean(dim='time_counter')


# Each calendar year is handled separately so that day-of-year groups never
# mix samples from 2009 and 2010.
ssh_eNATL60_1d_2009 = _mean_by_dayofyear(ssh_eNATL60, '2009-06-30', '2009-12-31')
ssh_eNATL60_1d_2010 = _mean_by_dayofyear(ssh_eNATL60, '2010-01-01', '2010-07-31')


In [17]:
# Join the 2009 and 2010 daily means into one series along 'dayofyear'.
# NOTE(review): both yearly slices contain July dates, so the concatenated
# 'dayofyear' labels are not unique; reductions over the dimension still work,
# but label-based selection on 'dayofyear' would be ambiguous.
ssh_eNATL60_1d=xr.concat([ssh_eNATL60_1d_2009,ssh_eNATL60_1d_2010],dim='dayofyear')

In [18]:
# Standard deviation of the daily-mean fields across all days, per grid point.
std_ssh_eNATL60=ssh_eNATL60_1d.std(dim='dayofyear')

In [19]:
# Display the summary of the standard-deviation field.
std_ssh_eNATL60

<xarray.DataArray 'sossheig' (y: 4729, x: 8354)>
dask.array<shape=(4729, 8354), dtype=float32, chunksize=(4729, 8354)>
Dimensions without coordinates: y, x

In [20]:
# Mean of the daily fields over 'dayofyear', minus the mean of the full
# 'sossheig' array taken over all of its dimensions (no `dim` is passed to
# the second `.mean()`), i.e. the time-mean field relative to the overall mean.
# NOTE(review): the second term reduces the whole (time_counter, y, x) array —
# confirm that subtracting this overall mean is intended.
mean_ssh_eNATL60=ssh_eNATL60_1d.mean(dim='dayofyear')-ssh_eNATL60.mean()

In [21]:
# Display the summary of the mean field.
mean_ssh_eNATL60

<xarray.DataArray 'sossheig' (y: 4729, x: 8354)>
dask.array<shape=(4729, 8354), dtype=float32, chunksize=(4729, 8354)>
Dimensions without coordinates: y, x

In [22]:
# Show the attributes of the source variable; they are reused as the starting
# metadata for the output variables written below.
ssh_eNATL60.attrs

OrderedDict([('standard_name', 'sea_surface_height_above_geoid'),
             ('long_name', 'sea surface height'),
             ('units', 'm'),
             ('online_operation', 'average'),
             ('interval_operation', '40 s'),
             ('interval_write', '1 h'),
             ('cell_methods', 'time: mean (interval: 40 s)')])

In [26]:
# Wrap the standard-deviation field in a Dataset, start from the source
# variable's attributes, override the descriptive names, and write the result
# to NetCDF. The field is dask-backed, so the write is what triggers the
# actual computation.
std_attrs = dict(ssh_eNATL60.attrs)  # copy, so the source attributes are left untouched
std_attrs['standard_name'] = 'standard deviation of ssh'
std_attrs['long_name'] = 'standard deviation of sea surface height'

std_dataset = std_ssh_eNATL60.to_dataset(name='std_sossheig')
std_dataset['std_sossheig'].attrs = std_attrs
std_dataset.attrs['global_attribute'] = 'standard deviation of ssh computed on occigen '
std_dataset.to_netcdf(path='/scratch/cnt0024/hmg2840/albert7a/eNATL60/eNATL60-BLB002-S/1h/SSH-MEAN-STD/eNATL60-BLB002_std-ssh_1d.nc',mode='w')

KilledWorker: ("('concatenate-open_dataset-getitem-ef0493b1d58d0fd8871eae25e53150b8', 16, 0, 0)", <Worker 'tcp://172.30.10.5:43753', memory: 0, processing: 78>)

In [None]:
# Wrap the mean field in a Dataset, start from the source variable's
# attributes, override the descriptive names, and write the result to NetCDF.
# The dataset is built from `mean_ssh_eNATL60` (not the standard-deviation
# field) and kept under its own name, so the DataArray is not overwritten and
# the cell can be re-run.
mean_attrs = dict(ssh_eNATL60.attrs)  # copy, so the source attributes are left untouched
mean_attrs['standard_name'] = 'temporal mean of ssh'
mean_attrs['long_name'] = 'temporal mean of sea surface height'

mean_dataset = mean_ssh_eNATL60.to_dataset(name='mean_sossheig')
mean_dataset['mean_sossheig'].attrs = mean_attrs
mean_dataset.attrs['global_attribute'] = 'temporal mean of ssh computed on occigen '
mean_dataset.to_netcdf(path='/scratch/cnt0024/hmg2840/albert7a/eNATL60/eNATL60-BLB002-S/1h/SSH-MEAN-STD/eNATL60-BLB002_mean-ssh_1d.nc',mode='w')

In [27]:
# Close the client connection first, then close the cluster object.
client.close()
cluster.close()

distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
