In [1]:
import h5netcdf

In [2]:
# libraries
import os
import joblib
from osgeo import gdal
import pandas as pd
import numpy as np
import xarray as xr
import glob
import re
import rioxarray
import dask.array
from dask.distributed import Client, progress

In [3]:
# Start a local Dask cluster with 8 workers of 8 threads each.
client = Client(n_workers=8, threads_per_worker=8)
# Show the client summary (dashboard link, workers, threads, memory).
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 35381 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/35381/status,

0,1
Dashboard: /proxy/35381/status,Workers: 8
Total threads: 64,Total memory: 240.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:44497,Workers: 8
Dashboard: /proxy/35381/status,Total threads: 64
Started: Just now,Total memory: 240.00 GiB

0,1
Comm: tcp://127.0.0.1:38641,Total threads: 8
Dashboard: /proxy/43003/status,Memory: 30.00 GiB
Nanny: tcp://127.0.0.1:39461,
Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-4ezodpd3,Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-4ezodpd3

0,1
Comm: tcp://127.0.0.1:45065,Total threads: 8
Dashboard: /proxy/32969/status,Memory: 30.00 GiB
Nanny: tcp://127.0.0.1:39639,
Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-7khq4rpi,Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-7khq4rpi

0,1
Comm: tcp://127.0.0.1:38011,Total threads: 8
Dashboard: /proxy/33403/status,Memory: 30.00 GiB
Nanny: tcp://127.0.0.1:42803,
Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-ql6zsg__,Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-ql6zsg__

0,1
Comm: tcp://127.0.0.1:38531,Total threads: 8
Dashboard: /proxy/33953/status,Memory: 30.00 GiB
Nanny: tcp://127.0.0.1:46779,
Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-b2j_4hte,Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-b2j_4hte

0,1
Comm: tcp://127.0.0.1:42279,Total threads: 8
Dashboard: /proxy/37165/status,Memory: 30.00 GiB
Nanny: tcp://127.0.0.1:36649,
Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-z1t51tsd,Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-z1t51tsd

0,1
Comm: tcp://127.0.0.1:44421,Total threads: 8
Dashboard: /proxy/34235/status,Memory: 30.00 GiB
Nanny: tcp://127.0.0.1:37881,
Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-xb5woaro,Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-xb5woaro

0,1
Comm: tcp://127.0.0.1:37037,Total threads: 8
Dashboard: /proxy/42091/status,Memory: 30.00 GiB
Nanny: tcp://127.0.0.1:45981,
Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-spxspnb1,Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-spxspnb1

0,1
Comm: tcp://127.0.0.1:33423,Total threads: 8
Dashboard: /proxy/44571/status,Memory: 30.00 GiB
Nanny: tcp://127.0.0.1:38483,
Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-791yo3pf,Local directory: /scratch-local/qiahan.4250885/dask-scratch-space/worker-791yo3pf


In [4]:
# Root directory of the project data; `inputData` is derived from it further down.
workingPath = "/gpfs/work2/0/ttse0619/qianqian/global_data_Qianqian/"

In [5]:
# function for getting directory of input data
def get_directories_with_number_and_ending(directory_path, ending):
    """Return the names of sub-directories whose name starts with a number.

    Parameters
    ----------
    directory_path : str or os.PathLike
        Directory whose immediate children are inspected (not recursive).
    ending : str
        Required suffix of the directory name, matched literally. If it is
        empty (or otherwise falsy), only names made up entirely of digits
        are accepted.

    Returns
    -------
    list of str
        Matching directory names (names only, not full paths), sorted
        alphabetically. ``os.scandir`` yields entries in arbitrary order, so
        the sort is what makes positional indexing into the result
        reproducible between runs and machines.

    Raises
    ------
    OSError
        Propagated from ``os.scandir`` when ``directory_path`` cannot be read.
    """
    if ending:
        # Leading digits, anything in between, then the literal ending.
        pattern = re.compile(r'^\d+.*{}$'.format(re.escape(ending)))
    else:
        # Names that consist of digits only.
        pattern = re.compile(r'^\d+$')

    # The context manager closes the scandir iterator as soon as we are done.
    with os.scandir(directory_path) as entries:
        return sorted(
            entry.name
            for entry in entries
            if entry.is_dir() and pattern.match(entry.name)
        )

In [6]:
### define the path of input data, set the reference file for spatial resample
# Directory that holds one sub-folder of input data per year.
inputData = workingPath + "1input_data/"
# Names of all year folders, i.e. directories that start with a number and end in "global".
year_list = get_directories_with_number_and_ending(inputData, "global")
# Year handled by this run: the 1-based position (4 here) follows the process id in the sbatch script.
year = year_list[4 - 1]
print(year)

2015global


In [7]:
import geopandas as gpd
from shapely.geometry import box

# Read the Europe boundary shapefile; its overall bounding box is used as the clipping window.
shapefile_path = "/gpfs/work2/0/ttse0619/qianqian/global_data_Qianqian/Emulator/input_data/EuropeBoundary.shp"
gdf = gpd.read_file(shapefile_path)
# total_bounds is ordered (minx, miny, maxx, maxy), here (lon_min, lat_min, lon_max, lat_max).
bbox = gdf.total_bounds
# Display the bounding box.
bbox

array([-31.28903052,  34.93055094,  68.93136141,  81.85192337])

In [7]:
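# bbox longitudes expressed on a 0-360 longitude axis (western edge -31.289... + 360, eastern edge unchanged): [328.71096947, 68.93136141]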

## 0) Clip directly: this loses the data in the longitude range [-31, 0], but the export runs without problems

In [26]:
def custom_preprocess(ds):
    """Clip one dataset to the bounding box before the files are combined.

    Selects latitude from bbox[3] to bbox[1] and longitude from bbox[0] to
    bbox[2] by label. The latitude bounds are passed in (max, min) order,
    presumably because the files store latitude from north to south (confirm
    against the data).

    Per the notebook note for this variant, the part of the box west of 0
    degrees is lost because bbox[0] is negative.

    Parameters
    ----------
    ds : xarray.Dataset
        A single opened file with `latitude` and `longitude` coordinates.

    Returns
    -------
    xarray.Dataset
        The clipped dataset.
    """
    lat_window = slice(bbox[3], bbox[1])
    lon_window = slice(bbox[0], bbox[2])
    return ds.sel(latitude=lat_window, longitude=lon_window)

In [27]:
### 0) read era5land data
# select Europe data of ERA5Land and save it as netcdf

# Open every ERA5-Land file of the selected year as one dataset;
# custom_preprocess is applied to each file before the files are combined.
all1 = xr.open_mfdataset(inputData+year+"/era5land/era5-land*.nc", preprocess=custom_preprocess, chunks=False)

## 1) Clip the areas [328, 360] and [0, 68] separately, then concat. Convert [0, 360] to [-180, 180] after open_mfdataset. open_mfdataset is fast, but unmanaged memory is too high during the export. After I set chunks for open_mfdataset there are no warnings or errors, but it is much slower than when I only export Lon > 0

In [8]:
def custom_preprocess(ds):
    """Clip one dataset to the bounding box on a 0-360 longitude axis.

    The box crosses 0 degrees, so it is cut out as two longitude windows
    (328.71096947 to 360 and 0 to 68.93136141) that are concatenated along
    `longitude`, western piece first. The two longitude limits are hard-coded;
    latitude is taken from bbox[3] down to bbox[1].

    Parameters
    ----------
    ds : xarray.Dataset
        A single opened file with `latitude` and `longitude` coordinates.

    Returns
    -------
    xarray.Dataset
        Both clipped pieces joined along `longitude`; longitudes are still on
        the original axis, not yet converted to negative values.
    """
    lat_window = slice(bbox[3], bbox[1])
    # (start, stop) label pairs, in the order the pieces are concatenated
    lon_windows = [(328.71096947, 360), (0, 68.93136141)]
    pieces = [
        ds.sel(latitude=lat_window, longitude=slice(lon_start, lon_stop))
        for lon_start, lon_stop in lon_windows
    ]
    return xr.concat(pieces, dim='longitude')

In [11]:
### 0) read era5land data
# select Europe data of ERA5Land and save it as netcdf

# Open every ERA5-Land file of the selected year as one lazily loaded dataset;
# custom_preprocess clips each file before the files are combined.
all1 = xr.open_mfdataset(inputData+year+"/era5land/era5-land*.nc", preprocess=custom_preprocess, chunks={"longitude":1000})

# Relabel longitudes above 180 as negative values (lon - 360); values up to 180 are kept.
lon_name = 'longitude'
all1['longitude_adjusted'] = xr.where(
    all1[lon_name] > 180,
    all1[lon_name] - 360,
    all1[lon_name])
# Make the adjusted values the dimension coordinate, order the data west to east,
# drop the old coordinate and give the new one the original name.
# sortby replaces the former sel(sorted(...)) and drop_vars the deprecated drop.
all1 = (
    all1
    .swap_dims({lon_name: 'longitude_adjusted'})
    .sortby('longitude_adjusted')
    .drop_vars(lon_name)
    .rename({'longitude_adjusted': lon_name}))

In [12]:
# Display the combined dataset (lazy repr with array shapes and chunk layout).
all1

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 918.45 MiB Shape (8760, 469, 1002) (744, 469, 690) Dask graph 24 chunks in 61 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 918.45 MiB Shape (8760, 469, 1002) (744, 469, 690) Dask graph 24 chunks in 61 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 918.45 MiB Shape (8760, 469, 1002) (744, 469, 690) Dask graph 24 chunks in 61 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 918.45 MiB Shape (8760, 469, 1002) (744, 469, 690) Dask graph 24 chunks in 61 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 918.45 MiB Shape (8760, 469, 1002) (744, 469, 690) Dask graph 24 chunks in 61 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 918.45 MiB Shape (8760, 469, 1002) (744, 469, 690) Dask graph 24 chunks in 61 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 918.45 MiB Shape (8760, 469, 1002) (744, 469, 690) Dask graph 24 chunks in 61 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 918.45 MiB Shape (8760, 469, 1002) (744, 469, 690) Dask graph 24 chunks in 61 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,918.45 MiB
Shape,"(8760, 469, 1002)","(744, 469, 690)"
Dask graph,24 chunks in 61 graph layers,24 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## 2) Convert [0, 360] to [-180, 180] first and then clip: this is too much computation for open_mfdataset, and unmanaged memory is too high during the export

In [11]:
def custom_preprocess(ds):
    """Convert longitudes above 180 to negative values, then clip to the bounding box.

    Longitudes greater than 180 are relabelled as ``lon - 360``, the dataset
    is sorted along `longitude`, and the result is clipped to
    latitude bbox[3]..bbox[1] and longitude bbox[0]..bbox[2].

    Side effect: the adjusted longitudes are also stored in `ds` as the
    variable 'longitude_adjusted'.

    Parameters
    ----------
    ds : xarray.Dataset
        A single opened file with `latitude` and `longitude` coordinates.

    Returns
    -------
    xarray.Dataset
        The converted, sorted and clipped dataset.
    """
    ## 1) first way to convert (not used)
    # lon_name = 'longitude'
    # ds['longitude_adjusted'] = xr.where(
    #     ds[lon_name] > 180,
    #     ds[lon_name] - 360,
    #     ds[lon_name])
    # ds = (
    #     ds
    #     .swap_dims({lon_name: 'longitude_adjusted'})
    #     .sel(**{'longitude_adjusted': sorted(ds.longitude_adjusted)})
    #     .drop(lon_name))
    # ds = ds.rename({'longitude_adjusted': lon_name})

    ## 3) third way to convert (not used)
    # ds['longitude'] = (ds['longitude'] + 180) % 360 - 180
    # ds = ds.sortby('longitude')

    ## 2) second way to convert (the one in use)
    lon_name = 'longitude'
    ds['longitude_adjusted'] = xr.where(ds[lon_name] > 180, ds[lon_name] - 360, ds[lon_name])
    relabelled = ds.assign_coords({lon_name: ds['longitude_adjusted']})
    ordered = relabelled.sortby(lon_name)

    # clip to the bounding box once the longitudes are in ascending order
    return ordered.sel(
        latitude=slice(bbox[3], bbox[1]),
        longitude=slice(bbox[0], bbox[2]),
    )

# Open every ERA5-Land file of the selected year, applying the conversion and clip to each file.
all1 = xr.open_mfdataset(inputData+year+"/era5land/era5-land*.nc", preprocess=custom_preprocess, chunks=False)

In [None]:
### 0) read era5land data
# select Europe data of ERA5Land and save it as netcdf
# Open every ERA5-Land file of the selected year as one dataset;
# custom_preprocess is applied to each file before the files are combined.
all1 = xr.open_mfdataset(inputData+year+"/era5land/era5-land*.nc", preprocess=custom_preprocess, chunks=False)

In [14]:
# Display the combined dataset (lazy repr with array shapes and chunk layout).
all1

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 49 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 49 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 49 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 49 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 49 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 49 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 49 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 49 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 49 graph layers,12 chunks in 49 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## export

In [13]:
# Chunk sizes per dimension before rechunking for the export.
all1.chunks

Frozen({'time': (744, 672, 744, 720, 744, 720, 744, 744, 720, 744, 720, 744), 'latitude': (469,), 'longitude': (312, 690)})

In [14]:
# Merge the spatial chunks so that every chunk spans the whole latitude and
# longitude extent. -1 means "one chunk covering the full dimension", so this
# no longer depends on the grid size of a particular bounding box.
# The chunking along time is left as it is.
all2 = all1.chunk({"latitude": -1, "longitude": -1})

In [15]:
# Chunk sizes per dimension after rechunking.
all2.chunks

Frozen({'time': (744, 672, 744, 720, 744, 720, 744, 744, 720, 744, 720, 744), 'latitude': (469,), 'longitude': (1002,)})

In [16]:
# Display the rechunked dataset (lazy repr with array shapes and chunk layout).
all2

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 62 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 62 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 62 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 62 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 62 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 62 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 62 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.34 GiB 1.30 GiB Shape (8760, 469, 1002) (744, 469, 1002) Dask graph 12 chunks in 62 graph layers Data type float32 numpy.ndarray",1002  469  8760,

Unnamed: 0,Array,Chunk
Bytes,15.34 GiB,1.30 GiB
Shape,"(8760, 469, 1002)","(744, 469, 1002)"
Dask graph,12 chunks in 62 graph layers,12 chunks in 62 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [17]:
%%time
# Write the rechunked dataset to a single NetCDF4 file; this triggers the actual computation.
all2.to_netcdf(
    path='/gpfs/work2/0/ttse0619/qianqian/global_data_Qianqian/1input_data/2015global/era5land/era5land2015_10km25.nc',
    format='NETCDF4',
)

CPU times: user 48 s, sys: 6.88 s, total: 54.9 s
Wall time: 3min 40s


In [14]:
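# %%time
# write_netcdf = all2.to_netcdf('/gpfs/work2/0/ttse0619/qianqian/global_data_Qianqian/1input_data/2015global/era5land/era5land2015_10km3.nc',format='NETCDF4')
# write_netcdf.compute()
# NOTE: to_netcdf() returns None unless compute=False is passed, so there is nothing to call
# .compute() on -- that is the AttributeError shown below.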

AttributeError: 'NoneType' object has no attribute 'compute'

# Test whether the exported netCDF is complete

In [6]:
# Test window in degrees: latitude 40 to 60, longitude 2 to 22.
lat1, lat2 = 40, 60
lon1, lon2 = 2, 22

In [11]:
# Open an exported file lazily in 51 x 51 spatial chunks and keep only the test window.
# The latitude slice runs from lat2 down to lat1.
test = (
    xr.open_dataset(inputData+year+"/era5land/era5land2015_10km1.nc", chunks={"latitude":51, "longitude":51})
    .sel(latitude=slice(lat2, lat1), longitude=slice(lon1, lon2))
)

In [12]:
# Display the selected test window (lazy repr with array shapes and chunk layout).
test

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.32 GiB 86.92 MiB Shape (8760, 201, 201) (8760, 51, 51) Dask graph 25 chunks in 3 graph layers Data type float32 numpy.ndarray",201  201  8760,

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.32 GiB 86.92 MiB Shape (8760, 201, 201) (8760, 51, 51) Dask graph 25 chunks in 3 graph layers Data type float32 numpy.ndarray",201  201  8760,

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.32 GiB 86.92 MiB Shape (8760, 201, 201) (8760, 51, 51) Dask graph 25 chunks in 3 graph layers Data type float32 numpy.ndarray",201  201  8760,

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.32 GiB 86.92 MiB Shape (8760, 201, 201) (8760, 51, 51) Dask graph 25 chunks in 3 graph layers Data type float32 numpy.ndarray",201  201  8760,

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.32 GiB 86.92 MiB Shape (8760, 201, 201) (8760, 51, 51) Dask graph 25 chunks in 3 graph layers Data type float32 numpy.ndarray",201  201  8760,

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.32 GiB 86.92 MiB Shape (8760, 201, 201) (8760, 51, 51) Dask graph 25 chunks in 3 graph layers Data type float32 numpy.ndarray",201  201  8760,

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.32 GiB 86.92 MiB Shape (8760, 201, 201) (8760, 51, 51) Dask graph 25 chunks in 3 graph layers Data type float32 numpy.ndarray",201  201  8760,

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.32 GiB 86.92 MiB Shape (8760, 201, 201) (8760, 51, 51) Dask graph 25 chunks in 3 graph layers Data type float32 numpy.ndarray",201  201  8760,

Unnamed: 0,Array,Chunk
Bytes,1.32 GiB,86.92 MiB
Shape,"(8760, 201, 201)","(8760, 51, 51)"
Dask graph,25 chunks in 3 graph layers,25 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [13]:
# Mean of u10 over axes 1 and 2 (the two spatial axes of the (8760, 201, 201) array),
# ignoring NaN; a step whose whole window is NaN yields NaN.
a = np.nanmean(test['u10'], axis=(1, 2))

# Flag the steps whose mean is NaN and count them (each True counts as 1).
nan_mask = np.isnan(a)
nan_count = nan_mask.sum()

  a = np.nanmean(test['u10'], axis=(1,2))


In [14]:
# Number of time steps whose spatial mean of u10 is NaN.
nan_count

7344