In [1]:
import datetime

import dask
import dask.array
import dask.dataframe
import dask.distributed
import netCDF4
import numpy
import pandas
import pystare
import starepandas
import xarray
from pyhdf.SD import SD

In [2]:
# Start a Dask distributed client, requesting four workers.
client = dask.distributed.Client(
    n_workers=4,
)

In [3]:
# Open the MOD05_L2 test granule and pull its geolocation datasets as float64.
# `hdf` is left open on purpose: a later cell selects another dataset from it.
file_path = '../tests/data/granules/MOD05_L2.A2019336.0000.061.2019336211522.hdf'
hdf = SD(file_path)
lat, lon = (
    hdf.select(dataset_name).get().astype(numpy.float64)
    for dataset_name in ('Latitude', 'Longitude')
)

In [4]:
# Baseline: convert the whole granule in a single serial call and print the
# elapsed wall-clock time.
# The result is deliberately NOT stored under the name `stare`: a later cell
# defines a function called `stare`, and re-running this cell afterwards would
# replace that function with an array and break the map_blocks cell.
start = datetime.datetime.now()
sids_serial = pystare.from_latlon2D(lat=lat,
                                    lon=lon,
                                    adapt_resolution=True)
print(datetime.datetime.now()-start)

0:00:01.068474


# Dask

In [5]:
# Put latitude and longitude into one array (index 0 = lat, index 1 = lon)
# so a single dask array can hand both to the conversion function per block.
coords = numpy.array([lat, lon])
coords_d = dask.array.from_array(
    coords,
    chunks=(2, 500, 1354),
)
coords_d

Unnamed: 0,Array,Chunk
Bytes,1.75 MB,1.75 MB
Shape,"(2, 406, 270)","(2, 406, 270)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.75 MB 1.75 MB Shape (2, 406, 270) (2, 406, 270) Count 1 Tasks 1 Chunks Type float64 numpy.ndarray",270  406  2,

Unnamed: 0,Array,Chunk
Bytes,1.75 MB,1.75 MB
Shape,"(2, 406, 270)","(2, 406, 270)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray


In [6]:
def stare(coords):
    """Run ``pystare.from_latlon2D`` on one stacked coordinate block.

    ``coords[0]`` and ``coords[1]`` are forwarded as the first and second
    positional arguments (latitude and longitude, given how this notebook
    builds ``coords``), with ``adapt_resolution=True``.

    Returns whatever ``pystare.from_latlon2D`` returns for that block.
    """
    first_plane = coords[0]
    second_plane = coords[1]
    return pystare.from_latlon2D(first_plane, second_plane, adapt_resolution=True)

In [7]:
# Apply `stare` to every block of `coords_d`. The function consumes axis 0
# (the lat/lon pair), hence drop_axis=[0].
# No explicit `chunks` is passed: the function keeps the remaining two axes
# unchanged, so dask can derive the output chunking from the input blocks.
# A hard-coded chunk shape that differs from the input chunking would give the
# lazy array wrong shape/chunk metadata.
s_d = coords_d.map_blocks(stare, drop_axis=[0], dtype='int64')
s_d = s_d.compute()

# Xarray Ufunc

In [8]:
# Wrap both coordinate arrays as 2-D DataArrays with dims ('x', 'y') and
# chunk them into pieces of 500 along 'x'.
lat_x, lon_x = (
    xarray.DataArray(values, dims=['x', 'y']).chunk({'x': 500})
    for values in (lat, lon)
)

In [9]:
# Time the xarray route: build the lazy apply_ufunc result, materialise it as
# a numpy array, then print the elapsed wall-clock time.
# NOTE(review): unlike the other two conversions in this notebook, this call
# does not pass adapt_resolution=True -- confirm whether that is intended.
start = datetime.datetime.now()
s_d = xarray.apply_ufunc(
    pystare.from_latlon2D,
    lat_x,
    lon_x,
    dask='parallelized',
    output_dtypes=[numpy.int64],
)

sids = numpy.array(s_d)
elapsed = datetime.datetime.now() - start
print(elapsed)

0:00:00.739311


In [10]:
# Display the computed index array (numpy abbreviates the repr of large arrays).
sids

array([[4298473764500464827, 4298458168380511227, 4297394569014717915,
        ..., 3604325910693007291, 3604468594879342971,
        3604495833162833211],
       [4298462872969244315, 4298459225563237243, 4297297422977447771,
        ..., 3604330264741384027, 3604471380516185659,
        3604465738696115451],
       [4298462873435275387, 4298459227962358491, 4297297429637206139,
        ..., 3604322952727773243, 3604471381825883419,
        3604465733841987675],
       ...,
       [3652144132972193499, 3650323462937407931, 3650325177740030203,
        ..., 3727730728598789563, 3727841631302055067,
        3727831398613792027],
       [3652144129926505115, 3650323400334252059, 3650325178786309339,
        ..., 3727730732960989627, 3727841627078009595,
        3727831398032615643],
       [3652167198498770747, 3652159322973158139, 3650318911383240379,
        ..., 3727838256925064987, 3727843063731949819,
        3727853163225616443]])

# Write Sidecar

In [11]:
# Create the NetCDF4 output file. Nothing is written to it yet; the `with`
# block closes the file even if code added inside it raises.
with netCDF4.Dataset('test.nc', "w", format="NETCDF4") as rootgrp:
    pass  # TODO: create the dimensions/variables that belong in this file

# Dask DataFrame

In [12]:
# Read the 'Water_Vapor_Infrared' dataset from the open granule as float64.
# Latitude is already loaded in the cell that opens the granule, so it is not
# read a second time here.
band1 = hdf.select('Water_Vapor_Infrared').get().astype(numpy.double)

In [13]:
# One row per array element: the index value next to its band value.
df = pandas.DataFrame({'stare': sids.flatten(), 'band1': band1.flatten()})
# set_index returns a new frame rather than modifying in place, so the result
# is kept; otherwise `ddf` would remain unindexed for later cells.
ddf = dask.dataframe.from_pandas(df, npartitions=4).set_index('stare')
ddf

Unnamed: 0_level_0,band1
npartitions=4,Unnamed: 1_level_1
3604081108103418395,float64
3618748658197715035,...
3637654733721336411,...
3735896062517019259,...
4298544093115426171,...
