In [2]:
import os
os.environ['PROJ_LIB'] = '/usr/local/share/proj/'

import pandas
import starepandas
import pystare
import netCDF4
import pickle
import numpy
import shapely
import geopandas
import starepandas
import sqlalchemy
import dask

In [3]:
# Base worker count. Every worker-count argument passed to starepandas below is n_workers*10.
n_workers = 62
# Root directory: all pickles and GeoPackages in this notebook live under '<data_dir>/pickles/'.
# The value ends in '/', so the paths built from it contain a double slash.
data_dir = '/tablespace/xcal/'

In [4]:
# Show the installed starepandas version so the recorded outputs can be tied to a release.
starepandas.__version__

'0.5.31'

# Load Labels

In [5]:
def _load_pickle(name):
    """Unpickle ``<data_dir>/pickles/<name>.pickle`` and return its content."""
    # pickle.load can execute arbitrary code: only point this at files you trust.
    with open('{}/pickles/{}.pickle'.format(data_dir, name), 'rb') as f:
        return pickle.load(f)


timestamps = _load_pickle('timestamps')
data = _load_pickle('data')
# Labels come from 'largest_100.pickle'; pass 'labels' instead to read 'labels.pickle'.
labels = _load_pickle('largest_100')

In [6]:
#length = 10
#timestamps = timestamps[0:length]
#labels = labels[0:length]
#data = data[0:length]

# Load STARE Sidecar

## Adapt in lat direction

In [7]:
# Cell-centre coordinates of the global 0.1-degree grid (1800 latitudes, 3600 longitudes).
lat_centers = numpy.arange(-89.95, 90, 0.1)
lon_centers = numpy.arange(-179.95, 180, 0.1)

# Latitude grid: after flip + transpose every row holds one latitude and the
# rows run from north to south.
lats = numpy.tile(lat_centers, (3600, 1))
lats = numpy.ascontiguousarray(numpy.flip(lats).transpose())

# Longitude grid: every row is the full west-to-east sweep.
lons = numpy.ascontiguousarray(numpy.tile(lon_centers, (1800, 1)))

# One STARE index value per grid cell.
# NOTE(review): adapt_level=True presumably derives the level from the spacing
# of neighbouring cells along the array layout - confirm against pystare docs.
sids = pystare.from_latlon_2d(lats, lons, adapt_level=True)
res = pystare.spatial_resolution(sids)
# Coerce every value to one level below the resolution reported for it.
# NOTE: the "Adapt in lon direction" cell reassigns `sidecar` if it is run afterwards.
sidecar = pystare.spatial_coerce_resolution(sids, res - 1)

## Adapt in lon direction

In [8]:
# Cell-centre coordinates of the global 0.1-degree grid (1800 latitudes, 3600 longitudes).
lat_centers = numpy.arange(-89.95, 90, 0.1)
lon_centers = numpy.arange(-179.95, 180, 0.1)

# Here the grids are handed to pystare in transposed layout (longitude along
# the first axis); the result is transposed back afterwards.
lats = numpy.ascontiguousarray(numpy.flip(numpy.tile(lat_centers, (3600, 1))))
lons = numpy.ascontiguousarray(numpy.tile(lon_centers, (1800, 1)).transpose())

# NOTE(review): adapt_level=True presumably derives the level from the spacing
# of neighbouring cells along the array layout - confirm against pystare docs.
sids = pystare.from_latlon_2d(lats, lons, adapt_level=True).transpose()
res = pystare.spatial_resolution(sids)
# Coerce every value to one level below the resolution reported for it.
# NOTE: this reassigns `sidecar`; when both "Adapt in ..." cells are run, this
# value is the one the rest of the notebook uses.
sidecar = pystare.spatial_coerce_resolution(sids, res - 1)

# Create Areas with haversine formula:

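- We assume the north–south extent of a cell to be constant: ```R * Δφ```. For Δφ = 0.1 degrees, this is approximately 11.1 km.
- The east–west extent of a 0.1 degree cell shrinks with latitude. It is computed with the haversine formula below, evaluated along a parallel (φ1 = φ2, so Δφ = 0) with Δλ = 0.1 degrees.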

```
a = sin²(Δφ/2) + cos φ1 ⋅ cos φ2 ⋅ sin²(Δλ/2)
c = 2 ⋅ atan2( √a, √(1−a) )
d = R ⋅ c
```

In [9]:
def lon_dist(lats, r, delta_lon):
    """Great-circle length of an east-west step of ``delta_lon`` degrees.

    Haversine formula with both end points on the same latitude (Δφ = 0):

        a = cos²(φ) · sin²(Δλ / 2)
        c = 2 · atan2(√a, √(1 − a))
        d = r · c

    Parameters
    ----------
    lats : float or array_like
        Latitude(s) φ in degrees.
    r : float
        Sphere radius; the result is in the same unit.
    delta_lon : float or array_like
        Longitude difference Δλ in degrees.

    Returns
    -------
    float or numpy.ndarray
        Distance(s), broadcast over ``lats`` and ``delta_lon``.
    """
    half_delta = numpy.radians(delta_lon) / 2
    a = numpy.cos(numpy.radians(lats))**2 * numpy.sin(half_delta)**2
    # Rounding may push `a` marginally outside [0, 1]; sqrt(1 - a) would then be NaN.
    a = numpy.clip(a, 0.0, 1.0)
    c = 2 * numpy.arctan2(numpy.sqrt(a), numpy.sqrt(1 - a))
    return r * c

# Sphere radius used for all distances (metres).
r = 6371e3

# Grid spacing in degrees.
delta_lat = 0.1
delta_lon = 0.1

# Latitude of the lower (lats0) and upper (lats1) edge of every cell row,
# repeated across the 3600 longitude columns. Rows run south to north here,
# whereas the sidecar latitude grids are flipped; the cell area is symmetric
# about the equator (up to rounding), so the values line up either way.
lats0 = numpy.repeat(numpy.arange(-90, 90, 0.1)[:, numpy.newaxis], 3600, axis=1)
lats1 = numpy.repeat(numpy.arange(-89.9, 90.1, 0.1)[:, numpy.newaxis], 3600, axis=1)

# Each cell is treated as a trapezoid: parallel sides a and b along its two
# bounding parallels, height h along the meridian.
a = lon_dist(lats0, r, delta_lon)
b = lon_dist(lats1, r, delta_lon)
h = r * numpy.radians(delta_lat)
areas = (a + b) / 2 * h  # square meters

# Create STAREDF

In [10]:
def make_row(label, timestep):
    """Collect all cells carrying `label` at `timestep` into one record.

    Reads the notebook-level objects ``labels``, ``sidecar``, ``areas``,
    ``data`` and ``timestamps``.

    Parameters
    ----------
    label : int
        Label id to look for in ``labels[timestep]``.
    timestep : int
        Index along the first axis of ``labels`` / ``data`` and into ``timestamps``.

    Returns
    -------
    dict
        ``label``, ``timestep``, ``timestamp``; the first-/second-axis indices
        ``x`` / ``y`` of the matching cells; per-cell ``sids``, ``cell_areas``
        and ``precips``; and the scalars ``tot_area`` (sum of the cell areas)
        and ``tot_precip`` (cubic meters). All per-cell arrays are empty when
        the label does not occur at this timestep.
    """
    x, y = (labels[timestep] == label).nonzero()
    sids = sidecar[x, y]
    area = areas[x, y]
    precip = data[timestep, x, y]
    # /1000 converts mm to m; /2 because each timestep covers half an hour
    # (precip is presumably an hourly rate in mm/h - TODO confirm).
    # numpy.sum is used instead of the builtin sum to avoid a Python-level loop.
    tot_precip = numpy.sum(area * precip / 1000 / 2)
    row = {'label': label,
           'timestep': timestep,
           'timestamp': timestamps[timestep],
           'x': x, 'y': y,
           'cell_areas': area,
           'tot_area': numpy.sum(area),
           'precips': precip,
           'tot_precip': tot_precip,  # cubic meters
           'sids': sids}
    return row

def make_label_sdf(label):
    """Build a STAREDataFrame with one row per timestep in which `label` occurs.

    Every index of the notebook-level ``timestamps`` is visited via
    ``make_row``; rows whose ``'sids'`` entry is empty are left out. The
    ``'sids'`` column is passed to the frame as its sids column.
    """
    candidates = (make_row(label=label, timestep=step) for step in range(len(timestamps)))
    occupied = [row for row in candidates if len(row['sids']) > 0]
    return starepandas.STAREDataFrame(occupied, sids='sids')

In [11]:
# Distinct label ids over all timesteps. Only strictly positive values are kept;
# 0 presumably marks cells without a label - TODO confirm.
label_names = numpy.unique(labels[labels>0])
label_names

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100], dtype=uint32)

In [None]:
# One frame per label, each extended by a 'cover' column: the 'sids' of the
# per-timestep dissolve, joined back on the timestep index.
sdfs = []
for label in label_names:
    print(label)  # progress indicator
    label_sdf = make_label_sdf(label=label)
    dissolved = label_sdf.stare_dissolve(by='timestep', num_workers=n_workers*10)
    cover = dissolved['sids'].rename('cover')
    sdfs.append(label_sdf.set_index('timestep').join(cover))

1


In [None]:
# Stack the per-label frames. ignore_index=True replaces the 'timestep' index that
# the per-label loop set with a fresh integer index, so 'timestep' is not carried
# into `merged` ('timestamp' is).
merged = pandas.concat(sdfs, ignore_index=True)
# Presumably makes 'cover' the column starepandas treats as the frame's sids - TODO confirm.
merged.set_sids('cover', inplace=True)

In [None]:
# Checkpoint: persist `merged` so the per-label loop does not have to be re-run.
featuredb_pickle = f'{data_dir}/pickles/featuredb.pickle'
with open(featuredb_pickle, mode='wb') as sink:
    pickle.dump(merged, sink)

In [None]:
# Restore the checkpoint. pickle.load can execute arbitrary code: only load
# files written by this notebook.
featuredb_pickle = f'{data_dir}/pickles/featuredb.pickle'
with open(featuredb_pickle, mode='rb') as source:
    merged = pickle.load(source)

# Making geometries

In [None]:
# Build the trixel geometries for every row of `merged` with n_workers*10 workers.
# wrap_lon=False: longitudes are presumably left unwrapped so that the antimeridian
# can be handled explicitly afterwards - TODO confirm against starepandas docs.
trixels = merged.make_trixels(num_workers=n_workers*10, wrap_lon=False)

## Splitting Antimeridian

In [None]:
# All three calls mutate `merged` in place, so this cell is not idempotent:
# re-run it only after recreating or reloading `merged`.
merged.set_trixels(trixels, inplace=True)
# NOTE(review): this call spells the keyword `n_workers`, while the other starepandas
# calls in this notebook use `num_workers` - confirm against the installed version.
merged.split_antimeridian(inplace=True, drop=True, n_workers=n_workers*10)
# Use the trixel column as the active geometry, declared as EPSG:4326.
merged.set_geometry('trixels', inplace=True, crs='EPSG:4326')

In [None]:
# Visual sanity check: plot the geometries of all rows at a single timestamp.
merged[merged.timestamp=='2022-05-22 00:00:00'].plot()

In [None]:
# Checkpoint again, now including the trixel geometries. This overwrites the
# featuredb.pickle written before the geometries were made.
featuredb_pickle = f'{data_dir}/pickles/featuredb.pickle'
with open(featuredb_pickle, mode='wb') as sink:
    pickle.dump(merged, sink)

In [None]:
# Restore the checkpoint. pickle.load can execute arbitrary code: only load
# files written by this notebook.
featuredb_pickle = f'{data_dir}/pickles/featuredb.pickle'
with open(featuredb_pickle, mode='rb') as source:
    merged = pickle.load(source)

## Write to gpkg

In [None]:
import copy

# Work on a copy of the subset with label <= 99 (of the 100 labels listed above
# this leaves out label 100), so the columns added below do not touch `merged`.
sdf = copy.copy(merged[merged.label <= 99])

# Array-valued columns are exported as their string representation:
# new column name -> source column.
string_columns = {
    'sids_s': 'sids',
    'cover_s': 'cover',
    'precip_s': 'precips',
    'areas_s': 'cell_areas',
    'x_s': 'x',
    'y_s': 'y',
}
for target, source in string_columns.items():
    # Bind `source` as a default argument so each lambda reads its own column.
    sdf[target] = sdf.apply(lambda row, source=source: str(list(row[source])), axis=1)

export_columns = ['label', 'timestamp', *string_columns, 'trixels']
sub_df = sdf[export_columns]
sub_df.to_file('{}/pickles/featuredb.gpkg'.format(data_dir), driver='GPKG')

# Create daily aggregate

In [None]:
# Grouping key for the daily aggregate: '<label>_<calendar date>'. Despite its name,
# the 'date' column is a string that also encodes the label, so grouping by it
# keeps the labels apart. Adds the column to `merged` in place.
merged['date'] = merged['label'].astype('str') + '_' + merged['timestamp'].dt.date.astype('str')

In [None]:
# Dissolve the rows of `merged` per '<label>_<date>' key.
dates = merged.stare_dissolve(by='date', num_workers=n_workers*10)

# Same geometry pipeline as for `merged`: make trixels, attach them, split at the
# antimeridian, then use them as the active geometry (EPSG:4326).
trixels = dates.make_trixels(num_workers=n_workers*10, wrap_lon=False)
dates.set_trixels(trixels, inplace=True)
# NOTE(review): unlike the call on `merged`, no worker count is passed here.
dates.split_antimeridian(inplace=True, drop=True)
dates.set_geometry('trixels', inplace=True, crs='EPSG:4326')

In [None]:
# Per '<label>_<date>' key, sum the per-timestep totals. 'tot_area' therefore adds
# up the area of every half-hourly row of that day, not the area of the daily footprint.
tot = merged[['date', 'tot_area', 'tot_precip']].groupby(by='date').agg('sum')
# Attach the sums to the dissolved frame; join() matches on the index, so this
# relies on `dates` being indexed by the 'date' key - presumably the result of
# stare_dissolve(by='date'); TODO confirm.
dates = dates[['label', 'timestamp', 'sids', 'trixels']].join(tot)

In [None]:
# Persist the daily aggregate with 'sids' still array-valued.
dates_pickle = f'{data_dir}/pickles/dates.pickle'
with open(dates_pickle, mode='wb') as sink:
    pickle.dump(dates, sink)

In [None]:
# Replace the array-valued 'sids' column by its string representation before export.
# This overwrites the column in `dates`, so the cell is not idempotent: running it
# a second time would stringify the characters of the already converted strings.
dates['sids'] = dates.apply(lambda row : str(list(row['sids'])), axis=1)
dates.to_file('{}/pickles/dates.gpkg'.format(data_dir), driver='GPKG')

In [None]:
#dates['sids'] = dates.apply(lambda row: row['sids'].strip('][').split(', '), axis=1)
#dates['sids'] = dates['sids'].apply(lambda row: list(map(int, row)))

In [None]:
1