In [6]:
import starepandas
import pickle
import sqlalchemy
import pandas
import numpy
import datetime
import time
import geopandas
import matplotlib.pyplot as plt

In [7]:
# Level of the precomputed cover to load; it selects the file
# covers/caribbean_sids_cover_l<level>.pickle read in the "Load Cover" cell.
level = 10

# Load Cover

In [8]:
# Path of the pickled cover for the configured level.
fname = 'covers/caribbean_sids_cover_l{}.pickle'.format(level)
# NOTE: pickle.load can execute arbitrary code; only open cover files you
# created yourself or otherwise trust.
with open(fname, 'rb') as f:
    # presumably the STARE index values (SIDs) covering the study area -- TODO confirm
    cover_sids = pickle.load(f)

# Load catalogue

In [9]:
# Granule catalog. The preview further down shows the columns begining,
# ending, granule_path, sidecar_path and stare_cover.
catalog = pandas.read_pickle('catalogs/cldmsk_noaa20.pickle')

# Overlay test

In [10]:
# Test the catalog against the cover using 5 workers. The result is used as a
# row mask on the catalog in the following cell.
intersects = catalog.stare_intersects(cover_sids, n_workers=5)
# NOTE(review): this pickles the test result itself, not the matching catalog
# rows, under the same path that the "CLDMSK NOAA20" cell later uses for the
# intersecting subset -- confirm which content is intended for this file.
intersects.to_pickle('catalogs/cldmsk_noaa20_intersecting.pickle')

# Find nonintersecting

In [11]:
# Split the catalog with the intersection test result used as a row mask.
nonintersecting = catalog[intersects==False]
intersecting = catalog[intersects==True]
# Preview the first non-intersecting rows.
nonintersecting.head()

Unnamed: 0,begining,ending,granule_path,sidecar_path,stare_cover
2,2018-03-27T06:42:00.000Z,2018-03-27T06:48:00.000Z,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,"[2675138178658074627, 2693152577167556611, 266..."
4,2019-08-28T04:24:00.000Z,2019-08-28T04:30:00.000Z,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,"[2558044588346441731, 2494994193563254788, 249..."
5,2020-05-23T08:48:00.000Z,2020-05-23T08:54:00.000Z,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,"[2972375754064527363, 2981382953319268355, 292..."
6,2019-01-06T05:42:00.000Z,2019-01-06T05:48:00.000Z,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,"[3035426148847714307, 3602879701896396803, 244..."
8,2019-01-28T05:30:00.000Z,2019-01-28T05:36:00.000Z,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,/tablespace/caribbean/CLDMSK_L2_VIIRS_NOAA20/C...,"[3602879701896396803, 2449958197289549828, 245..."


# Draw some nonintersecting

In [13]:
# Work on a copy of the first 50 non-intersecting rows so the source frame is
# not touched by the in-place update below.
ns_subset = nonintersecting.iloc[0:50].copy()
# Compute the trixels for the sample (5 workers) and attach them in place;
# the plot cell draws them with trixels=True.
trixels = ns_subset.trixels(n_workers=5)
ns_subset.set_trixels(trixels, inplace=True)

In [14]:
# Same preparation for the first 50 intersecting rows.
is_subset = intersecting.iloc[0:50].copy()
# Note: this rebinds the global name `trixels`, which now refers to the
# trixels of the intersecting sample.
trixels = is_subset.trixels(n_workers=5)
is_subset.set_trixels(trixels, inplace=True)

In [None]:
# Study-area layer read from a GeoPackage; it is drawn together with the
# granule trixels in the plot cell.
caribbean = geopandas.read_file('study_area_fao_clean_210326.gpkg')

In [None]:
# One 6x6 inch figure at 300 dpi with a grid.
fig, ax = plt.subplots(figsize=(6,6), dpi=300)
ax.grid(True)

# Green: sample of (up to 50) intersecting granules.
is_subset.plot(ax=ax, trixels=True, boundary=False, linewidth=0.1, color='g')
# Red: sample of (up to 50) non-intersecting granules.
ns_subset.plot(ax=ax, trixels=True, boundary=False, linewidth=0.1, color='r')
# Plotted last, after both granule layers.
caribbean.plot(ax=ax)

# Calculate runtime

In [22]:
def paths2timestamps(granule_paths):
    """Build a '<date>.<time>' string for every granule path.

    The granule name is taken from the fifth '/'-separated component of the
    path (index 4); its second and third '.'-separated fields are joined with
    a dot.

    Args:
        granule_paths: iterable of path strings with at least five
            '/'-separated components.

    Returns:
        list of str, one entry per input path, in input order.

    Raises:
        IndexError: if a path has fewer than five components or the name has
            fewer than three '.'-separated fields.
    """
    def _stamp(path):
        fields = path.split('/')[4].split('.')
        return '.'.join((fields[1], fields[2]))

    return [_stamp(path) for path in granule_paths]
        

def paths2names(granule_paths):
    """Extract the granule name from every path.

    The name is the fifth '/'-separated component of the path (index 4),
    not necessarily the last one.

    Args:
        granule_paths: iterable of path strings with at least five
            '/'-separated components.

    Returns:
        list of str, one entry per input path, in input order.

    Raises:
        IndexError: if a path has fewer than five components.
    """
    return [path.split('/')[4] for path in granule_paths]
        

def find_non_intersecting(catalog, cover, n_workers):
    """Select the catalog rows for which the cover intersection test is False.

    Args:
        catalog: object providing stare_intersects(cover, n_workers=...) and
            mask-based row selection through ``catalog[mask]``.
        cover: cover passed through to stare_intersects.
        n_workers: number of workers passed through to stare_intersects.

    Returns:
        The result of indexing ``catalog`` with the rows whose test is False.
    """
    mask = catalog.stare_intersects(cover, n_workers=n_workers)
    # Explicit comparison with False (not ~mask) keeps the original
    # selection semantics.
    return catalog[mask == False]

def find_intersecting(catalog, cover, n_workers):
    """Select the catalog rows for which the cover intersection test is True.

    Args:
        catalog: object providing stare_intersects(cover, n_workers=...) and
            mask-based row selection through ``catalog[mask]``.
        cover: cover passed through to stare_intersects.
        n_workers: number of workers passed through to stare_intersects.

    Returns:
        The result of indexing ``catalog`` with the rows whose test is True.
    """
    intersects = catalog.stare_intersects(cover, n_workers=n_workers)
    return catalog[intersects == True]


# Backward-compatible alias for the misspelled name this helper was
# originally defined under.
find_interseting = find_intersecting

In [23]:
def load_catalog(catalog_name):
    """Load the pickled catalog called ``catalog_name`` from the catalogs/ folder.

    Args:
        catalog_name: catalog file name, with or without the '.pickle'
            suffix, e.g. 'cldmsk_noaa20' or 'cldmsk_noaa20.pickle'.

    Returns:
        The object stored in catalogs/<catalog_name>.pickle.

    Raises:
        FileNotFoundError: if the catalog file does not exist.
    """
    suffix = '.pickle'
    if catalog_name.endswith(suffix):
        catalog_name = catalog_name[:-len(suffix)]
    # NOTE: unpickling can execute arbitrary code; only load catalogs you trust.
    return pandas.read_pickle('catalogs/{}{}'.format(catalog_name, suffix))

def load_cover(cover_level):
    """Read the pickled cover for ``cover_level`` from the covers/ folder.

    Args:
        cover_level: value inserted into the file name
            covers/caribbean_sids_cover_l<cover_level>.pickle.

    Returns:
        The object stored in that pickle file.

    Raises:
        FileNotFoundError: if no cover file exists for that level.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open('covers/caribbean_sids_cover_l{}.pickle'.format(cover_level), 'rb') as handle:
        return pickle.load(handle)

In [24]:
level = 10

def subset_catalog(catalog_name, level, n_workers=60):
    """Time the cover intersection test for one catalog and pickle the matches.

    Args:
        catalog_name: name handed to load_catalog(); everything from the
            first '.' onwards is dropped when naming the output file.
        level: cover level handed to load_cover().
        n_workers: number of workers for stare_intersects (default 60).

    Returns:
        None. The duration is printed and the intersecting rows are written
        to '<catalog_name>_intersecting.pickle'.
    """
    catalog = load_catalog(catalog_name)
    cover = load_cover(cover_level=level)

    # Only the intersection test and the row selection are timed; loading the
    # catalog and the cover is excluded.
    start = datetime.datetime.now()
    intersects = catalog.stare_intersects(cover, n_workers=n_workers)
    intersecting = catalog[intersects == True]
    duration = datetime.datetime.now() - start
    print('Duration for level {level} was {duration}'.format(level=level, duration=duration))

    # NOTE(review): the file lands in the current working directory, while
    # other cells write their results under catalogs/ -- confirm the target.
    pickle_name = '{catalog_name}_intersecting.pickle'.format(catalog_name=catalog_name.split('.')[0])
    intersecting.to_pickle(pickle_name)


# The name matches the catalog file used elsewhere in this notebook
# (catalogs/cldmsk_noaa20.pickle).
subset_catalog('cldmsk_noaa20', level)

# Runtime 
cldmsk_noaa_20 has 9302 files \
Level 10: 20 seconds \
Level 11: 30 seconds \
Level 12: 50 seconds \
Level 13: 97 seconds \
Level 14: 198 seconds \
Level 15: 450 seconds 

# Find nonintersecting

## CLDMSK NOAA20

In [19]:
# Split the CLDMSK NOAA20 catalog by the cover intersection test (60 workers).
# cover_sids is the cover loaded in the "Load Cover" cell.
catalog = pandas.read_pickle('catalogs/cldmsk_noaa20.pickle')
intersects = catalog.stare_intersects(cover_sids, n_workers=60)
# _ni: rows where the test is False; _is: rows where it is True.
cldmsk_noaa20_ni = catalog[intersects==False]
cldmsk_noaa20_is = catalog[intersects==True]

# Persist both subsets. The second path is the one the "Overlay test" cell
# also writes to, so that file is overwritten here.
cldmsk_noaa20_ni.to_pickle('catalogs/cldmsk_noaa20_nonintersecting.pickle')
cldmsk_noaa20_is.to_pickle('catalogs/cldmsk_noaa20_intersecting.pickle')

## CLDMSK SNPP

In [20]:
# Split the CLDMSK SNPP catalog by the cover intersection test (60 workers).
# Rebinds the globals `catalog` and `intersects`.
catalog = pandas.read_pickle('catalogs/cldmsk_snpp.pickle')
intersects = catalog.stare_intersects(cover_sids, n_workers=60)
# _ni: rows where the test is False; _is: rows where it is True.
cldmsk_snpp_ni = catalog[intersects==False]
cldmsk_snpp_is = catalog[intersects==True]

# Persist both subsets under catalogs/.
cldmsk_snpp_ni.to_pickle('catalogs/cldmsk_snpp_nonintersecting.pickle')
cldmsk_snpp_is.to_pickle('catalogs/cldmsk_snpp_intersecting.pickle')

## VJ103DNB

In [21]:
# Split the VJ103 catalog by the cover intersection test (60 workers).
# Rebinds the globals `catalog` and `intersects`.
catalog = pandas.read_pickle('catalogs/vj103.pickle')
intersects = catalog.stare_intersects(cover_sids, n_workers=60)
# _ni: rows where the test is False; _is: rows where it is True.
vj103_ni = catalog[intersects==False]
vj103_is = catalog[intersects==True]

# Persist both subsets under catalogs/.
vj103_ni.to_pickle('catalogs/vj103_nonintersecting.pickle')
vj103_is.to_pickle('catalogs/vj103_intersecting.pickle')

## VNP03DNB

In [None]:
# Split the VNP03 catalog by the cover intersection test (60 workers).
# Rebinds the globals `catalog` and `intersects`.
catalog = pandas.read_pickle('catalogs/vnp03.pickle')
intersects = catalog.stare_intersects(cover_sids, n_workers=60)
# _ni: rows where the test is False; _is: rows where it is True.
vnp03_ni = catalog[intersects==False]
vnp03_is = catalog[intersects==True]

# Persist both subsets under catalogs/.
vnp03_ni.to_pickle('catalogs/vnp03_nonintersecting.pickle')
vnp03_is.to_pickle('catalogs/vnp03_intersecting.pickle')

# Move granules

In [40]:
# For every non-intersecting granule, ask starepandas for the guessed path of
# its VNP02DNB companion file and collect the answers in a Series.
# NOTE(review): `nonintersecting` is the frame built in the "Find
# nonintersecting" cell near the top -- confirm it is the intended catalog.
granule_paths = nonintersecting['granule_path']
vnp02_paths = pandas.Series([
    starepandas.guess_companion_path(path,
                                     prefix='VNP02DNB',
                                     folder='/tablespace/caribbean/VNP02DNB/')
    for path in granule_paths
])

In [None]:
def mv_granules(nonintersecting_catalog, dest_folder, companion_folder, companion_prefix):
    """Placeholder for moving granules; not implemented yet.

    The body is empty, so calling this function does nothing and returns
    None regardless of the arguments.
    """
    pass