In [4]:
import os, sys
import numpy as np
from skimage.measure import block_reduce
import matplotlib.pyplot as plt
from shapely import geometry
import geopandas as gpd
gpd.options.use_pygeos=False

import descarteslabs as dl

In [34]:
import pandas as pd

In [5]:
from area import area
import pickle, json

In [6]:
# Repository root used to build every data path in this notebook. Set the
# SOLAR_PV_ROOT environment variable to run from another checkout; the
# original location stays the default so existing setups are unaffected.
root = os.environ.get('SOLAR_PV_ROOT', '/home/lucas/repos/solar-pv-global-inventory')

### Load Data

In [None]:
a = np.load(os.path.join(root,'data','land_cover_arr.npz'))

In [None]:
arr = block_reduce(a['arr'], (5,5,1), np.sum)

In [None]:
arr.shape

In [None]:
# `arr` already holds block_reduce(a['arr'], (5,5,1), np.sum), so derive both
# arrays from it instead of re-running the same reduction two more times.

# Index of the band with the largest block sum, as float for plotting.
draw_arr = np.argmax(arr, axis=-1).astype(float)

# True where every band of the block sums to zero.
mask = arr.sum(axis=-1)==0

In [None]:
fig, ax = plt.subplots(1,1,figsize=(18,9))
#ax.imshow(mask.T, origin='lower')
ax.imshow(draw_arr.T, origin='lower')

In [None]:
x,y = np.where(mask==0)

In [None]:
def px2ll(pix_x, pix_y):
    """Convert grid indices to longitude/latitude.

    One index step is half a unit of lon/lat, and index (360, 180) maps to
    (0.0, 0.0).

    Args:
        pix_x: Index along the longitude axis.
        pix_y: Index along the latitude axis.

    Returns:
        tuple: ``(lon, lat)``.
    """
    return (pix_x - 360) / 2., (pix_y - 180) / 2.

def px2bbox(pix_x, pix_y):
    """Return the 0.5 x 0.5 box whose minimum corner is the cell's lon/lat.

    Args:
        pix_x: Index along the longitude axis.
        pix_y: Index along the latitude axis.

    Returns:
        The ``geometry.box`` spanning ``(lon, lat)`` to ``(lon+0.5, lat+0.5)``.
    """
    west, south = px2ll(pix_x, pix_y)
    east = west + 0.5
    north = south + 0.5
    return geometry.box(west, south, east, north)

In [None]:
coords = list(zip(x,y))

In [None]:
px_x, px_y = coords[5]

In [None]:
_geom = px2bbox(px_x, px_y)

In [23]:
ne = gpd.read_file(os.path.join(root,'data','ne_10m_countries.gpkg'))

In [24]:
ne = ne[~ne.geometry.isna()]

In [None]:
corine_countries = ['AL', 'AT', 'BE', 'BA', 'BG', 'HR', 'CY', 'CZ', 'DK', 'EE', 'FI', 'FR', 'DE', 'GR', 'HU', 'IS', 'IE', 'IT', 'XK', 'LV', 'LI', 'LT', 'LU', 'MK', 'MT', 'ME', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SK', 'SI', 'ES', 'SE', 'CH', 'TR', 'GB']

In [None]:
# For every occupied grid cell, report its size and which land-cover product
# would be used for the first country it intersects.
for ii, pixel in enumerate(coords):
    pix_x, pix_y = pixel
    _geom = px2bbox(pix_x, pix_y)
    aa = area(geometry.mapping(_geom))

    # First intersecting country row decides the product.
    hits = ne.intersects(_geom)
    isostr = ne.loc[hits,'ISO_A2'].values[0]

    if isostr=='US':
        prodstr = 'CDL'
    else:
        prodstr = 'CORINE' if isostr in corine_countries else 'MODIS'

    print (ii,np.sqrt(aa)/10, aa/100, isostr, prodstr)

In [None]:
storage_client = dl.Storage()

In [None]:
os.path.abspath(os.path.join(os.getcwd(),'..','..','..','clc_legend.json'))

In [None]:
# Load the class legend from three directories above the working directory.
# The context manager closes the file handle as soon as it has been parsed.
with open(os.path.abspath(os.path.join(os.getcwd(),'..','..','..','clc_legend.json')),'r') as legend_file:
    jj = json.load(legend_file)

In [None]:
storage_client.set('CLC_300',json.dumps(jj))

### Cloud reduce landcover

In [None]:
import matplotlib.pyplot as plt

In [None]:
def cloud_ll_lc(_geom,prodstr, lonlat):
    """Count land-cover pixels per class inside a geometry, split by an aridity mask.

    The geometry is covered with DLTiles. For every tile the land-cover raster
    and the stack of aridity-index scenes are fetched, and each class listed
    under the ``'CLC_300'`` storage key is counted twice: once for pixels
    outside the aridity band and once (``<class>_arid``) for pixels inside it.
    Per-tile counts are then summed over all tiles.

    Imports and clients are created inside the function; it is registered with
    ``tasks.create_function`` in this notebook, so it presumably has to be
    self-contained when executed remotely.

    Args:
        _geom: GeoJSON-like mapping (anything ``shapely.geometry.shape``
            accepts) describing the area to reduce.
        prodstr: Unused; kept so existing callers keep working.
        lonlat: Opaque identifier echoed back under ``'pixx'``. Callers in this
            notebook pass the ``(pix_x, pix_y)`` grid indices.

    Returns:
        dict: ``{'pixx': lonlat, 'reduction': counts}`` where ``counts`` maps
        every class key ``k`` and ``k + '_arid'`` to an ``int`` pixel count.
    """
    import json

    import descarteslabs as dl

    from shapely import geometry
    import numpy as np

    raster_client = dl.Raster()
    metadata_client = dl.Metadata()
    storage_client = dl.Storage()

    products = {'CLC':{
                    'name':'oxfordeo:clc_300m_landcover',
                    'bands':['discrete_classification'],
                    'sdate':'2006-12-29',
                    'edate':'2007-01-03',
                    'resolution':300,
                    'tilesize':50
               },
               'AI':{
                   'name':'oxford-university:era5_aridity',
                    'bands':['ai'],
                    'sdate':'2006-12-29',
                    'edate':'2007-01-03',
                    'resolution':300,
                    'tilesize':50
               }
              }

    # Class legend; its keys are the integer class codes stored as strings.
    class_labels = json.loads(storage_client.get('CLC_300'))

    def reduce_tile(tile,prodstr):
        """Return per-class pixel counts for one DLTile (plain and ``_arid``)."""

        scenes_lc = metadata_client.search(products['CLC']['name'],
                                           geom=tile['geometry'],
                                           start_datetime=products['CLC']['sdate'],
                                           end_datetime=products['CLC']['edate'])

        # The aridity search uses its own fixed date window rather than the
        # 'sdate'/'edate' entries of products['AI'].
        scenes_ai = metadata_client.search(products['AI']['name'],
                                           geom=tile['geometry'],
                                           start_datetime='1990-01-01',
                                           end_datetime='2019-01-03')

        print (f'len scenes_lc: {len(scenes_lc["features"])}, len scenes_ai: {len(scenes_ai["features"])}')

        if len(scenes_lc['features'])<1:
            # No land-cover scene for this tile: report zero for every key the
            # aggregation below reads, i.e. each class and its '_arid' twin.
            res_dict = {}
            for kk in class_labels.keys():
                res_dict[kk] = 0
                res_dict[kk+'_arid'] = 0
            res_dict['all'] = 0
        else:
            arr_lc, meta = raster_client.ndarray(scenes_lc['features'][0].id,
                                              bands=products['CLC']['bands'],
                                              scales=[[0,255]],
                                              ot='Byte',
                                              dltile=tile['properties']['key'])

            # NOTE(review): an empty scenes_ai list is passed straight to
            # stack(); confirm how the client behaves in that case.
            arr_ai, meta = raster_client.stack([s.id for s in scenes_ai['features']],
                                              bands=products['AI']['bands'],
                                              scales=[[0,1]],
                                              ot='Byte',
                                              dltile=tile['properties']['key'])

            # Average the stacked aridity scenes along the first axis and
            # bring the byte values back to a 0..1 range.
            desert_mask = np.squeeze(arr_ai.mean(axis=0))
            print('mask',desert_mask)
            desert_mask = desert_mask / 255

            print (geometry.shape(tile['geometry']).representative_point())

            # Pixels whose mean value falls in [0.05, 0.2) go to '_arid'.
            desert_mask = (desert_mask>=0.05) & (desert_mask<0.2)

            res_dict = {}
            for kk in class_labels.keys():
                res_dict[kk] = np.isin(arr_lc[~desert_mask],int(kk)).sum()
                res_dict[kk+'_arid'] = np.isin(arr_lc[desert_mask],int(kk)).sum()

            res_dict['all'] = arr_lc.shape[0]*arr_lc.shape[1]
        print (res_dict)

        return res_dict

    ll_geom = geometry.shape(_geom)

    tiles = raster_client.dltiles_from_shape(products['CLC']['resolution'],
                                             products['CLC']['tilesize'],
                                             0,
                                             ll_geom)

    print (f'len tiles: {len(tiles["features"])}')

    # Per-tile results keyed by DLTile key.
    tile_res = {}
    for tile in tiles['features']:
        tile_res[tile['properties']['key']] = reduce_tile(tile,prodstr)

    # Sum each count over all tiles; cast to int so the result is plain
    # Python data.
    reduction = {}
    for kk in class_labels.keys():
        reduction[kk] = int(np.nansum([tile_res[kk2][kk] for kk2 in tile_res.keys()]))
        reduction[kk+'_arid'] = int(np.nansum([tile_res[kk2][kk+'_arid'] for kk2 in tile_res.keys()]))

    print ('RESULT:',reduction)

    result = {'pixx':lonlat,'reduction':reduction}

    return result

In [None]:
cloud_ll_lc(_geom,None, None)

In [7]:
tasks = dl.Tasks()

In [None]:
storage = dl.Storage()

In [None]:
fn = tasks.create_function(
            cloud_ll_lc,
            image='us.gcr.io/dl-ci-cd/images/tasks/public/py3.7:v1.1.1',
            name='ll_landcover_v6',
            requirements=[],
            maximum_concurrency=60,
            memory='3.5Gi',
            retry_count=0,
            task_timeout=10800,
            )

### Deploy

In [None]:
# Submit one cloud task per occupied grid cell and log the cell size.
for ii, pixel in enumerate(coords):
    pix_x, pix_y = pixel
    _geom = px2bbox(pix_x, pix_y)
    geom_mapping = geometry.mapping(_geom)
    aa = area(geom_mapping)
    # The pixel indices travel with the task as plain floats so the result
    # can be matched back to its cell.
    fn(geom_mapping, None, (float(pix_x), float(pix_y)))
    print (ii,np.sqrt(aa)/10, aa/100, 'N/A', 'N/A')


### Collect

In [None]:
group = tasks.get_group('25292ad3')

In [None]:
tasks.rerun_failed_tasks('25292ad3')

In [5]:
# Page through the results of task group '25292ad3', 1000 per request, until
# the service stops handing back a continuation token.
q = tasks.get_task_results('25292ad3',limit=1000, include=['result_url'])
urls = q['results']
token = q['continuation_token']
counter = 0

while token:
    # One progress line per additional page.
    print (counter)
    counter += 1
    q = tasks.get_task_results('25292ad3',limit=1000, include=['result_url'], continuation_token=token)
    urls.extend(q['results'])
    token = q['continuation_token']

0
1
2
3
4


In [6]:
len(urls)

5628

In [7]:
import requests

In [26]:
def mp_rejoin(ii,urls, timeout=60):
    """Download and JSON-decode one chunk of task result URLs.

    Failures are best-effort: a URL that times out, returns an HTTP error
    status, or does not contain valid JSON is logged and skipped.

    Args:
        ii: Chunk/worker index, used only in log lines.
        urls: Iterable of result URLs to fetch.
        timeout: Seconds to wait on each request before giving up, so a
            stalled connection cannot block the worker indefinitely.

    Returns:
        list: Decoded payloads of the successfully fetched URLs, in input order.
    """
    results = []
    for ii_u, url in enumerate(urls):
        if ii_u %100==0:
            print (f'Logging: {ii}, {ii_u}')
        try:
            r = requests.get(url, timeout=timeout)
            # Reject 4xx/5xx responses instead of parsing an error body as
            # if it were a task result.
            r.raise_for_status()
            results.append(json.loads(r.text))
        except Exception as e:
            # Say which item failed so it can be retried by hand.
            print ('ERROR!',e, f'(chunk {ii}, url {ii_u})')

    return results

In [27]:
# Split the result URLs into `pool_size` contiguous chunks, one per worker.
pool_size = 4
all_urls = [entry['result_url'] for entry in urls]
# +1 so that pool_size chunks always cover the whole list.
chunk = len(urls)//pool_size + 1
urls_reshape = [all_urls[start:start+chunk] for start in range(0, pool_size*chunk, chunk)]

In [28]:
import multiprocessing as mp

In [29]:
pool = mp.Pool(pool_size)

Logging: 1, 0Logging: 3, 0Logging: 0, 0Logging: 2, 0





In [None]:
a

In [30]:
all_results = pool.starmap(mp_rejoin, list(zip(range(pool_size), urls_reshape)))

In [31]:
new_results = []
for bunch in all_results:
    print (len(bunch))
    new_results +=bunch

61
61
61
57


In [14]:
import pandas as pd

In [15]:
df = pd.DataFrame.from_dict({tuple(r['pixx']):r['reduction'] for r in new_results}).T

In [16]:
df.to_csv(os.path.join(root,'data','LC300_latlonpix.csv'))

In [21]:
df.sum(axis=1)

113.0  258.0    35629
358.0  281.0     8557
113.0  267.0    27728
16.0   152.0    12508
113.0  269.0    34577
                ...  
692.0  136.0    37463
117.0  252.0    34081
114.0  260.0    36387
112.0  263.0    47143
114.0  256.0    20893
Length: 5628, dtype: int64

### prototype skew

In [None]:
x,y = np.where(mask==0)

In [None]:
# One record per occupied grid cell: the seven band values of `arr`, labelled
# in band order.
class_names = ['cropland', 'forestshrub', 'grassy', 'human','wetlands', 'barren','other']
records = {}
for pix_x, pix_y in zip(x, y):
    cell_values = arr[pix_x,pix_y,:].tolist()
    records[(pix_x,pix_y)] = dict(zip(class_names, cell_values))

In [None]:
df_arr = pd.DataFrame.from_dict(records).T

In [None]:
(df_arr.sum(axis=1)>10000).sum()

In [None]:
total_pv = df_arr.drop(columns=['other']).sum().sum()

In [None]:
df_arr.drop(columns=['other']).sum()/total_pv

In [None]:
df_arr.to_csv(os.path.join(root,'data','landcover_pvpix.csv'))

In [None]:
df = df.reset_index().rename(columns={'level_0':'pix_x','level_1':'pix_y'})
df.pix_x = df.pix_x.astype(int)
df.pix_y = df.pix_y.astype(int)

In [None]:
df = df.set_index(['pix_x','pix_y'])

In [None]:
df = df.drop(columns=['all'])

In [None]:
total_lc = df.drop(columns=['other']).sum().sum()

In [None]:
df.drop(columns=['other']).sum()/total_lc

In [None]:
# skewing away from cropland, forest and developed land; toward barren, wetlands and grassy

### Do landcover for all countries

In [1]:
def cloud_country_lc(prodstr, iso2):
    """Count land-cover pixels per class for one country, split by an aridity mask.

    The country geometry is read from the storage key ``'ne_' + iso2`` and
    covered with DLTiles. For every tile the land-cover raster and the stack
    of aridity-index scenes are fetched, and each class listed under the
    ``'CLC_300'`` storage key is counted twice: once for pixels outside the
    aridity band and once (``<class>_arid``) for pixels inside it. Per-tile
    counts are then summed over all tiles.

    Imports and clients are created inside the function; it is registered with
    ``tasks.create_function`` in this notebook, so it presumably has to be
    self-contained when executed remotely.

    Args:
        prodstr: Unused; kept so existing callers keep working.
        iso2: Country code; selects the stored geometry and is echoed back
            in the result.

    Returns:
        dict: ``{'iso2': iso2, 'reduction': counts}`` where ``counts`` maps
        every class key ``k`` and ``k + '_arid'`` to an ``int`` pixel count.
    """
    import json

    import descarteslabs as dl

    from shapely import geometry
    import numpy as np

    raster_client = dl.Raster()
    metadata_client = dl.Metadata()
    storage_client = dl.Storage()

    # Country outline, stored as GeoJSON text under 'ne_<iso2>'.
    ll_geom = geometry.shape(json.loads(storage_client.get('ne_'+iso2)))

    products = {'CLC':{
                    'name':'oxfordeo:clc_300m_landcover',
                    'bands':['discrete_classification'],
                    'sdate':'2006-12-29',
                    'edate':'2007-01-03',
                    'resolution':300,
                    'tilesize':500
               },
               'AI':{
                   'name':'oxford-university:era5_aridity',
                    'bands':['ai'],
                    'sdate':'2006-12-29',
                    'edate':'2007-01-03',
                    'resolution':300,
                    'tilesize':500
               }
              }

    # Class legend; its keys are the integer class codes stored as strings.
    class_labels = json.loads(storage_client.get('CLC_300'))

    def reduce_tile(tile,prodstr):
        """Return per-class pixel counts for one DLTile (plain and ``_arid``)."""

        scenes_lc = metadata_client.search(products['CLC']['name'],
                                           geom=tile['geometry'],
                                           start_datetime=products['CLC']['sdate'],
                                           end_datetime=products['CLC']['edate'])

        # The aridity search uses its own fixed date window rather than the
        # 'sdate'/'edate' entries of products['AI'].
        scenes_ai = metadata_client.search(products['AI']['name'],
                                           geom=tile['geometry'],
                                           start_datetime='1990-01-01',
                                           end_datetime='2019-01-03')

        print (f'len scenes_lc: {len(scenes_lc["features"])}, len scenes_ai: {len(scenes_ai["features"])}')

        if len(scenes_lc['features'])<1:
            # No land-cover scene for this tile: report zero for every key the
            # aggregation below reads, i.e. each class and its '_arid' twin.
            res_dict = {}
            for kk in class_labels.keys():
                res_dict[kk] = 0
                res_dict[kk+'_arid'] = 0
            res_dict['all'] = 0
        else:
            arr_lc, meta = raster_client.ndarray(scenes_lc['features'][0].id,
                                              bands=products['CLC']['bands'],
                                              scales=[[0,255]],
                                              ot='Byte',
                                              dltile=tile['properties']['key'])

            # NOTE(review): an empty scenes_ai list is passed straight to
            # stack(); confirm how the client behaves in that case.
            arr_ai, meta = raster_client.stack([s.id for s in scenes_ai['features']],
                                              bands=products['AI']['bands'],
                                              scales=[[0,1]],
                                              ot='Byte',
                                              dltile=tile['properties']['key'])

            # Average the stacked aridity scenes along the first axis and
            # bring the byte values back to a 0..1 range.
            desert_mask = np.squeeze(arr_ai.mean(axis=0))
            print('mask',desert_mask)
            desert_mask = desert_mask / 255

            print (geometry.shape(tile['geometry']).representative_point())

            # Pixels whose mean value falls in [0.05, 0.2) go to '_arid'.
            desert_mask = (desert_mask>=0.05) & (desert_mask<0.2)

            res_dict = {}
            for kk in class_labels.keys():
                res_dict[kk] = np.isin(arr_lc[~desert_mask],int(kk)).sum()
                res_dict[kk+'_arid'] = np.isin(arr_lc[desert_mask],int(kk)).sum()

            res_dict['all'] = arr_lc.shape[0]*arr_lc.shape[1]
        print (res_dict)

        return res_dict

    tiles = raster_client.dltiles_from_shape(products['CLC']['resolution'],
                                             products['CLC']['tilesize'],
                                             0,
                                             ll_geom)

    print (f'len tiles: {len(tiles["features"])}')

    # Per-tile results keyed by DLTile key.
    tile_res = {}
    for tile in tiles['features']:
        tile_res[tile['properties']['key']] = reduce_tile(tile,prodstr)

    # Sum each count over all tiles; cast to int so the result is plain
    # Python data.
    reduction = {}
    for kk in class_labels.keys():
        reduction[kk] = int(np.nansum([tile_res[kk2][kk] for kk2 in tile_res.keys()]))
        reduction[kk+'_arid'] = int(np.nansum([tile_res[kk2][kk+'_arid'] for kk2 in tile_res.keys()]))

    print ('RESULT:',reduction)

    result = {'iso2':iso2,'reduction':reduction}

    return result

In [8]:
fn = tasks.create_function(
            cloud_country_lc,
            image='us.gcr.io/dl-ci-cd/images/tasks/public/py3.7:v1.1.1',
            name='country_landcover_v6',
            requirements=[],
            maximum_concurrency=50,
            memory='7.0Gi',
            retry_count=0,
            task_timeout=21600,
            )

In [None]:
fn  = tasks.get_function('country_landcover_v4')

#### test on GB

In [29]:
fn('n/a','GB')

Task
	Status: Pending

In [26]:
ne.loc[ne['ISO_A2']=='GB']

Unnamed: 0,index,featurecla,scalerank,LABELRANK,SOVEREIGNT,SOV_A3,ADM0_DIF,LEVEL,TYPE,ADMIN,...,NAME_KO,NAME_NL,NAME_PL,NAME_PT,NAME_RU,NAME_SV,NAME_TR,NAME_VI,NAME_ZH,geometry
79,79,Admin-0 country,0.0,2.0,United Kingdom,GB1,1.0,2.0,Country,United Kingdom,...,영국,Verenigd Koninkrijk,Wielka Brytania,Reino Unido,Великобритания,Storbritannien,Birleşik Krallık,Vương quốc Liên hiệp Anh và Bắc Ireland,英国,"MULTIPOLYGON (((-7.24710 55.06932, -7.25674 55..."


In [None]:
ne.groupby('ISO_A2').size().sort_values()

In [None]:
_geom = ne.loc[ne.ISO_A2=='FR'].geometry.unary_union

In [27]:
cloud_country_lc('na', 'GB')

len tiles: 1453
len scenes_lc: 1, len scenes_ai: 28
mask [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
POINT (-13.62043186915798 57.57732649390562)
{'0': 0, '0_arid': 0, '10': 0, '10_arid': 0, '11': 0, '11_arid': 0, '12': 0, '12_arid': 0, '20': 0, '20_arid': 0, '30': 0, '30_arid': 0, '40': 0, '40_arid': 0, '50': 0, '50_arid': 0, '60': 0, '60_arid': 0, '61': 0, '61_arid': 0, '62': 0, '62_arid': 0, '70': 0, '70_arid': 0, '71': 0, '71_arid': 0, '72': 0, '72_arid': 0, '80': 0, '80_arid': 0, '81': 0, '81_arid': 0, '82': 0, '82_arid': 0, '90': 0, '90_arid': 0, '100': 0, '100_arid': 0, '110': 0, '110_arid': 0, '120': 0, '120_arid': 0, '121': 0, '121_arid': 0, '122': 0, '122_arid': 0, '130': 0, '130_arid': 0, '140': 0, '140_arid': 0, '150': 0, '150_arid': 0, '151': 0, '151_arid': 0, '152': 0, '152_arid': 0, '153': 0, '153_arid': 0, '160': 0, '160_arid': 0, '170': 0, '170_arid': 0, '180

KeyboardInterrupt: 

In [None]:
len(tiles['features'])

In [None]:
storage.set('ne_FR', json.dumps(geometry.mapping(_geom)))

In [None]:
fn('CORINE','FR')

### Collect Failures

In [10]:
# Page through the FAILURE results of task group '11ba261f', keeping the
# arguments each failed task was called with.
q = tasks.get_task_results('11ba261f',limit=1000, status='FAILURE', include=['arguments'])
results = q['results']
token = q['continuation_token']
counter = 0

while token:
    # One progress line per additional page.
    print (counter)
    counter += 1
    q = tasks.get_task_results('11ba261f',limit=1000, status='FAILURE', include=['arguments'], continuation_token=token)
    results.extend(q['results'])
    token = q['continuation_token']

In [13]:
failed_countries = [json.loads(r['arguments'])['args'][1] for r in results]

In [15]:
for iso2 in failed_countries:
    fn('n/a',iso2)

#### Deploy

In [30]:
# Submit one country-level task per distinct ISO_A2 code. `ne` can contain
# several rows with the same code (the plain row loop submitted FR twice), and
# the task only receives the code, so repeats would be duplicate work and
# duplicate rows in the collected results.
# cloud_country_lc reads the outline from the storage key 'ne_<iso2>'; those
# keys are expected to exist already and are not rewritten here.
submitted = set()
for row in ne.iterrows():
    iso2 = row[1]['ISO_A2']
    # '-99' rows have no usable ISO code.
    if iso2=='-99' or iso2 in submitted:
        continue
    submitted.add(iso2)
    _geom = row[1]['geometry']
    fn('n/a',iso2)
    print (iso2)

ID
MY
CL
BO
PE
AR
CY
IN
CN
IL
PS
LB
ET
SS
SO
KE
PK
MW
TZ
SY
FR
SR
GY
KR
KP
MA
EH
CR
NI
CG
CD
BT
UA
BY
NA
ZA
MF
SX
OM
UZ
KZ
TJ
LT
BR
UY
MN
RU
CZ
DE
EE
LV
NO
SE
FI
VN
KH
LU
AE
BE
GE
MK
AL
AZ
XK
TR
ES
LA
KG
AM
DK
LY
TN
RO
HU
SK
PL
IE
GB
GR
ZM
SL
GN
LR
CF
SD
DJ
ER
AT
IQ
IT
CH
IR
NL
LI
CI
RS
ML
SN
NG
BJ
AO
HR
SI
QA
SA
BW
ZW
BG
TH
SM
HT
DO
TD
KW
SV
GT
TL
BN
MC
DZ
MZ
SZ
BI
RW
MM
BD
AD
AF
ME
BA
UG
CU
HN
EC
CO
PY
PT
MD
TM
JO
NP
LS
CM
GA
NE
BF
TG
GH
GW
GI
US
CA
MX
BZ
PA
VE
PG
EG
YE
MR
GQ
GM
HK
VA
AQ
AU
GL
FJ
NZ
NC
MG
PH
LK
CW
AW
BS
TC
TW
JP
PM
IS
PN
PF
TF
SC
KI
MH
TT
GD
VC
BB
LC
DM
UM
MS
AG
KN
VI
BL
PR
AI
VG
JM
KY
BM
HM
SH
MU
KM
ST
CV
MT
JE
GG
IM
AX
FO
IO
SG
NF
CK
TO
WF
WS
SB
TV
MV
NR
FM
GS
FK
VU
NU
AS
PW
GU
MP
BH
FR
MO


In [None]:
redo = ['MF', 'IL', 'DJ', 'PS', 'LB', 'KR', 'KP', 'BT', 'SX', 'TJ', 'MN', 'KH',
       'AE', 'LA', 'KG', 'BJ', 'SM', 'HT', 'KW', 'DO', 'SZ', 'BI', 'AD', 'RW',
       'BD', 'UG', 'EC', 'GA', 'GW', 'TG', 'NP', 'VA', 'GQ', 'HK', 'LS', 'AW',
       'CW', 'LK', 'PM', 'NC', 'TC', 'TT', 'VC', 'GD', 'BB', 'FJ', 'LC', 'AG',
       'MS', 'DM', 'KN', 'VG', 'HM', 'BL', 'VI', 'KM', 'ST', 'KY', 'JE', 'MU',
       'SG', 'NF', 'FO', 'NR', 'GG', 'FR', 'WS', 'AX', 'NU', 'TO', 'WF', 'MP',
       'GU', 'FK', 'MO']

In [None]:
# Resubmit the countries listed in `redo`, tagging each with the product
# name that matches its region.
for iso2 in redo:
    if iso2=='-99':
        continue
    _geom = ne.loc[ne.ISO_A2==iso2,'geometry'].values[0]
    print (iso2)
    if iso2=='US':
        fn('CDL',iso2)
        continue
    # GB is deliberately excluded from the CORINE group.
    in_corine = (iso2 in corine_countries) and (iso2!='GB')
    fn('CORINE' if in_corine else 'MODIS', iso2)

In [None]:
# redo all modis
for row in ne.iterrows():
    iso2 = row[1]['ISO_A2']
    if iso2!='-99':
        #_geom = row[1]['geometry']
        #storage.set('ne_'+iso2, json.dumps(geometry.mapping(_geom)))
        print (iso2)
        if iso2=='US':
            pass #fn('CDL',iso2)
        elif (iso2 in corine_countries) and (iso2!='GB'):
            pass #fn('CORINE',iso2)
        else:
            fn('MODIS',iso2)

### Collect Countries

In [16]:
urls = []

In [18]:
# Append the SUCCESS result URLs of task group '11ba261f' to `urls`, paging
# 1000 at a time until no continuation token is returned.
q = tasks.get_task_results('11ba261f',limit=1000, status='SUCCESS', include=['result_url'])
urls.extend(q['results'])
token = q['continuation_token']
counter = 0

while token:
    # One progress line per additional page.
    print (counter)
    counter += 1
    q = tasks.get_task_results('11ba261f',limit=1000, status='SUCCESS', include=['result_url'], continuation_token=token)
    urls.extend(q['results'])
    token = q['continuation_token']

In [20]:
# Append the SUCCESS result URLs of task group '4a5fe7e3' to `urls`, paging
# 1000 at a time until no continuation token is returned.
q = tasks.get_task_results('4a5fe7e3',limit=1000, status='SUCCESS', include=['result_url'])
urls.extend(q['results'])
token = q['continuation_token']
counter = 0

while token:
    # One progress line per additional page.
    print (counter)
    counter += 1
    q = tasks.get_task_results('4a5fe7e3',limit=1000, status='SUCCESS', include=['result_url'], continuation_token=token)
    urls.extend(q['results'])
    token = q['continuation_token']

In [21]:
len(urls)

240

In [24]:
import requests

In [40]:
# One row per country result, indexed by ISO code, one column per reduction key.
world_rows = [r['reduction'] for r in new_results]
world_index = [r['iso2'] for r in new_results]
world_df = pd.DataFrame.from_records(world_rows, index=world_index)
world_df.to_csv(os.path.join(root,'data','LC300_world.csv'))

### Wrangle labels

In [None]:
# Load the class-label lookups for each land-cover product. Each file is
# opened in a context manager so its handle is closed right after reading.
# NOTE: pickle.load executes arbitrary code from the file; only use it on
# files produced by this project.
labels = {}
for product_name, label_file in (('CORINE', 'class_labels_CORINE.pkl'),
                                 ('MODIS', 'class_labels_MODIS.pkl'),
                                 ('CDL', 'class_labels_cdl.pkl')):
    with open(os.path.join(root,'data',label_file),'rb') as fh:
        labels[product_name] = pickle.load(fh)

In [None]:
labels_agg = {}

In [None]:
# Map each product's native class codes onto the seven aggregate classes.
labels_agg['CORINE'] = dict(
    forestshrub=[23, 24, 25, 29],
    wetlands=[35, 36, 37, 38],
    human=list(range(1, 12)),        # 1..11
    cropland=list(range(12, 23)),    # 12..22
    grassy=[26, 27, 28],
    barren=[30, 31, 32, 33, 34],
    other=list(range(39, 48)),       # 39..47
)
labels_agg['MODIS'] = dict(
    forestshrub=[1, 2, 3, 4, 5, 6, 7, 8],
    wetlands=[11],
    human=[13],
    cropland=[12, 14],
    grassy=[9, 10],
    barren=[15, 16],
    other=[17, 0],
)
labels_agg['CDL'] = dict(
    forestshrub=[63, 64, 141, 142, 143, 152],
    wetlands=[87, 190, 195],
    human=[82, 121, 122, 123, 124],
    cropland=[],  # filled below with every named code not assigned elsewhere
    grassy=[59, 60, 61, 62, 176],
    barren=[65, 112, 131],
    other=[0, 81, 83, 88, 111],  # plus every code whose label is ''
)

# Snapshot of the CDL codes that were assigned by hand above.
existing_labels = []
for codes in labels_agg['CDL'].values():
    existing_labels.extend(codes)

# Remaining CDL codes: an empty label goes to 'other', anything else is
# treated as cropland.
for kk, vv in labels['CDL'].items():
    if kk in existing_labels:
        continue
    target = 'other' if vv=='' else 'cropland'
    labels_agg['CDL'][target].append(kk)

In [None]:
labels_agg

In [None]:
# Write the aggregated label mapping to disk. The context manager guarantees
# the file is flushed and closed instead of relying on garbage collection.
with open(os.path.join(root,'data','all_labels.json'),'w') as labels_file:
    json.dump(labels_agg, labels_file)

#### Set to storage

In [None]:
storage_client = dl.Storage()

In [None]:
storage_client.set('land_cover_labels',json.dumps(labels_agg))

### Prototype

In [None]:
raster_client = dl.Raster()
metadata_client = dl.Metadata()

In [None]:
aa = area(geometry.mapping(px2bbox(coords[10][0], coords[10][1]))) # m^2
np.sqrt(aa) #m 
np.sqrt(aa)/10 # px

In [None]:
DE = ne.loc[ne['ISO_A2']=='DE','geometry']

In [None]:
# `US` is not defined anywhere in this notebook, so select it from `ne` by
# ISO code (the same selection used for `DE`) before displaying its geometry.
US = ne.loc[ne['ISO_A2']=='US','geometry']
US.geometry.values[0]

In [None]:
# `RU` is not defined anywhere in this notebook, so select it from `ne` by
# ISO code (the same selection used for `DE`) before displaying its geometry.
RU = ne.loc[ne['ISO_A2']=='RU','geometry']
RU.geometry.values[0]

In [None]:
_geom = px2bbox(coords[10][0], coords[10][1])

In [None]:
tiles = raster_client.dltiles_from_shape(10, 10000, 0, DE.geometry.values[0])

In [None]:
len(tiles['features'])

In [None]:
tiles = raster_client.dltiles_from_shape(500, 50, 0, _geom)


In [None]:
tiles = raster_client.dltiles_from_shape(1.5, 5000, 0, _geom)

In [None]:
len(tiles['features'])

In [None]:
# Read the aggregated label mapping back from disk, closing the file handle
# as soon as it has been parsed.
with open(os.path.join(root,'data','all_labels.json'),'r') as labels_file:
    labels_all = json.load(labels_file)

In [None]:
labels_all['CORINE']

In [None]:
# Per-product query settings, keyed by the product string chosen per country.
#   product      - catalog id passed to metadata_client.search by reduce_tile
#   bands        - band list passed to raster_client.ndarray by reduce_tile
#   sdate/edate  - start/end datetimes of the scene search
#   resolution, tilesize - passed to raster_client.dltiles_from_shape by do_ll
#                  (presumably metres per pixel and pixels per tile side - TODO confirm)
products = {
    'CORINE':{'product':'oxford-university:corine-land-cover',
             'bands':['CLC_class'],
             'sdate':'2006-12-29',
             'edate':'2007-01-03',
             'resolution':10,
             'tilesize':1000},
    'MODIS':{'product':"modis:mcd12q1:051",
            'bands':['Land_Cover_Type_1'],
            'sdate':'2006-12-29', 
            'edate':'2007-01-03',
            'resolution':500,
            'tilesize':50},
    'CDL':{'product':"usda:cdl:v1",
          'bands':['class'],
          'sdate':'2009-12-29',  # first year with full lower 48
          'edate':'2010-01-03',
          'resolution':30,
          'tilesize':250}
}

In [None]:
def do_ll(ll_geom,prodstr):
    """Sum the per-class pixel counts of every DLTile covering ``ll_geom``.

    Args:
        ll_geom: Geometry handed to ``raster_client.dltiles_from_shape``.
        prodstr: Key into the module-level ``products`` dict; selects the tile
            resolution/size and is forwarded to ``reduce_tile``.

    Returns:
        dict: Total count for each aggregate class plus ``'all'``.
    """
    spec = products[prodstr]
    tiles = raster_client.dltiles_from_shape(spec['resolution'],
                                             spec['tilesize'],
                                             0,
                                             ll_geom)

    print (f'len tiles: {len(tiles["features"])}')

    # Per-tile counts keyed by DLTile key.
    tile_res = {tile['properties']['key']: reduce_tile(tile,prodstr)
                for tile in tiles['features']}

    classes = ('cropland', 'forestshrub', 'grassy', 'human', 'wetlands', 'barren','other','all')
    return {kk: sum(counts[kk] for counts in tile_res.values()) for kk in classes}

In [None]:
def reduce_tile(tile,prodstr):
    """Count the pixels of one DLTile that fall into each aggregate class.

    Searches the product's scenes over the tile, rasterises the first scene
    found and counts the pixels whose value is listed for each class in
    ``labels_all[prodstr]``.

    Args:
        tile: DLTile feature; its ``'geometry'`` and ``properties.key`` are used.
        prodstr: Key into the module-level ``products`` and ``labels_all``.

    Returns:
        dict: Count per aggregate class plus ``'all'`` (rows x columns of the
        raster); all zeros when no scene is found.
    """
    class_names = ('cropland', 'forestshrub', 'grassy', 'human', 'wetlands', 'barren','other')
    spec = products[prodstr]

    scenes = metadata_client.search(spec['product'],
                                    geom=tile['geometry'],
                                    start_datetime=spec['sdate'],
                                    end_datetime=spec['edate'])
    print (f'len scenes: {len(scenes["features"])}')

    # Nothing to rasterise: every class, and the total, is zero.
    if len(scenes['features'])<1:
        res_dict = dict.fromkeys(class_names, 0)
        res_dict['all'] = 0
        print (res_dict)
        return res_dict

    # Only the first matching scene is read.
    arr, meta = raster_client.ndarray(scenes['features'][0].id,
                                      bands=spec['bands'],
                                      scales=[[0,255]],
                                      ot='Byte',
                                      dltile=tile['properties']['key'])

    res_dict = {kk: np.isin(arr,labels_all[prodstr][kk]).sum() for kk in class_names}
    res_dict['all'] = arr.shape[0]*arr.shape[1]
    print (res_dict)
    return res_dict

In [None]:
# Run the local prototype reduction on a single grid cell (index 1123).
for pix_x, pix_y in coords[1123:1124]:
    _geom = px2bbox(pix_x, pix_y)
    print (list(_geom.exterior.coords))
    aa = area(geometry.mapping(_geom))

    # First intersecting country row decides the product.
    hits = ne.intersects(_geom)
    isostr = ne.loc[hits,'ISO_A2'].values[0]

    if isostr=='US':
        prodstr = 'CDL'
    else:
        prodstr = 'CORINE' if isostr in corine_countries else 'MODIS'

    print (np.sqrt(aa)/10, aa/100, isostr, prodstr)

    GOTRESULT = do_ll(_geom,prodstr)
    print ('GOTRESULT',GOTRESULT)