In [1]:
import os, sys

In [2]:
# Repository root: three directory levels above the notebook's working directory.
root = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir)
)
# Make modules that live under the repository root importable from this notebook.
sys.path.append(root)

# Building a test set

- want to try a few different sampling patterns
- rand deploy-area, pop-dens, actual, (NTLs?)
- 2 km tiles: 10k tiles for rand-area, 10k for pop-dens, 15k for actual, at various Gaussian kernels

In [3]:
import rasterio
from rasterio import features

In [4]:
import geopandas as gpd
from shapely import geometry

In [5]:
import json
import numpy as np

In [6]:
import matplotlib.pyplot as plt

In [7]:
import pandas as pd

In [None]:
## For each sampling concept: get the NumPy array / image of the deployment area, generate a few thousand tiles from it, then hand-label them to measure object-level recall

### pop-dens - get shape and transform

In [None]:
# Open the GHSL population raster and keep only its georeferencing metadata.
ghsl_pop_path = os.path.join(
    root, 'data', 'GHSL', 'GHS_POP_E2015_GLOBE_R2019A_4326_30ss_V1_0.tif')
with rasterio.open(ghsl_pop_path, 'r') as rst:
    transform, crs, shp = rst.transform, rst.crs, rst.shape
    # The pixel read is left commented out in this cell:
    #popdens_arr = rst.read([1])

In [None]:
# `popdens_arr` is not loaded at this point: the `rst.read` in the cell above is
# commented out and the array is only read in the pop-dens section further
# down, so inspecting it here raised NameError on a fresh top-to-bottom run.
# Show the raster shape captured from the file metadata instead.
shp

In [None]:
# Shape of the population raster as reported by rasterio (`rst.shape`).
shp

In [None]:
# Coordinate reference system of the population raster (`rst.crs`).
crs

### use Softmax

In [None]:
def softmax(x):
    """Return the softmax of the scores in ``x``.

    The global maximum of ``x`` is subtracted before exponentiating, which
    keeps the exponentials from overflowing; the exponentials are then
    normalised by their sum along axis 0.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    total = exps.sum(axis=0)
    return exps / total

#### random demo

In [None]:
# Toy 10x10 grid with up to ten randomly chosen cells switched on
# (repeated draws can land on the same cell).
a = np.zeros((10, 10))
for _ in range(10):
    coords = [int(v) for v in np.random.rand(2) * 10]
    a[tuple(coords)] = 1

In [None]:
# (row, col) index pairs of every non-zero cell in the demo grid.
nz_rows, nz_cols = np.where(a)
coords = list(zip(nz_rows, nz_cols))

In [None]:
# Values of the non-zero cells, in the same order np.where returns their indices.
a[np.where(a)]

In [None]:
# get the coords

# get the softmax values

# do the random choice

In [None]:
# Example probability vector: softmax over the scores 0..19.
P = softmax(np.arange(20))

In [None]:
# Weighted draw without replacement, using P as the per-index probabilities.
np.random.choice(20,5,replace=False, p=P) # from 20, choose 5

### deployment area - uniform sample

In [None]:
# Inputs: country outlines, the deployment-area polygons and the do-not-run areas.
ne = gpd.read_file(os.path.join(root, 'data', 'ne_10m_countries.gpkg'))
popshp = gpd.read_file(os.path.join(root, 'data', 'popshp_gt1_d7k.geojson'))
dnr = gpd.read_file(os.path.join(root, 'data', 'do_not_run.geojson'))

# Merge the do-not-run features into one geometry and drop rows without geometry.
dnr_mp = dnr.unary_union
has_geom = ~popshp.geometry.isna()
popshp = popshp[has_geom]

# Part of Russia lying inside the 60N-89N band, merged into one geometry.
RU = ne.loc[ne['ISO_A2'] == 'RU', 'geometry']
RU_clip = geometry.Polygon([[-180, 60], [-180, 89], [180, 89], [180, 60]])
RU_elim = RU.geometry.intersection(RU_clip).geometry.unary_union

# Box over 169W-30W, 60N-89N used to clip North America.
NA_poly = geometry.Polygon([[-169, 60], [-169, 89], [-30, 89], [-30, 60]])

# Subtract each exclusion geometry in turn, then discard features left empty.
for excluded in (dnr_mp, NA_poly, RU_elim):
    popshp.geometry = popshp.geometry.apply(
        lambda geom, cut=excluded: geom.difference(cut))
emptied = popshp.geometry.is_empty
popshp = popshp[~emptied]

In [None]:
# Round-trip through GeoJSON text to get the features as plain Python dicts.
popshp_features = json.loads(popshp.to_json())

In [None]:
# Number of deployment-area features in the collection.
len(popshp_features['features'])

In [None]:
# One GeoJSON geometry dict per feature, passed to rasterize below.
geoms = [ft['geometry'] for ft in popshp_features['features']]

In [None]:
# Burn the deployment-area polygons onto the population raster's grid
# (`shp` and `transform` come from the GHSL file opened earlier), so the mask
# shares that raster's shape and georeferencing.
# NOTE(review): no burn value is passed, so covered pixels presumably get
# rasterio's default value — confirm against the rasterize docs.
popshp_arr = features.rasterize(
        geoms,
        out_shape=shp,
        transform=transform,
        all_touched=False)

In [None]:
# Render the rasterised deployment-area mask and save it with the test data.
mask_png = os.path.join(root, 'data', 'test_data', 'deployarea_uniform_density.png')
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
ax.imshow(popshp_arr)
ax.axis('off')
fig.savefig(mask_png)
plt.show()

In [None]:
# (row_indices, col_indices) of every pixel with a positive mask value.
coords = np.where(popshp_arr>0)

In [None]:
# Spot check: mask value at the first selected pixel.
popshp_arr[coords[0][0],coords[1][0]]

In [None]:
# Mask values of all positive pixels, in row-major order.
in_area = popshp_arr > 0
vals = popshp_arr[in_area]

In [None]:
# Turn the mask values into sampling weights.
# NOTE(review): the draw in the following cell does not pass `p=`, so these
# weights look unused in this section — confirm before relying on them.
# Re-running this cell applies softmax to its own previous output.
vals = softmax(vals)

In [None]:
# Uniform draw (no `p=`) of 11,000 distinct indices into the deployment-area
# pixel list. The filtering further down keeps at most the first 10,000 points
# that are far enough from existing tiles, so this over-samples by 10%.
# Seed NumPy's global RNG first so the sampled test set can be regenerated.
np.random.seed(0)
select_idx = np.random.choice(len(coords[0]), 11000, replace=False)

In [None]:
# Convert the selected (row, col) pixel indices to coordinates at the pixel
# centres using the raster's affine transform, then hold them as arrays.
x,y = rasterio.transform.xy(transform, coords[0][select_idx], coords[1][select_idx], offset='center')
x = np.array(x)
y = np.array(y)

In [None]:
# Jitter each point uniformly within +-15 arcsec of its pixel centre, i.e. over
# a 30-arcsec window. Coordinates are in degrees and 1 degree = 3600 arcsec;
# the previous divisor of 360 made the jitter ten times larger than that.
# NOTE(review): assumes the window is meant to match the 30-arcsec ("30ss")
# pixel of the population raster so jittered points stay inside their source
# pixel — confirm.
ARCSEC_PER_DEG = 3600
x = x + (np.random.rand(x.shape[0]) * 30 - 15) / ARCSEC_PER_DEG
y = y + (np.random.rand(y.shape[0]) * 30 - 15) / ARCSEC_PER_DEG

In [None]:
### check that they're far away enough from the cv tiles
from shapely import ops

In [None]:
from solarpv.utils import V_inv

In [None]:
# Existing tiles to keep the new samples away from (cross-validation and
# training sets, going by the file names).
cv_tiles = gpd.read_file(os.path.join(root,'data','cv_all_tiles.geojson'))
trn_tiles = gpd.read_file(os.path.join(root,'data','all_trn_dltiles.geojson'))

In [None]:
# Stack the CV and training tiles into one table. `DataFrame.append` is
# deprecated and removed in pandas 2.0, and it kept each frame's own row
# labels, so labels repeated across the two inputs. `ignore_index=True` gives
# a clean 0..n-1 index, so label-based and positional lookups agree.
all_tiles = pd.concat([cv_tiles, trn_tiles], ignore_index=True)

In [None]:
# Centroid coordinates of every tile, used for the nearest-tile search.
# NOTE(review): presumably lon/lat degrees — confirm the tiles' CRS.
all_tiles['x'] = all_tiles['geometry'].centroid.x
all_tiles['y'] = all_tiles['geometry'].centroid.y

In [None]:
def _too_close(x,y):
    """Return True when (x, y) lies within 4 km of the nearest existing tile.

    The nearest tile is chosen by straight-line distance in coordinate space
    between (x, y) and the centroids stored in ``all_tiles['x']`` and
    ``all_tiles['y']``. The distance to that tile is then computed with
    ``V_inv``.

    Parameters
    ----------
    x, y : float
        Point coordinates in the same units as the tile centroids
        (presumably lon/lat degrees -- confirm against the caller).

    Returns
    -------
    bool
        ``dist < 4.`` where ``dist`` is the first value returned by ``V_inv``
        (in km, per the inline comment).

    Notes
    -----
    The nearest row is located by *position*. The earlier version took the
    index *label* from ``Series.idxmin()`` and fed it to the positional
    ``.iloc``, which selects the wrong row whenever the index is not a plain
    0..n-1 range (for example after stacking two frames without resetting the
    index).

    Side effect: the coordinate-space distances are written to
    ``all_tiles['ll_dist']`` on every call, as before.
    """
    tile_x = all_tiles['x'].values
    tile_y = all_tiles['y'].values
    ll_dist = np.sqrt((tile_x - x)**2 + (tile_y - y)**2)
    all_tiles['ll_dist'] = ll_dist
    # nanargmin skips missing centroids, as idxmin did.
    nearest = int(np.nanargmin(ll_dist))
    dist,_,_ = V_inv((y,x), (tile_y[nearest], tile_x[nearest])) # dist in km
    return dist<4.

In [None]:
# One row per sampled point, with its x and y coordinates as columns.
new_pts = pd.DataFrame({'x': x, 'y': y})

In [None]:
# Flag each sampled point that _too_close reports as near an existing tile
# (row-wise call: one nearest-tile search per point).
new_pts['too_close'] = new_pts.apply(lambda row: _too_close(row['x'],row['y']), axis=1)

In [None]:
# How many sampled points were flagged as too close.
new_pts['too_close'].sum()

In [None]:
# Keep only the points not flagged as too close, capped at the first 10,000.
far_enough = new_pts['too_close'].eq(False)
new_pts = new_pts.loc[far_enough].head(10000)

In [None]:
# Scatter the retained sample points and save the figure with the test data.
pts_png = os.path.join(root, 'data', 'test_data', 'deployarea_uniform_pts.png')
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
ax.scatter(new_pts['x'], new_pts['y'])
ax.axis('off')
fig.savefig(pts_png)
plt.show()

In [None]:
# Build a shapely Point from each row's x and y.
new_pts['geometry'] = new_pts.apply(lambda row: geometry.Point(row['x'],row['y']), axis=1)

In [None]:
# Promote the table to a GeoDataFrame using the Point column as its geometry.
# NOTE(review): no CRS is assigned here — confirm whether downstream code needs one.
new_pts = gpd.GeoDataFrame(new_pts, geometry='geometry')

In [None]:
# Merge all sample points into a single geometry for the tile lookup.
mp = new_pts.unary_union

In [None]:
import descarteslabs as dl

In [None]:
# Descartes Labs raster client used for the tile lookup.
rc = dl.Raster()

In [None]:
# Request the DLTiles that cover the sample points.
# NOTE(review): tilesize=200 at resolution=10 would give the 2 km tiles
# mentioned in the notes at the top, assuming the resolution is in metres — confirm.
tile_fc = rc.dltiles_from_shape(resolution=10, tilesize=200, pad=0, shape=mp)

In [None]:
# Write the tile collection to GeoJSON. The `with` block guarantees the file is
# flushed and closed before anything reads it back; the bare `open(...)`
# previously passed to json.dump was never closed explicitly.
tiles_path = os.path.join(root,'data','test_data','deployarea_uniform_tiles_10k.geojson')
with open(tiles_path, 'w') as f:
    json.dump(tile_fc, f)

In [None]:
# Load the saved tile GeoJSON back as a GeoDataFrame.
tile_gdf = gpd.read_file(os.path.join(root,'data','test_data','deployarea_uniform_tiles_10k.geojson'))

In [None]:
# Split the tiles into two shapefiles: the first 5,000 rows go to "A" and all
# remaining rows (however many there are) go to "B".
tile_gdf.iloc[0:5000].to_file(os.path.join(root,'data','test_data','deployarea_uniform_tiles_5kA.shp'))
tile_gdf.iloc[5000:].to_file(os.path.join(root,'data','test_data','deployarea_uniform_tiles_5kB.shp'))

### Sample using pop-dens

In [8]:
from scipy.ndimage import gaussian_filter
from skimage.measure import block_reduce

In [9]:
# Open the GHSL population raster, keep its georeferencing metadata and read
# band 1 into memory.
ghsl_pop_path = os.path.join(
    root, 'data', 'GHSL', 'GHS_POP_E2015_GLOBE_R2019A_4326_30ss_V1_0.tif')
with rasterio.open(ghsl_pop_path, 'r') as rst:
    transform, crs, shp = rst.transform, rst.crs, rst.shape
    popdens_arr = rst.read([1])

In [10]:
# Drop the length-1 band axis left by reading `[1]`.
popdens_arr = np.squeeze(popdens_arr)

In [None]:
# Smooth the population grid with a Gaussian kernel (sigma = 7/3).
# NOTE(review): this overwrites popdens_arr, so re-running the cell smooths the
# already-smoothed array again — reload the raster before re-running.
popdens_arr = gaussian_filter(popdens_arr, sigma=7/3)

In [None]:
# Downsample by averaging 12x12 pixel blocks.
# NOTE(review): after this, `transform` and `shp` read from the raster file no
# longer describe popdens_arr's grid — confirm that later index-to-coordinate
# conversions account for the factor of 12.
popdens_arr = block_reduce(popdens_arr,(12,12),np.mean)

In [None]:
# Preview the population grid (averaged over 4x4 blocks for display only,
# colour scale capped at 10000) and save the figure with the test data.
density_png = os.path.join(root, 'data', 'test_data', 'popdens_density_sigma_0.png')
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(32, 16))
preview = block_reduce(popdens_arr, (4, 4), np.mean)
ax.imshow(preview, vmax=10000)
ax.axis('off')
fig.savefig(density_png)
plt.show()

In [None]:
# Gaussian smoothing with sigma = 7/3 applied to the current popdens_arr.
# NOTE(review): the same filter is already applied earlier in this section,
# before block_reduce — confirm this second pass is intended.
popdens_arr = gaussian_filter(popdens_arr, sigma=7/3)