In [1]:
import numpy as np
import os
import rioxarray as rioxr
from random import sample

from shapely.geometry import Point 
import geopandas as gpd

from rasterio.crs import CRS

import pandas as pd

In [2]:
# Load the three prediction rasters for the selected year.
year = 2020

# Directory holding the filtered/clipped prediction rasters for `year`.
root = '/home/jovyan/msai4earth-esa/iceplant_detection/temp/' + f'LS_filter_clip_preds_{year}'

# File names, in the order the rasters are stored in `rasters`.
raster_files = [
    f'LS_merged_crs26910_S_{year}.tif',
    f'LS_merged_crs26910_W_{year}.tif',
    f'LS_merged_crs26911_{year}_clip.tif',
]

# Open each file and squeeze out length-1 dimensions.
rasters = []
for raster_file in raster_files:
    fp = os.path.join(root, raster_file)
    rasters.append(rioxr.open_rasterio(fp).squeeze())

# Individual handles, kept alongside the list for direct access.
r_26910_S, r_26910_W, r_26911 = rasters

In [3]:
# Per-raster pixel counts for each class, read from the working directory.
pixel_count_csv = os.path.join(os.getcwd(), 'rasters_2020_pixel_counts.csv')
pixel_count = pd.read_csv(pixel_count_csv)
pixel_count

Unnamed: 0,n_nonice_20,n_ice_20,n_ground_20,n_water_20,raster
0,33222542,9359285,113802445,126328228,LS_merged_crs26910_S_2020
1,1005498,626950,3058661,6139494,LS_merged_crs26910_W_2020
2,89606257,2915223,63428879,138290707,LS_merged_crs26911_2020_clip


In [None]:
# STRATIFIED RANDOM SAMPLING
# For every class (stratum) draw a fixed number of pixels. Within a class the
# samples are split among the three rasters at random, with probabilities
# proportional to each raster's pixel count for that class. The sampled pixel
# centres are stored as Points in the CRS of the raster they came from.
cats = [0, 1, 2, 3]
n_samples = [87, 87, 91, 135]
points = []
classes = []
which_raster = []

# Fixed seed so that Restart Kernel -> Run All reproduces the same sample.
SEED = 42
rng = np.random.default_rng(SEED)

for c in range(0, len(cats)):

    # pixels in each raster from specified class
    # NOTE(review): this is a positional lookup - it assumes column `c` of
    # pixel_count holds the counts for class cats[c]. If the CSV was read with
    # a leading index column (e.g. 'Unnamed: 0') every class is shifted by one
    # column - confirm against pixel_count.columns.
    class_counts = pixel_count.iloc[:, c].tolist()
    n1 = class_counts[0]
    n2 = class_counts[1]
    n3 = class_counts[2]
    s = n1 + n2 + n3

    # randomly specify number of samples per raster
    dist = rng.choice([0, 1, 2], size=n_samples[c], p=[n1/s, n2/s, n3/s])
    for i in range(0, 3):
        n_samp_per_raster = int(np.count_nonzero(dist == i))

        if n_samp_per_raster != 0:

            raster = rasters[i]

            # index arrays (one per dimension) of the pixels equal to this class
            cat_raster = np.nonzero(raster.values == cats[c])

            # Draw positions into those index arrays without replacement.
            # Sampling integer positions avoids building a Python list with
            # one (y, x) tuple per matching pixel.
            picked = rng.choice(len(cat_raster[0]), size=n_samp_per_raster, replace=False)
            y = cat_raster[0][picked]
            x = cat_raster[1][picked]

            # x and y coordinates in raster CRS corresponding to sampled points
            x_coord = raster.x.values[x]
            y_coord = raster.y.values[y]

            # make points
            points.extend(Point(m, n) for m, n in zip(x_coord, y_coord))

            which_raster.extend([i] * n_samp_per_raster)

    # the per-raster counts above add up to n_samples[c], so this stays
    # aligned with `points` and `which_raster`
    classes.extend([cats[c]] * n_samples[c])

In [None]:
# Build the table of sampled points and convert their coordinates to lat/lon.
#
# The points were sampled from several rasters (the file names indicate
# EPSG:26910 and EPSG:26911), and each point's coordinates are expressed in
# the CRS of the raster it came from. Each group of points is therefore tagged
# with its own raster's CRS before reprojecting; using a single CRS for all
# points would misplace every point drawn from a raster in a different CRS.
points_raw = pd.DataFrame({'geometry': points,
                           'class' : classes,
                           'which_raster' : which_raster})

points_latlon = []
for raster_idx, raster_points in points_raw.groupby('which_raster'):
    raster_points_gdf = gpd.GeoDataFrame(raster_points,
                                         geometry='geometry',
                                         crs=rasters[int(raster_idx)].rio.crs)
    # change to lat/lon crs
    points_latlon.append(raster_points_gdf.to_crs(CRS.from_epsg(4326)))

# sort_index restores the original sampling order after the per-raster split
points_df = pd.concat(points_latlon).sort_index()

points_df['LON'] = points_df.geometry.x
points_df['LAT'] = points_df.geometry.y

points_df = points_df.drop(['geometry'], axis=1)

In [None]:
# Display the sample table (class, which_raster, LON, LAT) for a visual check.
points_df

In [None]:
# Write the sample table to disk; the row index is saved as the PLOTID column.
sample_csv = 'tiny_sample.csv'
points_df.to_csv(sample_csv, index_label='PLOTID')

In [None]:
# Per-raster share of the pixel counts, using the values left over from the
# last class processed by the sampling loop.
[count / s for count in (n1, n2, n3)]