# Random sampling of xarray DataArrays

Testing a workflow for stratified random sampling of a classified raster, where the number of sample points drawn from each unique class is proportional to that class's share of the pixels.

In [1]:
import pandas as pd
import xarray as xr
import geopandas as gpd
import sys
sys.path.append('../Scripts')
from deafrica_plotting import map_shapefile

## Analysis Parameters

In [2]:
#total number of sample points to share out between the classes
total_points = 500

#path of the raster file that is opened and sampled below
pred_tif = 'results/predicted_12months_255polys_SA.tif'

#### Load the datasets

In [3]:
#open the raster at pred_tif and squeeze out any length-1 dimensions
#NOTE(review): xr.open_rasterio is deprecated in recent xarray releases (rioxarray.open_rasterio is the suggested replacement) - confirm against the installed xarray version
da = xr.open_rasterio(pred_tif).squeeze()

#### Convert to pandas dataframe

In [4]:
#flatten the DataArray into a DataFrame, storing the array values in a column named 'class'
df = da.to_dataframe(name='class')

#### Find class proportions

In [5]:
#Fraction of pixels belonging to each class.
#The class labels are taken from the value_counts index so that every label
#stays paired with its own proportion: value_counts orders by frequency
#(and drops NaN), whereas pd.unique orders by first appearance (and keeps
#NaN), so combining the two positionally can mislabel the proportions or
#fail with a length mismatch.
class_proportions = df['class'].value_counts(normalize=True)
class_ratio = pd.DataFrame({'proportion': class_proportions,
                            'class': class_proportions.index.to_numpy()
                           })
class_ratio.head()

Unnamed: 0,proportion,class
0.0,0.916971,0.0
1.0,0.083029,1.0


#### Sample each class based on its relative proportion

In [6]:
samples = []
#walk the class/proportion pairs row by row rather than re-filtering
#class_ratio with a boolean mask for every class
for _class, proportion in zip(class_ratio['class'], class_ratio['proportion'].values):
    #use relative proportions of classes to sample df
    no_of_points = total_points * proportion
    #random sample each class
    #NOTE: each class count is rounded independently, so the counts are not
    #guaranteed to add up to exactly total_points
    print('Class '+ str(_class)+ ': sampling at '+ str(round(no_of_points)) + ' coordinates')
    sample_loc = df[df['class'] == _class].sample(n=int(round(no_of_points)), random_state=1)
    samples.append(sample_loc)

#join back into a single dataframe
all_samples = pd.concat(samples)
all_samples.head()

Class 0.0: sampling at 458.0 coordinates
Class 1.0: sampling at 42.0 coordinates


Unnamed: 0_level_0,Unnamed: 1_level_0,band,class
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1
-4155930.0,2087640.0,1,0.0
-4121100.0,2198490.0,1,0.0
-4146840.0,2209200.0,1,0.0
-4114950.0,2172870.0,1,0.0
-4176270.0,1870440.0,1,0.0


#### Create shapefile

In [7]:
#pull the two levels of the (y, x) MultiIndex out as coordinate lists
y = all_samples.index.get_level_values(0).tolist()
x = all_samples.index.get_level_values(1).tolist()

#build one point geometry per sample and attach it to the samples
sample_points = gpd.points_from_xy(x, y)
gdf = gpd.GeoDataFrame(all_samples, crs=da.crs, geometry=sample_points)

#move the index levels into columns, then drop everything except
#the class label and the geometry
gdf = gdf.reset_index()
gdf = gdf.drop(columns=['band', 'x', 'y'])


  projstring = _prepare_from_string(projparams)


In [8]:
#display the sampled points with the map_shapefile helper imported from deafrica_plotting
#NOTE(review): attribute='class' presumably selects the column used to label/colour the points - confirm against the helper
map_shapefile(gdf, attribute='class')

Label(value='')

Map(center=[-33.4454797369675, 20.872397555001477], controls=(ZoomControl(options=['position', 'zoom_in_text',…

In [9]:
#write the sampled points to 'sampling_points.shp' (relative path, so it lands in the current working directory)
gdf.to_file('sampling_points.shp')