# Resampling

This notebook allows the user to resample the ground truth in case they want to change some aspect of the sampling. The most common use of this tool is to resample with changed SNIC parameters, and that is what will be explored here.

In [158]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import geemap, ee

from geemap import cartoee
import cartopy.io.img_tiles as cimgt
import cartopy.crs as ccrs

import warnings
warnings.filterwarnings("ignore")

from tqdm.notebook import tqdm

In [159]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before running code so edits to library files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Put '../lib/' at the front of the module search path.
# NOTE(review): presumably this is where the project modules imported in the
# following cell (quality_mosaic, train_val) live — confirm.
import sys  
sys.path.insert(0, '../lib/')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [160]:
# Refresh the autoreload extension before importing the project modules.
%reload_ext autoreload
# import sand_classification as sc
# Project helper modules used throughout the rest of the notebook.
import quality_mosaic as qm  # provides get_s1s2_data / get_training_sample used in the resampling loop
import train_val as tv  # provides read_gt and resample_columns used to load the ground truth

In [161]:
# Connect to Earth Engine. If initialization fails for any reason, run the
# authentication flow once and then try to initialize again.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()

In [162]:
# Load the ground-truth spreadsheet through the project reader.
# NOTE(review): keep_columns presumably restricts the frame to
# tv.resample_columns, and zero_out=True appears to blank the band/feature
# columns (they show as empty in the preview) so they can be re-sampled
# below — confirm against tv.read_gt.
df = tv.read_gt(
    '../data/labels/23-07-26_gt-bands.xlsx',
    keep_columns=tv.resample_columns,
    zero_out=True,
)
df

Unnamed: 0,Longitude,Latitude,class_code,ID,keep,Date,Class,B2_median,B3_median,B4_median,...,NDWI_median,water_median,trees_median,grass_median,flooded_vegetation_median,crops_median,shrub_and_scrub_median,built_median,bare_median,snow_and_ice_median
0,-65.987196,18.459221,1,sedinet-1,True,2020-06-15,sand,,,,...,,,,,,,,,,
1,-75.067643,38.665018,1,sedinet-2,True,2020-06-22,sand,,,,...,,,,,,,,,,
2,-75.067228,38.665267,1,sedinet-3,True,2020-06-22,sand,,,,...,,,,,,,,,,
3,-74.093250,39.792640,1,sedinet-5,True,2021-06-24,sand,,,,...,,,,,,,,,,
4,-74.092864,39.792648,1,sedinet-6,True,2020-06-24,sand,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202,34.798517,-24.753306,1,Zavala-7,True,2023-05-11,Sand,,,,...,,,,,,,,,,
203,34.798022,-24.753467,1,Zavala-8,True,2023-05-11,Sand,,,,...,,,,,,,,,,
204,34.797492,-24.753653,1,Zavala-9,True,2023-05-11,Sand,,,,...,,,,,,,,,,
205,34.796390,-24.754006,1,Zavala-11,True,2023-05-11,Sand,,,,...,,,,,,,,,,


In [169]:
# Tunable resampling parameters; change these and re-run the cells below.
segmentation_size = 5  # pixels; passed as size_seg_px to qm.get_training_sample and used in the output file name
max_search_window_months = 3  # forwarded to qm.get_s1s2_data to bound the image search window (months)
median_samples = 3  # forwarded to qm.get_s1s2_data alongside mosaic_method='median'

In [170]:
# Optional: uncomment the lines below to restrict the run to a two-row subset
# (rows 100-101) for a quick test before processing the full table.
# df = df[100:102]
# df.reset_index(drop=True, inplace=True)
# df

In [171]:
# Resample every ground-truth point in turn.
# Only the index label is needed here (the helpers receive `df` plus
# `index=i`), so iterate over df.index instead of building an unused Series
# per row with iterrows(). tqdm takes the total from the index length.
for i in tqdm(df.index):
    print('Processing row: ', i)

    # Build the median mosaic for this sample using the configured search
    # window and number of samples. Map=None / interactive=False keep the
    # call non-interactive.
    s1_s2_dw, sample = qm.get_s1s2_data(
        df,
        Map=None,
        index=i,
        display_smap=False,
        mosaic_method='median',
        max_search_window_months=max_search_window_months,
        median_samples=median_samples,
        roi_buffer_m=5000,
        obia=True,
        interactive=False,
    )

    # A None image is treated as "no data": report it and move on, leaving
    # this row's feature columns unfilled.
    if s1_s2_dw is None:
        print('No data for this sample')
        continue

    # get_training_sample returns the updated frame. Rebinding `df` does not
    # disturb the loop, which iterates over the index captured at loop start.
    df, _ = qm.get_training_sample(
        df,
        s1_s2_dw,
        sample,
        Map=None,
        index=i,
        obia=True,
        size_seg_px=segmentation_size,
        interactive=False,
    )


  0%|          | 0/207 [00:00<?, ?it/s]

Processing row:  0
Search window from 2020-03-15 to 2020-09-15
Processing row:  1
Search window from 2020-03-22 to 2020-09-22
Processing row:  2
Search window from 2020-03-22 to 2020-09-22
Processing row:  3
Search window from 2021-03-24 to 2021-09-24
Processing row:  4
Search window from 2020-03-24 to 2020-09-24
Processing row:  5
Search window from 2020-04-06 to 2020-10-06
Processing row:  6
Search window from 2020-04-14 to 2020-10-14
Processing row:  7
Search window from 2020-04-14 to 2020-10-14
Processing row:  8
Search window from 2020-05-05 to 2020-11-05
Processing row:  9
Search window from 2020-05-05 to 2020-11-05
Processing row:  10
Search window from 2020-05-05 to 2020-11-05
Processing row:  11
Search window from 2020-05-20 to 2020-11-20
Processing row:  12
Search window from 2020-05-25 to 2020-11-25
Processing row:  13
Search window from 2020-06-02 to 2020-12-02
Processing row:  14
Search window from 2020-06-02 to 2020-12-02
Processing row:  15
Search window from 2020-06-02 

In [172]:
# Inspect the frame after the resampling loop has run.
df

Unnamed: 0,Longitude,Latitude,class_code,ID,keep,Date,Class,B2_median,B3_median,B4_median,...,NDWI_median,water_median,trees_median,grass_median,flooded_vegetation_median,crops_median,shrub_and_scrub_median,built_median,bare_median,snow_and_ice_median
0,-65.987196,18.459221,1,sedinet-1,True,2020-06-15,sand,1645,2343,2934,...,-0.170226,0.08414,0.038778,0.046349,0.037734,0.059408,0.055884,0.392948,0.17713,0.049061
1,-75.067643,38.665018,1,sedinet-2,True,2020-06-22,sand,2617,3231,3834,...,-0.199872,0.046431,0.022985,0.027334,0.033331,0.042793,0.054064,0.049722,0.533417,0.184304
2,-75.067228,38.665267,1,sedinet-3,True,2020-06-22,sand,2384,2966,3580,...,-0.20157,0.037595,0.016429,0.025915,0.028977,0.045596,0.063974,0.035682,0.604572,0.131641
3,-74.093250,39.792640,1,sedinet-5,True,2021-06-24,sand,3304,3780,4300,...,-0.15018,0.043509,0.020276,0.02983,0.028388,0.047001,0.0538,0.058212,0.480324,0.240375
4,-74.092864,39.792648,1,sedinet-6,True,2020-06-24,sand,3210,3710,4192,...,-0.14578,0.047825,0.022695,0.033304,0.028265,0.056938,0.058323,0.061641,0.439562,0.247686
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202,34.798517,-24.753306,1,Zavala-7,True,2023-05-11,Sand,1994,2830,3644,...,-0.191518,0.045309,0.028198,0.025014,0.032435,0.055887,0.125305,0.05696,0.563004,0.051955
203,34.798022,-24.753467,1,Zavala-8,True,2023-05-11,Sand,1920,2744,3508,...,-0.185723,0.035443,0.024025,0.023055,0.029211,0.049712,0.129282,0.060285,0.588263,0.051365
204,34.797492,-24.753653,1,Zavala-9,True,2023-05-11,Sand,1770,2554,3272,...,-0.197193,0.043471,0.023897,0.02429,0.036118,0.052919,0.144833,0.049216,0.564172,0.047067
205,34.796390,-24.754006,1,Zavala-11,True,2023-05-11,Sand,1776,2556,3280,...,-0.197098,0.042761,0.022884,0.0241,0.036113,0.052513,0.144833,0.048542,0.564975,0.046638


In [173]:
# Keep only fully populated samples: a row is discarded as soon as any of its
# columns is NaN, and the surviving rows are renumbered from 0.
df2 = (
    df
    .dropna(axis=0, how='any')
    .reset_index(drop=True)
)
df2

Unnamed: 0,Longitude,Latitude,class_code,ID,keep,Date,Class,B2_median,B3_median,B4_median,...,NDWI_median,water_median,trees_median,grass_median,flooded_vegetation_median,crops_median,shrub_and_scrub_median,built_median,bare_median,snow_and_ice_median
0,-65.987196,18.459221,1,sedinet-1,True,2020-06-15,sand,1645,2343,2934,...,-0.170226,0.08414,0.038778,0.046349,0.037734,0.059408,0.055884,0.392948,0.17713,0.049061
1,-75.067643,38.665018,1,sedinet-2,True,2020-06-22,sand,2617,3231,3834,...,-0.199872,0.046431,0.022985,0.027334,0.033331,0.042793,0.054064,0.049722,0.533417,0.184304
2,-75.067228,38.665267,1,sedinet-3,True,2020-06-22,sand,2384,2966,3580,...,-0.20157,0.037595,0.016429,0.025915,0.028977,0.045596,0.063974,0.035682,0.604572,0.131641
3,-74.093250,39.792640,1,sedinet-5,True,2021-06-24,sand,3304,3780,4300,...,-0.15018,0.043509,0.020276,0.02983,0.028388,0.047001,0.0538,0.058212,0.480324,0.240375
4,-74.092864,39.792648,1,sedinet-6,True,2020-06-24,sand,3210,3710,4192,...,-0.14578,0.047825,0.022695,0.033304,0.028265,0.056938,0.058323,0.061641,0.439562,0.247686
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,34.798517,-24.753306,1,Zavala-7,True,2023-05-11,Sand,1994,2830,3644,...,-0.191518,0.045309,0.028198,0.025014,0.032435,0.055887,0.125305,0.05696,0.563004,0.051955
201,34.798022,-24.753467,1,Zavala-8,True,2023-05-11,Sand,1920,2744,3508,...,-0.185723,0.035443,0.024025,0.023055,0.029211,0.049712,0.129282,0.060285,0.588263,0.051365
202,34.797492,-24.753653,1,Zavala-9,True,2023-05-11,Sand,1770,2554,3272,...,-0.197193,0.043471,0.023897,0.02429,0.036118,0.052919,0.144833,0.049216,0.564172,0.047067
203,34.796390,-24.754006,1,Zavala-11,True,2023-05-11,Sand,1776,2556,3280,...,-0.197098,0.042761,0.022884,0.0241,0.036113,0.052513,0.144833,0.048542,0.564975,0.046638


## Save to an Excel sheet

In [174]:
# Tag the output with the segmentation size so that runs with different
# settings are written to different files; the tag also serves as sheet name.
postfix = f's{segmentation_size}'
output_file_path = f'../data/labels/gt-bands-resampled-{postfix}-dw.xlsx'
postfix, output_file_path

('s5', '../data/labels/gt-bands-resampled-s5-dw.xlsx')

In [175]:
# Alternative kept for reference: append to an existing workbook and replace
# the sheet. Note that this draft writes `df`, not the NaN-filtered `df2`.
# with pd.ExcelWriter(output_file_path,engine="openpyxl",mode="a",if_sheet_exists="replace") as writer:
#     df.to_excel(writer,sheet_name=postfix,index=False)

# Write the NaN-free samples to the output workbook, in a sheet named after
# the postfix and without the DataFrame index column.
df2.to_excel(
    output_file_path,
    sheet_name=postfix,
    index=False,
)