# Resampling

This notebook allows the user to resample the ground truth when some aspect of the sampling needs to change. The most common use of this tool is to resample with changed SNIC parameters, which is what is explored here.

In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import geemap, ee

from geemap import cartoee
import cartopy.io.img_tiles as cimgt
import cartopy.crs as ccrs

import warnings
warnings.filterwarnings("ignore")

from tqdm.notebook import tqdm

In [3]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so edits to the helper modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys  
# Put the relative '../lib/' directory first on the module search path
# (presumably where quality_mosaic and train_val live — confirm).
sys.path.insert(0, '../lib/')

In [4]:
%reload_ext autoreload
# import sand_classification as sc
import quality_mosaic as qm
import train_val as tv

In [2]:
# Connect to Earth Engine. If the first attempt raises, run the authentication
# flow and then initialise again.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()

In [88]:
# Load the ground-truth workbook, passing tv.resample_columns as the columns to
# keep (presumably the subset needed for resampling — confirm in train_val).
df = tv.read_gt(
    '../data/labels/gt-bands.xlsx',
    keep_columns=tv.resample_columns,
)
df

Unnamed: 0,ID,class_code,B2_mean,B3_mean,B4_mean,B8_mean,B8A_mean,B11_mean,B12_mean,VV_mean,VH_mean,mTGSI_mean,BSI_mean,NDWI_mean,keep,Latitude,Longitude,Date,Class
0,sedinet-1,1,1909.959839,2408.223389,2711.885498,3095.604492,3023.788086,2754.255127,2127.833740,-12.245424,-19.858164,-0.011403,0.046433,-0.133137,True,18.459221,-65.987196,2020-06-15,sand
1,sedinet-2,1,2296.496094,2880.759766,3350.108643,4453.658691,4434.116211,5267.705566,5056.720703,-15.439523,-21.776503,0.089654,0.120402,-0.217234,True,38.665018,-75.067643,2020-06-22,sand
2,sedinet-3,1,2691.572021,3383.153076,4000.680176,4829.711914,4851.709473,6080.360352,6131.175781,-18.035833,-26.745705,0.124123,0.145531,-0.176357,True,38.665267,-75.067228,2020-06-22,sand
3,sedinet-5,1,3196.637695,3728.590576,4162.385742,4965.102539,5022.440918,5703.236328,5773.673340,-17.205423,-24.548832,0.080977,0.094375,-0.141128,True,39.792640,-74.093250,2021-06-24,sand
4,sedinet-6,1,3171.919922,3666.031982,4147.919922,4890.768066,4860.855957,5720.423828,5889.215820,-15.189863,-23.240173,0.090201,0.100478,-0.143360,True,39.792648,-74.092864,2020-06-24,sand
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,mar-1,2,1992.276367,2536.715332,2790.715332,3075.447266,3152.495850,3692.479736,3246.414551,-10.740735,-18.994785,0.071181,0.122653,-0.095751,True,45.059032,8.900689,2018-09-20,gravel
158,mar-2,1,2530.338135,2967.408447,3205.690186,3455.943604,3506.155029,3926.014160,3619.450684,-11.235057,-21.056950,0.053595,0.087829,-0.075812,True,45.130529,9.973807,2018-09-20,sand
159,mar-3,1,2083.977539,2484.022461,2703.011230,2892.101074,2878.224609,3490.910156,3325.443848,-12.918550,-21.584127,0.077710,0.108673,-0.075815,True,45.002953,10.285669,2018-09-20,sand
160,mar-4,2,1883.678589,2170.964355,2251.990967,2226.062500,2256.115967,2814.084717,2491.321533,-8.212272,-18.054604,0.057478,0.104228,-0.012534,True,38.136695,16.066178,2018-10-23,gravel


In [89]:
# Tunable resampling parameters.
# sampling_buffer_m is forwarded to qm.get_s1s2_data as `sampling_buffer_m`
# (metres, going by the name).
sampling_buffer_m = 5
# segmentation_size is forwarded to qm.get_training_sample as `size_seg_px` and
# is also used to build the output file/sheet postfix (e.g. 's5').
segmentation_size = 5 #pixels

In [90]:
# Resample every ground-truth row, one index label at a time.
# For each row, qm.get_s1s2_data is called with the current sampling buffer and
# qm.get_training_sample with the current segmentation size; `df` is rebound to
# the frame returned by qm.get_training_sample on every pass.
for row_idx in tqdm(df.index, total=len(df)):
    s1_s2, sample = qm.get_s1s2_data(
        df,
        Map=None,
        index=row_idx,
        display_smap=False,
        mosaic_method='median',
        sampling_buffer_m=sampling_buffer_m,
        max_search_window_months=3,
        median_samples=5,
        roi_buffer_m=5000,
        obia=True,
        interactive=False,
    )

    df, _ = qm.get_training_sample(
        df,
        s1_s2,
        sample,
        Map=None,
        index=row_idx,
        display_clusters=True,
        obia=True,
        size_seg_px=segmentation_size,
        interactive=False,
    )

  0%|          | 0/162 [00:00<?, ?it/s]

Search window from 2020-03-15 to 2020-09-15
Search window from 2020-03-22 to 2020-09-22
Search window from 2020-03-22 to 2020-09-22
Search window from 2021-03-24 to 2021-09-24
Search window from 2020-03-24 to 2020-09-24
Search window from 2020-04-06 to 2020-10-06
Search window from 2020-04-14 to 2020-10-14
Search window from 2020-04-14 to 2020-10-14
Search window from 2020-05-05 to 2020-11-05
Search window from 2020-05-05 to 2020-11-05
Search window from 2020-05-05 to 2020-11-05
Search window from 2020-05-20 to 2020-11-20
Search window from 2020-05-25 to 2020-11-25
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-02 to 2020-12-02
Search window from 2020-06-11 to

In [91]:
# Display the table after the resampling loop; compare the *_mean columns with
# the values shown when the file was first loaded to see what changed.
df

Unnamed: 0,ID,class_code,B2_mean,B3_mean,B4_mean,B8_mean,B8A_mean,B11_mean,B12_mean,VV_mean,VH_mean,mTGSI_mean,BSI_mean,NDWI_mean,keep,Latitude,Longitude,Date,Class
0,sedinet-1,1,1511.533358,2096.643536,2552.878287,3190.931878,3236.005060,3107.522035,2329.694301,-11.935572,-20.457030,0.015001,0.092612,-0.206431,True,18.459221,-65.987196,2020-06-15,sand
1,sedinet-2,1,2529.118652,3167.847412,3772.559326,4761.186523,4570.389648,5820.500000,5861.533691,-18.566319,-26.146957,0.116683,0.136467,-0.201018,True,38.665018,-75.067643,2020-06-22,sand
2,sedinet-3,1,2345.690977,2963.769350,3563.614551,4482.369157,4376.874950,5474.273120,5482.997241,-16.770894,-28.260041,0.117824,0.139343,-0.203986,True,38.665267,-75.067228,2020-06-22,sand
3,sedinet-5,1,3265.176514,3771.529297,4261.176270,5059.529297,4977.353027,5585.588379,5585.293945,-16.990337,-24.056150,0.069382,0.083791,-0.145745,True,39.792640,-74.093250,2021-06-24,sand
4,sedinet-6,1,3185.351318,3710.540527,4173.837891,4986.054199,4945.000000,5840.243164,6003.837891,-16.506447,-24.847281,0.091011,0.101384,-0.146733,True,39.792648,-74.092864,2020-06-24,sand
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,mar-1,2,2000.964024,2541.247171,2798.233203,3092.173623,3189.508403,3703.246471,3245.579036,-11.453615,-19.329328,0.069457,0.121428,-0.097973,True,45.059032,8.900689,2018-09-20,gravel
158,mar-2,1,2661.529297,3113.470703,3366.176514,3633.646973,3736.058838,4132.706055,3834.411865,-10.941104,-21.402740,0.054709,0.087464,-0.077019,True,45.130529,9.973807,2018-09-20,sand
159,mar-3,1,2170.485596,2589.885742,2820.228516,3002.257080,3023.071533,3677.871338,3495.342773,-13.564342,-21.764050,0.081310,0.113713,-0.073569,True,45.002953,10.285669,2018-09-20,sand
160,mar-4,2,1850.413818,2139.517334,2205.517334,2177.241455,2213.000000,2752.103516,2452.793213,-7.837584,-18.252184,0.058261,0.103504,-0.008752,True,38.136695,16.066178,2018-10-23,gravel


## Save to an Excel sheet

In [92]:
# Tag the output with the segmentation size so that runs with different
# settings end up in different files/sheets (e.g. 's5').
postfix = f's{segmentation_size}'
output_file_path = f'../data/labels/gt-bands-resampled-{postfix}.xlsx'
postfix, output_file_path

('s5', '../data/labels/gt-bands-resampled-s5.xlsx')

In [93]:
# Write the resampled table to the workbook at output_file_path, using the
# postfix as the sheet name and leaving out the DataFrame index.
#
# Alternative kept for reference: add or replace the sheet inside an existing
# workbook instead of writing the file directly.
# with pd.ExcelWriter(output_file_path,engine="openpyxl",mode="a",if_sheet_exists="replace") as writer:
#     df.to_excel(writer,sheet_name=postfix,index=False)
df.to_excel(
    output_file_path,
    sheet_name=postfix,
    index=False,
)