In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")
from tqdm.notebook import tqdm
import time
from IPython.display import clear_output

import ee, geemap

In [18]:
# Turn on IPython's autoreload so edited modules are re-imported automatically
# (mode 2) instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2
import sys  
# Put ../lib/ first on the module search path; presumably this is where the
# project modules imported in the following cells live — TODO confirm.
sys.path.insert(0, '../lib/')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
%reload_ext autoreload
import sand_classification as sc
import quality_mosaic as qm

# Part I: Get the locations from Google Drive

In [20]:
# Identifier of the Google Sheets workbook and the name of the tab to read.
sheet_id = '13nF_pJ02Bd70cDJamuKbvZIkIdJ-kOI4O3Cx9K7Wzos'
# sheet_id = '1XqOtPkiE_Q0dfGSoyxrH730RkwrTczcRbDeJJpqRByQ'
sheet_name = 'dingle2016'
# Assemble the gviz export URL that requests the chosen tab as CSV.
drive_url = (
    "https://docs.google.com/spreadsheets/d/" + sheet_id
    + "/gviz/tq?tqx=out:csv&sheet=" + sheet_name
)
drive_url

'https://docs.google.com/spreadsheets/d/13nF_pJ02Bd70cDJamuKbvZIkIdJ-kOI4O3Cx9K7Wzos/gviz/tq?tqx=out:csv&sheet=dingle2016'

In [21]:
# Path of the Excel workbook that the sampling cell further down writes `output` to.
output_file_path = "../data/labels/gt-bands.xlsx"

In [22]:
# Load the ground-truth Google Sheet, served as CSV at `drive_url`, into a DataFrame.
df = pd.read_csv(filepath_or_buffer=drive_url)
df

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,Distance Downstram of MFT (km),...,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28
0,dingle16-yamuna-0,30.290546,77.562991,gravel,2022-02-14,fluvial,10.2475/08.2016.03,India,Yamuna,1.5,...,,,,,,,,,,
1,dingle16-yamuna-1,30.260267,77.532841,other,2022-02-14,fluvial,10.2475/08.2016.04,India,Yamuna,12.7,...,,,,,,,,,,
2,dingle16-yamuna-2,30.171396,77.472709,gravel,2022-02-14,fluvial,10.2475/08.2016.05,India,Yamuna,26.1,...,,,,,,,,,,
3,dingle16-yamuna-3,30.148757,77.457765,other,2022-02-14,fluvial,10.2475/08.2016.06,India,Yamuna,32.0,...,,,,,,,,,,
4,dingle16-ganga-0,30.051389,78.276702,other,2022-11-15,fluvial,10.2475/08.2016.07,India,Ganga,-4.3,...,,,,,,,,,,
5,dingle16-ganga-1,29.90671,78.163542,other,2022-11-15,fluvial,10.2475/08.2016.08,India,Ganga,8.7,...,,,,,,,,,,
6,dingle16-ganga-2,29.878889,78.172941,other,2022-11-15,fluvial,10.2475/08.2016.09,India,Ganga,11.7,...,,,,,,,,,,
7,dingle16-ganga-3,29.787168,78.177354,gravel,2022-11-15,fluvial,10.2475/08.2016.10,India,Ganga,22.1,...,,,,,,,,,,
8,dingle16-sharda-0,29.102294,80.142014,other,2022-05-13,fluvial,10.2475/08.2016.11,India,Sharda,2.0,...,,,,,,,,,,
9,dingle16-sharda-1,29.07299,80.124145,gravel,2022-05-13,fluvial,10.2475/08.2016.12,India,Sharda,6.2,...,,,,,,,,,,


In [23]:
# Keep only the rows that have a date and both coordinates, then renumber the index from 0.
required_fields = ['Date', 'Longitude', 'Latitude']
df = df.dropna(subset=required_fields).reset_index(drop=True)
# Discard every column whose name starts with 'Unnamed'.
is_unnamed = df.columns.str.startswith('Unnamed')
df = df.loc[:, ~is_unnamed]
df

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,Distance Downstram of MFT (km),D84,D50
0,dingle16-yamuna-0,30.290546,77.562991,gravel,2022-02-14,fluvial,10.2475/08.2016.03,India,Yamuna,1.5,109.896376,60.547689
1,dingle16-yamuna-1,30.260267,77.532841,other,2022-02-14,fluvial,10.2475/08.2016.04,India,Yamuna,12.7,167.730356,87.426576
2,dingle16-yamuna-2,30.171396,77.472709,gravel,2022-02-14,fluvial,10.2475/08.2016.05,India,Yamuna,26.1,79.341293,37.791765
3,dingle16-yamuna-3,30.148757,77.457765,other,2022-02-14,fluvial,10.2475/08.2016.06,India,Yamuna,32.0,68.119692,33.824577
4,dingle16-ganga-0,30.051389,78.276702,other,2022-11-15,fluvial,10.2475/08.2016.07,India,Ganga,-4.3,215.269482,89.884472
5,dingle16-ganga-1,29.90671,78.163542,other,2022-11-15,fluvial,10.2475/08.2016.08,India,Ganga,8.7,176.069353,109.896376
6,dingle16-ganga-2,29.878889,78.172941,other,2022-11-15,fluvial,10.2475/08.2016.09,India,Ganga,11.7,149.085899,97.00586
7,dingle16-ganga-3,29.787168,78.177354,gravel,2022-11-15,fluvial,10.2475/08.2016.10,India,Ganga,22.1,76.638637,37.791765
8,dingle16-sharda-0,29.102294,80.142014,other,2022-05-13,fluvial,10.2475/08.2016.11,India,Sharda,2.0,306.554548,83.865178
9,dingle16-sharda-1,29.07299,80.124145,gravel,2022-05-13,fluvial,10.2475/08.2016.12,India,Sharda,6.2,69.551031,24.76104


In [24]:
# Sanity check: no remaining row is missing its date or either coordinate.
assert df[['Date', 'Longitude', 'Latitude']].isna().any(axis=1).sum() == 0

# Part II: Select subset for training

Here we step through `df` one row at a time, choosing which locations to keep and adjusting their coordinates where needed.

In [25]:
# Display the column labels of `df` as a quick reference for the steps below.
df.columns

Index(['ID', 'Latitude', 'Longitude', 'Class', 'Date', 'Site', 'Source',
       'Country', 'Notes', 'Distance Downstram of MFT (km)', 'D84', 'D50'],
      dtype='object')

In [26]:
# Initialise the Earth Engine client (`ee`) for this kernel session.
ee.Initialize()
# Presumably only needed when no stored credentials exist yet: uncomment, run once,
# then re-run this cell — TODO confirm against the Earth Engine auth docs.
# ee.Authenticate()

In [27]:
# Work on an independent (deep) copy so that changes made to `output` do not alter `df`.
output = df.copy(deep=True)
output

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,Distance Downstram of MFT (km),D84,D50
0,dingle16-yamuna-0,30.290546,77.562991,gravel,2022-02-14,fluvial,10.2475/08.2016.03,India,Yamuna,1.5,109.896376,60.547689
1,dingle16-yamuna-1,30.260267,77.532841,other,2022-02-14,fluvial,10.2475/08.2016.04,India,Yamuna,12.7,167.730356,87.426576
2,dingle16-yamuna-2,30.171396,77.472709,gravel,2022-02-14,fluvial,10.2475/08.2016.05,India,Yamuna,26.1,79.341293,37.791765
3,dingle16-yamuna-3,30.148757,77.457765,other,2022-02-14,fluvial,10.2475/08.2016.06,India,Yamuna,32.0,68.119692,33.824577
4,dingle16-ganga-0,30.051389,78.276702,other,2022-11-15,fluvial,10.2475/08.2016.07,India,Ganga,-4.3,215.269482,89.884472
5,dingle16-ganga-1,29.90671,78.163542,other,2022-11-15,fluvial,10.2475/08.2016.08,India,Ganga,8.7,176.069353,109.896376
6,dingle16-ganga-2,29.878889,78.172941,other,2022-11-15,fluvial,10.2475/08.2016.09,India,Ganga,11.7,149.085899,97.00586
7,dingle16-ganga-3,29.787168,78.177354,gravel,2022-11-15,fluvial,10.2475/08.2016.10,India,Ganga,22.1,76.638637,37.791765
8,dingle16-sharda-0,29.102294,80.142014,other,2022-05-13,fluvial,10.2475/08.2016.11,India,Sharda,2.0,306.554548,83.865178
9,dingle16-sharda-1,29.07299,80.124145,gravel,2022-05-13,fluvial,10.2475/08.2016.12,India,Sharda,6.2,69.551031,24.76104


In [29]:
%reload_ext autoreload
import sand_classification as sc
import quality_mosaic as qm
import file_management as fm

In [80]:
# INDEX is the 0-based row position in `output` that the manual loop works on.
# It is initialised only when it does not exist yet: a fresh kernel starts at the
# first row (previously INDEX was never defined, so the cells below raised
# NameError), while re-running this cell mid-session keeps the current position.
# To jump to a specific row, assign INDEX explicitly in a separate cell.
if 'INDEX' not in globals():
    INDEX = 0
# Value passed to qm.get_s1s2_data as `sampling_buffer_m` (metres, going by the name).
sampling_buffer_m = 5
# Interactive geemap widget handed to the qm helpers in the cells below.
Map = geemap.Map()
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

## Manual Loop

Keep running the following 2 cells until you're done with the manual sampling.


In [107]:
# Optional: override the date of the row at INDEX before fetching imagery.
# Comment this line out to keep the date that came from the sheet.
# A single positional .iloc[row, column] assignment is used instead of the chained
# output['Date'].iloc[INDEX] = ..., which assigns through an intermediate Series and
# may not write back to `output` (SettingWithCopyWarning / Copy-on-Write).
output.iloc[INDEX, output.columns.get_loc('Date')] = '2018-12-11'

In [108]:
# Request the imagery and the sample for the row at INDEX from the quality_mosaic
# helper; the map widget is passed in so the helper can use it.
s1_s2, sample = qm.get_s1s2_data(
    output,
    Map,
    INDEX,
    display_smap=False,
    mosaic_method='median',
    sampling_buffer_m=sampling_buffer_m,
    max_search_window_months=2,
    median_samples=5,
    roi_buffer_m=5000,
    obia=True,
)

Index:  18  ID:  dingle16-kosi-3 Class:  gravel  Site:  fluvial
Search window from 2018-10-11 to 2019-02-11


## Use this immediately after the map is refreshed to either select or discard the sample

`INDEX` is updated automatically when this function is called.
The cell then writes `output` to the Excel workbook: if a sheet with the same name already exists, only that sheet is overwritten, and the other sheets in the workbook are preserved.

In [109]:
# Record the decision for the current sample; the helper returns the updated table
# together with the new INDEX.
output, INDEX = qm.get_training_sample(output, s1_s2, sample, Map, INDEX, display_clusters=False, obia=True)
# fm.write_to_excel(output, output_file_path, sheet_name)
# Save after every sample. Append mode replaces only the sheet named `sheet_name`
# and leaves the other sheets in the workbook alone, but it requires the workbook
# to exist already, so fall back to creating the file on the very first write.
try:
    writer = pd.ExcelWriter(output_file_path, engine="openpyxl", mode="a", if_sheet_exists="replace")
except FileNotFoundError:
    writer = pd.ExcelWriter(output_file_path, engine="openpyxl", mode="w")
with writer:
    output.to_excel(writer, sheet_name=sheet_name, index=False)
# Show the row just processed (assumes the helper advanced INDEX by one — TODO confirm).
output.iloc[INDEX-1]

New marker accepted
Kept Observation


ID                                   dingle16-kosi-3
Latitude                                   26.787227
Longitude                                  87.123686
Class                                         gravel
Date                                      2018-12-11
Site                                         fluvial
Source                            10.2475/08.2016.24
Country                                        India
Notes                                           Kosi
Distance Downstram of MFT (km)                   8.8
D84                                       135.298309
D50                                         63.55792
B2_mean                                  2387.865479
B3_mean                                  2763.817139
B4_mean                                   2986.87085
B8_mean                                  3141.317139
B8A_mean                                 3082.381836
B11_mean                                 4161.795898
B12_mean                                 4493.

## Final Output

In [110]:
# Display the final table: the original sheet columns plus the columns added during sampling.
output

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,Distance Downstram of MFT (km),...,B11_mean,B12_mean,VV_mean,VH_mean,mTGSI_mean,BSI_mean,NDWI_mean,keep,location_tweaked,class_code
0,dingle16-yamuna-0,30.29146,77.561964,gravel,2022-02-14,fluvial,10.2475/08.2016.03,India,Yamuna,1.5,...,2219.377197,2056.53125,-9.634685,-18.923203,0.046771,0.077873,-0.121202,True,True,2
1,dingle16-yamuna-1,30.260467,77.531451,other,2022-02-14,fluvial,10.2475/08.2016.04,India,Yamuna,12.7,...,2543.482666,2409.145264,-11.071243,-21.289553,0.057402,0.08444,-0.110341,True,True,7
2,dingle16-yamuna-2,30.169525,77.476584,gravel,2022-02-14,fluvial,10.2475/08.2016.05,India,Yamuna,26.1,...,2373.446533,2117.270508,-10.771582,-18.298353,0.053079,0.096637,-0.161785,True,True,2
3,dingle16-yamuna-3,30.148757,77.457765,other,2022-02-14,fluvial,10.2475/08.2016.06,India,Yamuna,32.0,...,2491.108398,2440.03833,-10.742876,-19.266701,0.0651,0.085396,-0.098332,True,False,7
4,dingle16-ganga-0,30.050727,78.272932,other,2022-11-15,fluvial,10.2475/08.2016.07,India,Ganga,-4.3,...,3622.755615,3593.54541,-11.001678,-18.943886,0.068214,0.085928,-0.055919,True,True,7
5,dingle16-ganga-1,29.906995,78.163685,other,2022-11-15,fluvial,10.2475/08.2016.08,India,Ganga,8.7,...,3604.649902,3512.469971,-10.806618,-19.392317,0.071,0.094782,-0.08251,True,True,7
6,dingle16-ganga-2,29.878889,78.172941,other,2022-11-15,fluvial,10.2475/08.2016.09,India,Ganga,11.7,...,2465.141357,2261.391357,-7.834619,-16.453497,0.078138,0.12029,-0.155407,True,False,7
7,dingle16-ganga-3,29.792369,78.173349,gravel,2022-11-15,fluvial,10.2475/08.2016.10,India,Ganga,22.1,...,3489.370117,3545.863525,-8.715953,-17.023262,0.098847,0.115525,-0.095676,True,True,2
8,dingle16-sharda-0,29.102294,80.142014,other,2022-05-13,fluvial,10.2475/08.2016.11,India,Sharda,2.0,...,3207.373779,3220.383057,-13.895819,-25.606272,0.092357,0.111749,-0.068376,True,False,7
9,dingle16-sharda-1,29.075778,80.123819,gravel,2022-05-13,fluvial,10.2475/08.2016.12,India,Sharda,6.2,...,3055.792969,3083.984375,-14.740812,-25.136497,0.093375,0.111227,-0.071061,True,True,2
