In [70]:
import time
import warnings
from pathlib import Path
from urllib.parse import quote

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings("ignore")
from IPython.display import clear_output
from tqdm.notebook import tqdm

import ee, geemap

In [71]:
%load_ext autoreload
%autoreload 2
import sys  
sys.path.insert(0, '../lib/')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [72]:
%reload_ext autoreload
import sand_classification as sc
import quality_mosaic as qm

# Part I: Get the locations from Google Drive

In [73]:
# Spreadsheet that holds the ground-truth locations.
sheet_id = '13nF_pJ02Bd70cDJamuKbvZIkIdJ-kOI4O3Cx9K7Wzos'
# Alternate spreadsheet id (currently unused):
# sheet_id = '1XqOtPkiE_Q0dfGSoyxrH730RkwrTczcRbDeJJpqRByQ'

# Name of the tab to export; also reused as the sheet name of the Excel output.
sheet_name = 'dingle2020'

# CSV export endpoint for a single tab. The tab name is percent-encoded so that
# names containing spaces, '&', '#' etc. still yield a valid URL.
drive_url = (
    f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq"
    f"?tqx=out:csv&sheet={quote(sheet_name, safe='')}"
)
drive_url

'https://docs.google.com/spreadsheets/d/13nF_pJ02Bd70cDJamuKbvZIkIdJ-kOI4O3Cx9K7Wzos/gviz/tq?tqx=out:csv&sheet=dingle2020'

In [74]:
# Excel workbook the sampled rows are written to (into the sheet named `sheet_name`).
output_file_path = '../data/labels/gt-bands.xlsx'

In [90]:
# Read the ground-truth Google Sheet as CSV from the export URL held in `drive_url`.
df = pd.read_csv(drive_url)
df

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,D50,...,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27
0,dingle2020-0,28.599792,81.259463,gravel,2019-04-08,fluvial,10.2475/08.2016.03,Nepal,Surface grain size from central bar at Karnali...,65.0,...,,,,,,,,,,
1,dingle2020-1,28.601396,81.261771,other,2019-04-08,fluvial,10.2475/08.2016.04,Nepal,Exposed gravel bed material at bifurcation,231.0,...,,,,,,,,,,
2,dingle2020-2,28.417784,81.04349,sand,2019-08-21,fluvial,10.2475/08.2016.05,Nepal,Bank material downstream of the gravel-sand tr...,0.23,...,,,,,,,,,,
3,dingle2020-3,28.420623,81.05193,sand,2019-04-08,fluvial,10.2475/08.2016.06,Nepal,Bed material dredged at transect T5,0.31,...,,,,,,,,,,


In [91]:
# Keep only rows that have a 'Date', 'Longitude' and 'Latitude' value, renumber
# the index, and discard the empty spreadsheet columns that pandas labels
# 'Unnamed: N'.
df = (
    df.dropna(subset=['Date', 'Longitude', 'Latitude'])
      .reset_index(drop=True)
      .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)
df

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,D50,D90
0,dingle2020-0,28.599792,81.259463,gravel,2019-04-08,fluvial,10.2475/08.2016.03,Nepal,Surface grain size from central bar at Karnali...,65.0,155.0
1,dingle2020-1,28.601396,81.261771,other,2019-04-08,fluvial,10.2475/08.2016.04,Nepal,Exposed gravel bed material at bifurcation,231.0,418.0
2,dingle2020-2,28.417784,81.04349,sand,2019-08-21,fluvial,10.2475/08.2016.05,Nepal,Bank material downstream of the gravel-sand tr...,0.23,0.26
3,dingle2020-3,28.420623,81.05193,sand,2019-04-08,fluvial,10.2475/08.2016.06,Nepal,Bed material dredged at transect T5,0.31,0.89


In [77]:
# Sanity check: every remaining row has a date and both coordinates.
assert df[['Date', 'Longitude', 'Latitude']].notna().all().all()

# Part II: Select subset for training

Here we step through the rows of `df` one at a time, choosing which locations to keep and adjusting their positions where needed.

In [78]:
# List the columns that remain after the cleanup above.
df.columns

Index(['ID', 'Latitude', 'Longitude', 'Class', 'Date', 'Site', 'Source',
       'Country', 'Notes', 'D50', 'D90'],
      dtype='object')

In [79]:
# Initialize the Earth Engine client library for this kernel.
ee.Initialize()
# If initialization fails for lack of stored credentials, run ee.Authenticate() once, then re-run this cell.

In [96]:
# Work on a copy: `output` is reassigned by the sampling cells below, while `df`
# keeps the cleaned ground-truth table as loaded.
output = df.copy()
output

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,D50,D90
0,dingle2020-0,28.599792,81.259463,gravel,2019-04-08,fluvial,10.2475/08.2016.03,Nepal,Surface grain size from central bar at Karnali...,65.0,155.0
1,dingle2020-1,28.601396,81.261771,other,2019-04-08,fluvial,10.2475/08.2016.04,Nepal,Exposed gravel bed material at bifurcation,231.0,418.0
2,dingle2020-2,28.417784,81.04349,sand,2019-08-21,fluvial,10.2475/08.2016.05,Nepal,Bank material downstream of the gravel-sand tr...,0.23,0.26
3,dingle2020-3,28.420623,81.05193,sand,2019-04-08,fluvial,10.2475/08.2016.06,Nepal,Bed material dredged at transect T5,0.31,0.89


In [81]:
%reload_ext autoreload
import sand_classification as sc
import quality_mosaic as qm
import file_management as fm

In [119]:
# Row position in `output` handed to the sampling calls below. qm.get_training_sample
# returns an updated INDEX, so only set it here when (re)starting the manual loop.
INDEX = 1
# Forwarded to qm.get_s1s2_data as `sampling_buffer_m` (presumably metres — confirm in quality_mosaic).
sampling_buffer_m = 5
# Interactive map widget; the same instance is passed to both qm calls below.
Map = geemap.Map()
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

## Manual Loop

Keep running the following 2 cells until you're done with the manual sampling.


In [120]:
# Fetch the imagery and the sample for the row at position INDEX of `output`;
# both results are consumed by qm.get_training_sample in the next cell.
s1_s2, sample = qm.get_s1s2_data(
    output,
    Map,
    INDEX,
    display_smap=True,
    mosaic_method='qm',
    sampling_buffer_m=sampling_buffer_m,
    median_samples=5,
    roi_buffer_m=5000,
    obia=True,
)

Index:  1  ID:  dingle2020-1 Class:  other  Site:  fluvial
Search window from 2018-10-08 to 2019-10-08


Map(center=[28.601396010395995, 81.26177107953733], controls=(ZoomControl(options=['position', 'zoom_in_text',…

## Use this immediately after the map is refreshed to either select or discard the sample

`INDEX` is updated automatically each time this function is called, so there is no need to change it by hand between samples.

In [115]:
# Record the decision for the current sample; the call returns the updated
# table together with the INDEX to use on the next pass through the loop.
output, INDEX = qm.get_training_sample(
    output, s1_s2, sample, Map, INDEX, display_clusters=False, obia=True
)

# Persist after every decision so manual work is not lost if the kernel dies.
# Append mode (needed to replace a single sheet and keep the others) raises
# FileNotFoundError when the workbook does not exist yet, and `if_sheet_exists`
# is only accepted in append mode, so the very first write uses plain write mode.
workbook_path = Path(output_file_path)
workbook_path.parent.mkdir(parents=True, exist_ok=True)
if workbook_path.exists():
    writer_options = {"mode": "a", "if_sheet_exists": "replace"}
else:
    writer_options = {"mode": "w"}
with pd.ExcelWriter(output_file_path, engine="openpyxl", **writer_options) as writer:
    output.to_excel(writer, sheet_name=sheet_name, index=False)

# Show the row just before the updated INDEX.
output.iloc[INDEX - 1]

New marker accepted
Kept Observation


ID                                         dingle2020-3
Latitude                                      28.384557
Longitude                                     81.021506
Class                                              sand
Date                                         2019-04-08
Site                                            fluvial
Source                               10.2475/08.2016.06
Country                                           Nepal
Notes               Bed material dredged at transect T5
D50                                                0.31
D90                                                0.89
B2_mean                                     2047.977295
B3_mean                                     2420.636475
B4_mean                                      2623.20459
B8_mean                                      2960.46582
B8A_mean                                    3069.886475
B11_mean                                    3742.113525
B12_mean                                    3703

## Final Output

In [116]:
# Display the table as it stands after the manual loop.
output

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,D50,...,B11_mean,B12_mean,VV_mean,VH_mean,mTGSI_mean,BSI_mean,NDWI_mean,keep,location_tweaked,class_code
0,dingle2020-0,28.600141,81.259212,gravel,2019-04-08,fluvial,10.2475/08.2016.03,Nepal,Surface grain size from central bar at Karnali...,65.0,...,3111.109131,2926.971191,-12.529706,-20.856337,0.062644,0.093398,-0.071288,True,True,2
1,dingle2020-1,28.601396,81.261771,other,2019-04-08,fluvial,10.2475/08.2016.04,Nepal,Exposed gravel bed material at bifurcation,231.0,...,3049.752441,2670.714355,-6.511257,-13.813314,0.031236,0.073106,-0.056618,True,False,7
2,dingle2020-2,28.417711,81.047389,sand,2019-04-08,fluvial,10.2475/08.2016.05,Nepal,Bank material downstream of the gravel-sand tr...,0.23,...,3654.165527,3808.426758,-17.025816,-25.706015,0.106878,0.117692,-0.081543,True,True,1
3,dingle2020-3,28.384557,81.021506,sand,2019-04-08,fluvial,10.2475/08.2016.06,Nepal,Bed material dredged at transect T5,0.31,...,3742.113525,3703.988525,-17.175804,-25.329865,0.095582,0.119001,-0.100381,True,True,1
