In [1]:
import os
import time
import warnings
from urllib.parse import quote

# Set before the third-party imports so their import-time warnings are silenced too.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import clear_output
from tqdm.notebook import tqdm

import ee, geemap

In [2]:
%load_ext autoreload
%autoreload 2
import sys  
sys.path.insert(0, '../lib/')

In [3]:
# Re-load the autoreload extension, then import the project modules
# (presumably resolved through the '../lib/' entry placed on sys.path above).
%reload_ext autoreload
import sand_classification as sc
import quality_mosaic as qm

# Part I: Get the locations from Google Drive

In [9]:
# Spreadsheet id and tab name of the Google Sheet to read.
sheet_id = '13nF_pJ02Bd70cDJamuKbvZIkIdJ-kOI4O3Cx9K7Wzos'
# sheet_id = '1XqOtPkiE_Q0dfGSoyxrH730RkwrTczcRbDeJJpqRByQ'  # alternative sheet id (currently unused)
sheet_name = 'wilkerson'
# CSV-export URL for that tab. The tab name is percent-encoded so that names
# containing spaces or other reserved characters still yield a valid URL;
# plain names such as 'wilkerson' are left unchanged.
drive_url = (
    f"https://docs.google.com/spreadsheets/d/{sheet_id}"
    f"/gviz/tq?tqx=out:csv&sheet={quote(sheet_name, safe='')}"
)
drive_url

'https://docs.google.com/spreadsheets/d/13nF_pJ02Bd70cDJamuKbvZIkIdJ-kOI4O3Cx9K7Wzos/gviz/tq?tqx=out:csv&sheet=wilkerson'

In [5]:
# Excel workbook that the sampled rows are written to.
output_file_path = "../data/labels/gt-bands.xlsx"

In [10]:
# Read the ground-truth sheet as CSV from the export URL held in `drive_url`.
df = pd.read_csv(drive_url)
df

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,D50,Notes,...,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26
0,wilkerson-0,19.899304,102.140084,sand,2022-02-25,fluvial,Wilkerson and Parker (2011),Laos,0.31,,...,,,,,,,,,,
1,wilkerson-1,28.858429,70.391257,sand,2021-04-25,fluvial,Wilkerson and Parker (2011),Pakistan,0.18,,...,,,,,,,,,,
2,wilkerson-2,9.141009,4.797954,sand,2021-04-30,fluvial,Wilkerson and Parker (2011),Nigeria,0.45,,...,,,,,,,,,,
3,wilkerson-3,25.961395,80.159471,sand,2022-11-30,fluvial,Wilkerson and Parker (2011),India,0.18,,...,,,,,,,,,,
4,wilkerson-4,25.397259,85.948165,sand,2020-04-18,fluvial,Wilkerson and Parker (2011),India,0.43,,...,,,,,,,,,,
5,wilkerson-5,29.40667,71.567847,sand,2022-10-25,fluvial,Wilkerson and Parker (2011),Pakistan,0.17,,...,,,,,,,,,,
6,wilkerson-6,,,sand,,fluvial,Wilkerson and Parker (2011),India,0.1,,...,,,,,,,,,,
7,wilkerson-7,,,sand,,fluvial,Wilkerson and Parker (2011),India,1.6,,...,,,,,,,,,,
8,wilkerson-8,,,sand,,fluvial,Wilkerson and Parker (2011),Australia,0.9,,...,,,,,,,,,,
9,wilkerson-9,15.163073,75.885587,fine,2022-12-16,fluvial,Wilkerson and Parker (2011),India,0.04,,...,,,,,,,,,,


In [11]:
# Keep only the rows that have a date and both coordinates, renumber the
# index, and drop every column whose name starts with 'Unnamed'.
df = (
    df
    .dropna(subset=['Date', 'Longitude', 'Latitude'])
    .reset_index(drop=True)
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)
df

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,D50,Notes,Location Notes
0,wilkerson-0,19.899304,102.140084,sand,2022-02-25,fluvial,Wilkerson and Parker (2011),Laos,0.31,,Mekong River at Wat Sop (No: 00248; p. 71)
1,wilkerson-1,28.858429,70.391257,sand,2021-04-25,fluvial,Wilkerson and Parker (2011),Pakistan,0.18,,Indus (at Hajipur) Pakistan
2,wilkerson-2,9.141009,4.797954,sand,2021-04-30,fluvial,Wilkerson and Parker (2011),Nigeria,0.45,,"Niger River at Jebba, Nigeria (p. 72)"
3,wilkerson-3,25.961395,80.159471,sand,2022-11-30,fluvial,Wilkerson and Parker (2011),India,0.18,,"Yamuna (at Hamirpur, U.P. state) India"
4,wilkerson-4,25.397259,85.948165,sand,2020-04-18,fluvial,Wilkerson and Parker (2011),India,0.43,,Ganga (at Mokameh in Bihar state) India
5,wilkerson-5,29.40667,71.567847,sand,2022-10-25,fluvial,Wilkerson and Parker (2011),Pakistan,0.17,,Sutlej (in Samasatta below railway bridge) Pak...
6,wilkerson-9,15.163073,75.885587,fine,2022-12-16,fluvial,Wilkerson and Parker (2011),India,0.04,,"Bennihalli (above Railway Bridge, Gadag Hubli ..."
7,chaplin-0,40.633863,-75.464181,gravel,2018-06-15,fluvial,Chaplin (2005),USA,38.2,118.0,Clear signs of mining


In [12]:
# Sanity check: no remaining row may lack its date or either coordinate.
assert not df[['Date', 'Longitude', 'Latitude']].isna().any().any()

# Part II: Select subset for training

Here we step through the rows of `df` one at a time, deciding which locations to keep and adjusting them where needed.

In [9]:
# List the column names of the cleaned frame.
df.columns

Index(['ID', 'Latitude', 'Longitude', 'Class', 'Date', 'Site', 'Source',
       'Country', 'Notes'],
      dtype='object')

In [13]:
# Connect to Earth Engine. If initialization fails (for example because no
# credentials are stored yet), run the authentication flow and retry once.
try:
    ee.Initialize()
except Exception:  # the error object itself is not used
    ee.Authenticate()
    ee.Initialize()


In [14]:
# Work on an independent (deep) copy so later edits do not touch `df`.
output = df.copy(deep=True)
output

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,D50,Notes,Location Notes
0,wilkerson-0,19.899304,102.140084,sand,2022-02-25,fluvial,Wilkerson and Parker (2011),Laos,0.31,,Mekong River at Wat Sop (No: 00248; p. 71)
1,wilkerson-1,28.858429,70.391257,sand,2021-04-25,fluvial,Wilkerson and Parker (2011),Pakistan,0.18,,Indus (at Hajipur) Pakistan
2,wilkerson-2,9.141009,4.797954,sand,2021-04-30,fluvial,Wilkerson and Parker (2011),Nigeria,0.45,,"Niger River at Jebba, Nigeria (p. 72)"
3,wilkerson-3,25.961395,80.159471,sand,2022-11-30,fluvial,Wilkerson and Parker (2011),India,0.18,,"Yamuna (at Hamirpur, U.P. state) India"
4,wilkerson-4,25.397259,85.948165,sand,2020-04-18,fluvial,Wilkerson and Parker (2011),India,0.43,,Ganga (at Mokameh in Bihar state) India
5,wilkerson-5,29.40667,71.567847,sand,2022-10-25,fluvial,Wilkerson and Parker (2011),Pakistan,0.17,,Sutlej (in Samasatta below railway bridge) Pak...
6,wilkerson-9,15.163073,75.885587,fine,2022-12-16,fluvial,Wilkerson and Parker (2011),India,0.04,,"Bennihalli (above Railway Bridge, Gadag Hubli ..."
7,chaplin-0,40.633863,-75.464181,gravel,2018-06-15,fluvial,Chaplin (2005),USA,38.2,118.0,Clear signs of mining


In [15]:
# Re-load the autoreload extension and import the project modules again;
# `file_management` (fm) is imported here for the first time in this notebook.
%reload_ext autoreload
import sand_classification as sc
import quality_mosaic as qm
import file_management as fm

In [16]:
# Position (0-based, used with .iloc) of the row in `output` currently under review.
INDEX = 0

In [17]:
# Buffer handed to qm.get_s1s2_data as `sampling_buffer_m`
# (presumably metres, going by the name — confirm in quality_mosaic).
sampling_buffer_m = 5
# Interactive geemap map that is passed to the qm helpers in the cells below.
Map = geemap.Map()
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

## Manual Loop

Keep re-running the cells below (the optional date/ID override, the data fetch, and the accept/discard step) until you are done with the manual sampling.


In [36]:
# Optional override: change the image date of the current sample
# (comment this line out to keep the date from the sheet).
# A single positional .iloc write is used instead of the chained form
# output['Date'].iloc[INDEX] = ..., which may assign to a temporary object and
# leave `output` unchanged (it never updates `output` under pandas Copy-on-Write).
output.iloc[INDEX, output.columns.get_loc('Date')] = '2022-10-14'
# output.iloc[INDEX, output.columns.get_loc('ID')] = 'ns-49'

In [37]:
# Call the quality_mosaic helper for the row at INDEX; its printed output
# reports the sample being processed and the search window.
s1_s2, sample = qm.get_s1s2_data(
    output,
    Map,
    INDEX,
    display_smap=False,
    mosaic_method='median',
    sampling_buffer_m=sampling_buffer_m,
    max_search_window_months=3,
    median_samples=5,
    roi_buffer_m=5000,
    obia=True,
)

Index:  7  ID:  chaplin-0 Class:  gravel  Site:  fluvial
Search window from 2022-07-14 to 2023-01-14


## Use this immediately after the map is refreshed to either select or discard the sample

`INDEX` is updated automatically when this function is called.
The cell then rewrites only the sheet named `sheet_name` if it already exists, leaving the other sheets in the workbook untouched.

In [38]:
# Accept or discard the current sample; the helper returns the updated table
# together with the new INDEX.
output, INDEX = qm.get_training_sample(output, s1_s2, sample, Map, INDEX, display_clusters=False, obia=True)
# fm.write_to_excel(output, output_file_path, sheet_name)

# Append mode replaces only this sheet and keeps the others, but it requires
# the workbook to exist already. On the first run, create the workbook instead;
# `if_sheet_exists` is only accepted in append mode, so it is omitted there.
if os.path.exists(output_file_path):
    writer_options = {"mode": "a", "if_sheet_exists": "replace"}
else:
    writer_options = {"mode": "w"}
with pd.ExcelWriter(output_file_path, engine="openpyxl", **writer_options) as writer:
    output.to_excel(writer, sheet_name=sheet_name, index=False)

# Show the row at the position just before the updated INDEX.
output.iloc[INDEX-1]

New marker accepted
Kept Observation


ID                              chaplin-0
Latitude                        40.634518
Longitude                      -75.464091
Class                              gravel
Date                           2022-10-14
Site                              fluvial
Source                     Chaplin (2005)
Country                               USA
D50                                  38.2
Notes                               118.0
Location Notes      Clear signs of mining
B2_mean                       1481.864136
B3_mean                       1831.106812
B4_mean                       2106.776611
B8_mean                        2422.76709
B8A_mean                       2402.70874
B11_mean                       2281.18457
B12_mean                      2022.135864
VV_mean                         -6.604371
VH_mean                        -13.797893
mTGSI_mean                        0.02255
BSI_mean                         0.058389
NDWI_mean                       -0.139592
keep                              

## Final Output

In [34]:
# Display the table of samples, including the columns added during sampling.
output

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,D50,Notes,...,B11_mean,B12_mean,VV_mean,VH_mean,mTGSI_mean,BSI_mean,NDWI_mean,keep,location_tweaked,class_code
0,wilkerson-0,19.899304,102.140084,sand,2022-02-25,fluvial,Wilkerson and Parker (2011),Laos,0.31,,...,,,,,,,,False,False,99
1,wilkerson-1,28.858429,70.391257,sand,2021-04-25,fluvial,Wilkerson and Parker (2011),Pakistan,0.18,,...,3642.42627,3725.152588,-18.692795,-25.229696,0.086209,0.098757,-0.080478,True,False,1
2,wilkerson-2,9.141009,4.797954,sand,2021-04-30,fluvial,Wilkerson and Parker (2011),Nigeria,0.45,,...,2563.118408,2469.901367,-18.459393,-26.94313,0.106122,0.145168,-0.02819,True,False,1
3,wilkerson-3,25.961395,80.159471,sand,2022-04-03,fluvial,Wilkerson and Parker (2011),India,0.18,,...,4124.906738,4101.604492,-19.55765,-26.804512,0.107884,0.131114,-0.159184,True,False,1
4,wilkerson-4,25.397033,85.947653,sand,2020-04-18,fluvial,Wilkerson and Parker (2011),India,0.43,,...,4070.888916,4067.142822,-17.832499,-28.141626,0.087083,0.106525,-0.089916,True,True,1
5,wilkerson-5,29.406617,71.567756,sand,2022-10-25,fluvial,Wilkerson and Parker (2011),Pakistan,0.17,,...,3866.44751,3733.923096,-17.220385,-27.744926,0.073709,0.100285,-0.052254,True,True,1
6,wilkerson-9,15.163073,75.885587,fine,2022-12-16,fluvial,Wilkerson and Parker (2011),India,0.04,,...,2174.612305,1625.705444,-10.930077,-18.54911,-0.015049,0.074415,-0.287985,True,False,0
7,chaplin-0,40.633863,-75.464181,gravel,2018-06-15,fluvial,Chaplin (2005),USA,38.2,118.0,...,,,,,,,,False,False,99
