In [1]:
import warnings
from pathlib import Path

# Silence library warnings before the third-party imports below.
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from IPython.display import clear_output

import ee, geemap

In [2]:
# Load IPython's autoreload extension (mode 2) so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys

# Put '../lib/' at the front of the module search path so modules stored there
# can be imported. Guarded so that re-running this cell does not keep stacking
# duplicate entries on sys.path.
if '../lib/' not in sys.path:
    sys.path.insert(0, '../lib/')

In [3]:
# Project-local helper modules (presumably resolved through the '../lib/' entry
# added to sys.path earlier — TODO confirm).
# NOTE(review): `sc` is not referenced in the cells visible in this notebook.
%reload_ext autoreload
import sand_classification as sc
import quality_mosaic as qm

# Part I: Get the locations from Google Drive

In [4]:
# Google Sheet holding the ground-truth locations. `sheet_name` selects the tab
# and is reused as the worksheet name when results are written to Excel.
sheet_id = '13nF_pJ02Bd70cDJamuKbvZIkIdJ-kOI4O3Cx9K7Wzos'
# sheet_id = '1XqOtPkiE_Q0dfGSoyxrH730RkwrTczcRbDeJJpqRByQ'
sheet_name = 'nonsand'

# gviz endpoint that returns the selected tab as CSV.
drive_url = (
    "https://docs.google.com/spreadsheets/d/" + sheet_id
    + "/gviz/tq?tqx=out:csv&sheet=" + sheet_name
)
drive_url

'https://docs.google.com/spreadsheets/d/13nF_pJ02Bd70cDJamuKbvZIkIdJ-kOI4O3Cx9K7Wzos/gviz/tq?tqx=out:csv&sheet=nonsand'

In [5]:
# Workbook that the sampled results are written to (via pd.ExcelWriter) after each manual sampling step.
output_file_path = '../data/labels/23-07-26_gt-bands.xlsx'

In [6]:
# Read the ground-truth locations from the Google Sheet CSV export at `drive_url`.
df = pd.read_csv(drive_url)
df

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes,Unnamed: 9,...,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27
0,ns-0,25.449886,82.858051,bare,2022-10-03,Airport,Author,India,Varanasi Airport,,...,,,,,,,,,,
1,ns-1,25.230858,83.021355,greenveg,2022-02-19,Vegetated Fields,Author,India,Off Ganga,,...,,,,,,,,,,
2,ns-2,25.246509,83.025825,bare,2022-02-19,Bare fields,Author,India,Off Ganga,,...,,,,,,,,,,
3,ns-3,25.208222,82.980644,water,2022-02-19,River water,Author,India,Ganga,,...,,,,,,,,,,
4,ns-4,22.573816,88.349404,bare,2022-01-02,"Urban scene, kolkata",Author,India,Off Hoogly,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,ns-85,0.329276,32.619909,bare,2022-10-14,Kampala roof,,Uganda,,,...,,,,,,,,,,
86,ns-86,0.308077,32.618006,greenveg,2022-10-14,Kampala grass,,Uganda,,,...,,,,,,,,,,
87,ns-87,0.225412,32.620413,water,2022-10-14,Kampala bay,,Uganda,,,...,,,,,,,,,,
88,ns-88,-24.986835,14.829484,whitewater,2022-04-06,waves,,Namibia,,,...,,,,,,,,,,


In [7]:
# Clean the sheet in one pass:
#   1. drop rows that lack a 'Date', 'Longitude' or 'Latitude' value and renumber the index;
#   2. discard the placeholder columns whose name starts with 'Unnamed'.
df = (
    df
    .dropna(subset=['Date', 'Longitude', 'Latitude'])
    .reset_index(drop=True)
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)
df

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,Country,Notes
0,ns-0,25.449886,82.858051,bare,2022-10-03,Airport,Author,India,Varanasi Airport
1,ns-1,25.230858,83.021355,greenveg,2022-02-19,Vegetated Fields,Author,India,Off Ganga
2,ns-2,25.246509,83.025825,bare,2022-02-19,Bare fields,Author,India,Off Ganga
3,ns-3,25.208222,82.980644,water,2022-02-19,River water,Author,India,Ganga
4,ns-4,22.573816,88.349404,bare,2022-01-02,"Urban scene, kolkata",Author,India,Off Hoogly
...,...,...,...,...,...,...,...,...,...
85,ns-85,0.329276,32.619909,bare,2022-10-14,Kampala roof,,Uganda,
86,ns-86,0.308077,32.618006,greenveg,2022-10-14,Kampala grass,,Uganda,
87,ns-87,0.225412,32.620413,water,2022-10-14,Kampala bay,,Uganda,
88,ns-88,-24.986835,14.829484,whitewater,2022-04-06,waves,,Namibia,


In [8]:
# Sanity check: after cleaning, no row may be missing its date or coordinates.
assert not df[['Date', 'Longitude', 'Latitude']].isna().any().any()

# Part II: Select subset for training

Here we step through the rows of `df` one at a time, picking and adjusting the locations we want to keep.

In [9]:
# List the columns that remain after the cleaning step.
df.columns

Index(['ID', 'Latitude', 'Longitude', 'Class', 'Date', 'Site', 'Source',
       'Country', 'Notes'],
      dtype='object')

In [10]:
# Start an Earth Engine session. If initialization fails (presumably because no
# credentials are stored yet), run the authentication flow and retry once.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()


In [8]:
# The IDs 0 to 49 were already fine-tuned and sampled in an earlier session, so their
# rows are taken from the saved gt-bands workbook instead of the Google Sheet.
previous_finetuned = pd.read_excel('../data/labels/23-06-26_gt-bands-cluster_median.xlsx', sheet_name='nonsand')

# Rows from position 50 onwards still need sampling; they are appended below the
# previously fine-tuned rows.
df_new = df.iloc[50:]

# ignore_index=True gives the combined frame a fresh 0..n-1 index, so row labels
# always agree with the positional INDEX used with .iloc during the manual loop,
# regardless of the index each input carried.
output = pd.concat(
    [previous_finetuned, df_new],
    axis=0,
    ignore_index=True,
)

output

Unnamed: 0,ID,Latitude,Longitude,Class,Date,Site,Source,keep,location_tweaked,class_code,...,B4_median,B8_median,B8A_median,B11_median,B12_median,VV_median,VH_median,mTGSI_median,BSI_median,NDWI_median
0,ns-0,25.449886,82.858051,bare,2022-10-03,Airport,Author,True,False,5.0,...,2765.000000,3200.0,3391.000000,3873.000000,4003.0,-16.626864,-23.228977,0.099197,0.117601,-0.113991
1,ns-1,25.230677,83.021533,greenveg,2020-12-11,Vegetated Fields,Author,True,True,6.0,...,590.000000,4164.0,4415.000000,2324.800000,1193.0,-8.528657,-14.159153,-0.399877,-0.247475,-0.623728
2,ns-2,25.246734,83.025929,bare,2022-02-19,Bare fields,Author,True,True,5.0,...,1152.000000,2100.5,2229.000000,2461.000000,1971.0,-13.455000,-21.101277,-0.022312,0.056065,-0.270297
3,ns-3,25.208222,82.980644,water,2022-02-19,River water,Author,True,False,4.0,...,1360.000000,810.0,731.500000,289.000000,178.0,-24.285918,-27.224403,-0.127043,-0.147149,0.327812
4,ns-4,22.573932,88.349081,bare,2022-01-02,"Urban scene, kolkata",Author,True,True,5.0,...,777.444444,1077.0,1195.333333,1301.588235,1060.2,-3.060537,-14.773439,-0.002928,0.055635,-0.100738
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,ns-85,0.329276,32.619909,bare,2022-10-14,Kampala roof,,,,,...,,,,,,,,,,
86,ns-86,0.308077,32.618006,greenveg,2022-10-14,Kampala grass,,,,,...,,,,,,,,,,
87,ns-87,0.225412,32.620413,water,2022-10-14,Kampala bay,,,,,...,,,,,,,,,,
88,ns-88,-24.986835,14.829484,whitewater,2022-04-06,waves,,,,,...,,,,,,,,,,


In [27]:
# Positional row of `output` at which the manual sampling starts. The rows before it
# come from the previously fine-tuned workbook (assuming that workbook holds exactly
# 50 rows — TODO confirm).
INDEX = 50

In [None]:
# Interactive geemap map; this same `Map` object is passed to the qm helpers in the manual loop.
Map = geemap.Map()
Map

## Manual Loop

Keep running the following 2 cells until you're done with the manual sampling.


In [18]:
# Optional: uncomment to override the date (or ID) of the row at INDEX before running the next cell.
# A single positional assignment is used instead of chained indexing, which pandas may apply to a temporary copy.
# output.iloc[INDEX, output.columns.get_loc('Date')] = '2021-07-11'
# output.iloc[INDEX, output.columns.get_loc('ID')] = 'ns-49'

In [None]:
# Prepare the data for the row at INDEX; per the notebook notes, the map is refreshed
# once this call finishes.
# NOTE(review): the meaning of each option is defined in quality_mosaic, which is not visible here.
s1_s2, sample = qm.get_s1s2_data(
    output,
    Map,
    INDEX,
    display_smap=False,
    mosaic_method='median',
    max_search_window_months=3,
    median_samples=5,
    roi_buffer_m=5000,
    obia=True,
)

## Use this immediately after the map is refreshed to either select or discard the sample

This cell also saves the results to the output workbook: if a sheet with the same name already exists, only that sheet is overwritten, and the other sheets in the workbook are preserved.

In [None]:
# Select or discard the current sample. The helper (defined in quality_mosaic, not
# visible here) returns the updated table together with the index to continue from.
output, INDEX = qm.get_training_sample(output, s1_s2, sample, Map, INDEX, display_clusters=False, obia=True)

# Append mode replaces only `sheet_name` and keeps the workbook's other sheets, but it
# requires an existing file. On the very first save the workbook is created instead
# (`if_sheet_exists` is only accepted together with mode="a").
if Path(output_file_path).exists():
    writer_options = {"mode": "a", "if_sheet_exists": "replace"}
else:
    writer_options = {"mode": "w"}
with pd.ExcelWriter(output_file_path, engine="openpyxl", **writer_options) as writer:
    output.to_excel(writer, sheet_name=sheet_name, index=False)

# Grab the row at the returned INDEX before advancing, and leave it as the cell's last
# expression so the notebook actually displays it (a bare expression in the middle of
# a cell is never shown).
processed_row = output.iloc[INDEX]
INDEX += 1
processed_row

## Final Output

In [None]:
# Display the table as it stands after the manual sampling loop.
output