In [1]:
%reload_ext autoreload
%autoreload 2
%load_ext dotenv
%dotenv
%matplotlib inline

import os 
import glob
import io
import math
from datetime import datetime 
import shapely.geometry
import json
from pathlib import Path
import pathlib

import IPython.display
from IPython.display import display
from ipywidgets import GridspecLayout
import ipywidgets as widgets

from xcube_sh.cube import open_cube
from xcube_sh.observers import Observers

from src.GIS_utils import bbox_from_point
from src.config import CubeConfig
from src.preprocess import preprocess, plot_cube_and_background, save_labels, save_cubes


import numpy as np
from skimage.io import imread
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd

import superintendent

In [None]:
def request_save_cubes(start_date, end_date, lat, lon, RADIUS=500,
                       dataset_name='S2L1C', band_names=['B03', 'B08', 'CLP'],
                       max_cloud_proba=0.1, time_period='1D'):
    """Request a data cube around a point, preprocess it and save the result.

    Parameters
    ----------
    start_date, end_date :
        Bounds of the requested time range; passed to ``CubeConfig`` as
        ``time_range=[start_date, end_date]``.
    lat, lon :
        Centre of the area of interest, forwarded to ``bbox_from_point``.
    RADIUS :
        Passed to ``bbox_from_point`` as ``r`` and used to derive the tile
        size (presumably metres -- TODO confirm against ``bbox_from_point``).
    dataset_name, band_names, time_period :
        Forwarded unchanged to ``CubeConfig``.
    max_cloud_proba :
        Forwarded unchanged to ``preprocess``.

    Returns
    -------
    None. The outputs are written by ``save_cubes`` under the notebook-level
    ``data_chips_dir``, which is looked up when the function is called.
    """
    # Both tile dimensions share one edge length.
    # NOTE(review): the divisor 10 presumably is the pixel size -- confirm.
    tile_edge = 2 * RADIUS // 10

    config = CubeConfig(
        dataset_name=dataset_name,
        band_names=band_names,  # defaults: GREEN + NIR + Clouds
        tile_size=[tile_edge, tile_edge],
        geometry=bbox_from_point(lat=lat, lon=lon, r=RADIUS),
        time_range=[start_date, end_date],
        time_period=time_period,
    )
    raw_cube = open_cube(config, observer=Observers.request_collector())

    clean_cube, ndwi_background = preprocess(
        raw_cube,
        max_cloud_proba=max_cloud_proba,
        nans_how='any',
        verbose=1,
        plot_NDWI=False,
    )
    save_cubes(
        clean_cube,
        ndwi_background,
        lat_lon=(lat, lon),
        data_dir=Path(data_chips_dir),
        verbose=False,
    )


In [None]:
# AOI definition file (input) and directory that receives the image chips.
aoi_json_filepath, data_chips_dir = "data/aoi.json", "/home/jovyan/data/chips/"

# (start, end) of the date range requested for each year.
start_date_2019, end_date_2019 = '2019-01-01', '2019-06-30'
start_date_2020, end_date_2020 = '2020-01-01', '2020-06-30'

### request and download images

In [None]:
# Read the areas of interest up front so the file handle is closed before the
# (potentially long-running) download requests start.
with open(aoi_json_filepath, "r") as f:
    aoi_file = json.load(f)
straits_dict = aoi_file['Straits']

# One (year label, start date, end date) entry per requested period; replaces
# the previously copy-pasted per-year branches.
download_periods = [
    ("2019", start_date_2019, end_date_2019),
    ("2020", start_date_2020, end_date_2020),
]

for aoi_name, aoi_locs in straits_dict.items():
    for loc in aoi_locs:
        lat, lon = loc[0], loc[1]
        subdir = 'lat_{}_lon_{}'.format(str(lat).replace('.', '_'), str(lon).replace('.', '_'))
        chip_dir = Path(data_chips_dir, subdir)
        for year, start_date, end_date in download_periods:
            # Download only when the location has no PNG for that year yet.
            # The check is repeated per period because an earlier download
            # may just have created the folder.
            if not chip_dir.exists() or not any(chip_dir.glob(f"*{year}*.png")):
                print(f"aoi name: {aoi_name}, subdir {subdir}, download {year} images")
                request_save_cubes(start_date, end_date, lat, lon)

### extract images to be labeled

In [None]:
# NOTE: the name `stratis_images` is kept as-is because later cells read it.
stratis_images = []
with open(aoi_json_filepath, "r") as f:
    aoi_file = json.load(f)
straits_dict = aoi_file['Straits']

for aoi_name, aoi_locs in straits_dict.items():
    for loc in aoi_locs:
        lat, lon = loc[0], loc[1]
        subdir = 'lat_{}_lon_{}'.format(str(lat).replace('.', '_'), str(lon).replace('.', '_'))
        chip_dir = Path(data_chips_dir, subdir)
        if chip_dir.exists():
            # glob order is arbitrary; sorting keeps the labelling queue
            # reproducible between runs.
            stratis_images.extend(sorted(chip_dir.glob("img_ndwi*.png")))

# Show how many images were found plus a sample path. The sample is guarded so
# an empty result displays None instead of raising IndexError.
len(stratis_images), (stratis_images[0] if stratis_images else None)

In [None]:
csv_file_path = "/home/jovyan/data/straits_labels.csv"
if Path(csv_file_path).exists():
    labels_df = pd.read_csv(csv_file_path, index_col="file_path")
else:
    # First run: the CSV is only written at the end of the labelling session,
    # so start from an empty frame with the same schema (index "file_path",
    # float column "count") instead of failing on the missing file.
    print(f"{csv_file_path} not found, starting with an empty label table")
    labels_df = pd.DataFrame(
        columns=["count"],
        index=pd.Index([], name="file_path"),
        dtype=float,
    )
len(labels_df)

In [None]:
# An image still needs a label when its path is missing from the label index,
# or when it is present but its "count" entry is null.
tobe_labeled_images = [
    image_path
    for image_path in stratis_images
    if str(image_path) not in labels_df.index
    or pd.isnull(labels_df.loc[str(image_path), "count"])
]

In [None]:
len(tobe_labeled_images)

In [None]:
# images = [f for f in pathlib.Path("/home/jovyan/data/chips").glob("*/img_ndwi*.png") if f.is_file()]
images = tobe_labeled_images
print(len(images))

def change_colormap(image_path:pathlib.Path, cmap='RdYlBu'):
    """Return the image at ``image_path`` re-coloured with ``cmap`` as PNG bytes.

    The file is read with ``imread``, its values are negated and passed
    through the matplotlib colormap named ``cmap``; the colormap output is
    scaled by 255, cast to uint8 and encoded as an in-memory PNG.
    """
    pixels = imread(image_path)
    # NOTE(review): if imread returns an unsigned integer array, the negation
    # wraps around instead of producing negative values -- confirm the dtype
    # of the saved chips and whether a reversed colormap was intended.
    rgba = plt.get_cmap(cmap)(-pixels)
    buffer = io.BytesIO()
    Image.fromarray(np.uint8(rgba * 255)).save(buffer, format='PNG')
    return buffer.getvalue()
    
def display_image_and_references(image_path):
    """Display the image to label together with reference images of its folder.

    The references are all other ``img_ndwi*.png`` files in the same folder as
    ``image_path`` followed by every ``bg_ndwi*.png`` file, arranged in a grid
    of four columns. The image to label is shown below the grid. The folder
    path is also printed. Returns None.
    """
    image_folder = image_path.parent
    print(image_folder)

    # Reference images: sibling NDWI chips first, background image(s) last.
    other_images = [
        candidate
        for candidate in image_folder.glob("img_ndwi*.png")
        if candidate.is_file() and candidate != image_path
    ]
    other_images += list(image_folder.glob("bg_ndwi*.png"))

    # At least one row so the grid can be created without references.
    n_col = 4
    n_row = max(math.ceil(len(other_images) / n_col), 1)
    grid = GridspecLayout(n_row, n_col)

    # Fill the grid row by row; unused trailing cells stay empty.
    for position, reference in enumerate(other_images):
        row, col = divmod(position, n_col)
        caption = widgets.Label(f"Image {reference.name}")
        thumbnail = widgets.Image(
            value=change_colormap(reference),
            layout=widgets.Layout(width='200px', height='200px'),
        )
        grid[row, col] = widgets.VBox([caption, thumbnail])

    panel = widgets.VBox([
        widgets.Label(f"folder: {image_folder}"),
        widgets.Label(f"all other images of the same loc"),
        grid,
        widgets.Label(f"image to label: {image_path.name}"),
        widgets.Image(
            value=change_colormap(image_path),
            object_fit='none',
            layout=widgets.Layout(width='300px', height='300px'),
        ),
    ])
    display(widgets.VBox([panel]))

# Labelling widget: one button per integer label from -1 to 5.
# NOTE(review): -1 presumably marks an unusable image (later cells keep only
# counts >= 0) -- confirm.
labeller = superintendent.ClassLabeller(
    features=images,
    options=list(range(-1, 6)),
    display_func=display_image_and_references,
)

labeller

In [None]:
tobe_labeled_images
labeller.new_labels
print(len(tobe_labeled_images))

In [None]:
# Store each new label in the "count" column, keyed by the image path.
# `.loc[row, column]` names the target cell explicitly (adding the row when the
# path is new); the previous `.at[row] = value` passed no column and only
# worked through a fallback that overwrote every column of the row.
for image_path, label in zip(tobe_labeled_images, labeller.new_labels):
    labels_df.loc[str(image_path), "count"] = label

In [None]:
len(labels_df)

In [None]:
# Keep only rows that actually carry a label. The explicit copy makes the
# result an independent frame, so assigning to its "count" column afterwards
# does not raise pandas' SettingWithCopyWarning.
labels_df = labels_df[labels_df["count"].notnull()].copy()

In [None]:
len(labels_df)

In [None]:
labels_df['count'] = labels_df['count'].astype(float)

In [None]:
labels_df['count'].hist()

In [None]:
# How imbalanced is the data? Share of labelled images with a positive count.
# The mean of the boolean mask equals positives / total and gives NaN, rather
# than a ZeroDivisionError, when there are no labelled rows.
float((labels_df['count'] > 0).mean())

In [None]:
csv_file_path = "/home/jovyan/data/straits_labels.csv"
labels_df.to_csv(csv_file_path)

### After merging everything into labels.csv, split each file path in labels.csv into two columns: lat_lon and timestamp

In [25]:
labels_filename = '/home/jovyan/zhichao/sentinel2-xcube-boat-detection/data/labels.csv'
df_labels = pd.read_csv(labels_filename, index_col='file_path', dtype={'count': float})

# Derive both columns from the file path stored in the index, e.g.
#   .../lat_36_02_lon_-5_36/img_08_t_2019-05-02.png
#   -> lat_lon   = "lat_36_02_lon_-5_36"  (parent folder name)
#   -> timestamp = "2019-05-02"           (part of the stem after "t_")
# Whole-column assignment replaces the row-by-row `.at` writes and also
# creates the columns when the label file has no rows.
label_paths = [Path(file_path) for file_path in df_labels.index]
df_labels["timestamp"] = [path.stem.split('t_')[1] for path in label_paths]
df_labels["lat_lon"] = [path.parts[-2] for path in label_paths]

### Sanity check: every lat_lon folder under data/chips is present in the DataFrame

In [26]:
data_dir = "/home/jovyan/data/chips"
coords = os.listdir(data_dir)
coords_in_df = set(df_labels["lat_lon"])

# Hidden entries (names starting with ".") are ignored. All missing folders
# are collected first so a failure names every one of them, not just the first.
missing_coords = sorted(
    coord for coord in coords
    if not coord.startswith(".") and coord not in coords_in_df
)
assert not missing_coords, f"chip folders without any row in df_labels: {missing_coords}"

In [28]:
df_labels

Unnamed: 0_level_0,count,timestamp,lat_lon
file_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
/home/jovyan/data/chips/lat_36_02_lon_-5_36/img_08_t_2019-05-02.png,0.0,2019-05-02,lat_36_02_lon_-5_36
/home/jovyan/data/chips/lat_36_02_lon_-5_36/img_08_t_2019-04-12.png,0.0,2019-04-12,lat_36_02_lon_-5_36
/home/jovyan/data/chips/lat_36_02_lon_-5_36/img_08_t_2019-01-24.png,0.0,2019-01-24,lat_36_02_lon_-5_36
/home/jovyan/data/chips/lat_36_02_lon_-5_36/img_08_t_2019-03-13.png,0.0,2019-03-13,lat_36_02_lon_-5_36
/home/jovyan/data/chips/lat_36_02_lon_-5_36/img_08_t_2020-03-22.png,0.0,2020-03-22,lat_36_02_lon_-5_36
...,...,...,...
/home/jovyan/data/chips/lat_57_07_lon_24_01/img_08_t_2020-04-17.png,0.0,2020-04-17,lat_57_07_lon_24_01
/home/jovyan/data/chips/lat_57_07_lon_24_01/img_08_t_2020-03-13.png,0.0,2020-03-13,lat_57_07_lon_24_01
/home/jovyan/data/chips/lat_57_07_lon_24_01/img_08_t_2020-05-02.png,0.0,2020-05-02,lat_57_07_lon_24_01
/home/jovyan/data/chips/lat_57_07_lon_24_01/img_08_t_2020-04-27.png,0.0,2020-04-27,lat_57_07_lon_24_01


### Given coords and bands, get all the images

In [7]:
# Turn the file_path index into a regular column. The guard makes the cell
# safe to re-run: an unconditional in-place reset would add a junk "index"
# column on the second run and raise on the third.
if "file_path" not in df_labels.columns:
    df_labels = df_labels.reset_index()
df_labels["count"] = df_labels["count"].astype('float')

# Rows with a non-negative count; negative and missing counts are excluded.
df_labels_valid = df_labels[df_labels["count"] >= 0.0]


In [8]:
len(df_labels_valid)

1977

In [9]:
df_labels_valid_groupby = df_labels_valid.groupby("lat_lon")
df_labels_valid_groupby.get_group(name = 'lat_36_02_lon_-5_36')["timestamp"]

0     2019-05-02
1     2019-04-12
2     2019-01-24
3     2019-03-13
4     2020-03-22
5     2019-06-18
6     2020-03-04
7     2020-04-03
8     2019-05-19
9     2019-05-24
10    2019-06-23
11    2020-02-01
12    2020-01-19
13    2019-01-02
14    2019-04-14
15    2019-06-21
16    2019-01-17
17    2019-03-15
18    2020-01-04
19    2019-03-28
20    2019-04-27
21    2020-05-08
22    2019-02-03
23    2019-03-20
24    2019-01-04
25    2019-03-23
26    2019-01-07
27    2019-06-11
28    2019-04-04
29    2019-02-06
30    2020-05-03
31    2019-05-29
32    2019-06-28
33    2020-02-11
34    2019-05-07
35    2019-01-29
36    2019-02-28
37    2019-06-06
38    2020-03-27
39    2019-01-12
40    2019-03-10
41    2020-03-09
42    2020-05-16
43    2019-02-11
44    2019-01-09
45    2019-05-14
46    2019-02-08
47    2020-01-09
48    2020-01-27
49    2020-02-26
50    2020-05-01
51    2019-05-09
52    2019-01-14
53    2020-03-29
54    2020-05-18
55    2019-03-03
Name: timestamp, dtype: object

In [11]:

# data_dir='/home/jovyan/data/chips'
# band_list = ['img_08', 'bg_ndwi']
# coordinates = np.array(["lat_40_148_lon_18_492"])
# df_labels_orig = pd.read_csv("/home/jovyan/data/labels.csv")

# def get_img_paths(coords):
#     img_paths = []
#     for subdir in coords:
#         for filename in os.listdir(os.path.join(data_dir,subdir)):
#             if filename.startswith(band_list[0]):
# #                 print(os.path.join(data_dir,subdir,filename))
# #                 print(df_labels_orig[df_labels_orig['file_path']==os.path.join(data_dir,subdir,filename)]['count'].values)
#                 if int(df_labels_orig[df_labels_orig['file_path']==os.path.join(data_dir,subdir,filename)]['count'].values)>=0:
#                     filenames = [os.path.join(data_dir,subdir,filename)]
#                     for band in band_list[1:]:
#                         if band.startswith('bg'):
#                             filenames.append(os.path.join(data_dir,subdir,band+'.png'))
#                         elif band.startswith('img'):
#                             filenames.append(os.path.join(data_dir,subdir,filename.replace(band_list[0],band)))
#                     img_paths.append(filenames)
#     img_paths = np.array(img_paths)
#     return img_paths


df_labels_groupby = df_labels.groupby("lat_lon")
band_list = ['bg_ndwi', 'img_08', 'img_03']

coords = np.array(["lat_38_211_lon_15_567"])
def get_img_paths(coordinates):
    """Collect, per labelled timestamp, the image paths of every band.

    For each folder name in ``coordinates`` the timestamps are taken from the
    notebook-level ``df_labels_groupby`` and, for every timestamp, the files of
    each band in ``band_list`` are globbed under ``data_dir``:

    * bands starting with ``'bg'`` match ``<band>*.png`` (no timestamp),
    * all other bands match ``<band>*t_<timestamp>*.png``.

    Parameters
    ----------
    coordinates : iterable of str
        Folder names below ``data_dir`` (e.g. ``"lat_38_211_lon_15_567"``).

    Returns
    -------
    numpy.ndarray
        One entry per (folder, timestamp), holding the matched paths in
        ``band_list`` order. When every band matches exactly one file this is
        a 2-D array of strings.

    Raises
    ------
    KeyError
        From ``get_group`` when a folder name has no rows in the labels.

    NOTE(review): ``df_labels_groupby`` is built from ``df_labels``, not from
    ``df_labels_valid``, so timestamps with a negative count are included --
    confirm this is intended.
    NOTE(review): if a band matches zero or several files for some timestamp
    the rows have unequal length and ``np.array`` cannot build a regular 2-D
    array -- confirm every band/timestamp pair has exactly one file.
    """
    img_paths = []
    for subdir in coordinates:
        timestamps = df_labels_groupby.get_group(name=subdir)["timestamp"]
        chip_dir = os.path.join(data_dir, subdir)

        # Background bands carry no timestamp, so their glob is the same for
        # every timestamp of this folder: scan the directory once.
        background_paths = {
            band: glob.glob(os.path.join(chip_dir, band + "*.png"))
            for band in band_list
            if band.startswith('bg')
        }

        for timestamp in timestamps:
            img_timestamp = []
            for band in band_list:
                if band in background_paths:
                    img_timestamp.extend(background_paths[band])
                else:
                    pattern = os.path.join(chip_dir, band + "*t_" + timestamp + "*.png")
                    img_timestamp.extend(glob.glob(pattern))
            img_paths.append(img_timestamp)
    return np.array(img_paths)
get_img_paths(coords)

array([['/home/jovyan/data/chips/lat_38_211_lon_15_567/bg_ndwi.png',
        '/home/jovyan/data/chips/lat_38_211_lon_15_567/img_08_t_2020-02-07.png',
        '/home/jovyan/data/chips/lat_38_211_lon_15_567/img_03_t_2020-02-07.png'],
       ['/home/jovyan/data/chips/lat_38_211_lon_15_567/bg_ndwi.png',
        '/home/jovyan/data/chips/lat_38_211_lon_15_567/img_08_t_2019-06-29.png',
        '/home/jovyan/data/chips/lat_38_211_lon_15_567/img_03_t_2019-06-29.png'],
       ['/home/jovyan/data/chips/lat_38_211_lon_15_567/bg_ndwi.png',
        '/home/jovyan/data/chips/lat_38_211_lon_15_567/img_08_t_2020-01-08.png',
        '/home/jovyan/data/chips/lat_38_211_lon_15_567/img_03_t_2020-01-08.png'],
       ['/home/jovyan/data/chips/lat_38_211_lon_15_567/bg_ndwi.png',
        '/home/jovyan/data/chips/lat_38_211_lon_15_567/img_08_t_2020-05-04.png',
        '/home/jovyan/data/chips/lat_38_211_lon_15_567/img_03_t_2020-05-04.png'],
       ['/home/jovyan/data/chips/lat_38_211_lon_15_567/bg_ndwi.png',
   

In [29]:
# Save with file_path as the index column. If an earlier cell moved file_path
# into a regular column, writing the frame as-is would also dump the
# positional index as an unnamed column, which comes back as "Unnamed: 0" the
# next time the file is read.
labels_to_save = df_labels.set_index("file_path") if "file_path" in df_labels.columns else df_labels
labels_to_save.to_csv('/home/jovyan/zhichao/sentinel2-xcube-boat-detection/data/labels.csv')

In [19]:
pd.read_csv('/home/jovyan/data/labels.csv', dtype={'count': float})

Unnamed: 0,file_path,count
0,/home/jovyan/data/chips/lat_36_02_lon_-5_36/im...,0.0
1,/home/jovyan/data/chips/lat_36_02_lon_-5_36/im...,0.0
2,/home/jovyan/data/chips/lat_36_02_lon_-5_36/im...,0.0
3,/home/jovyan/data/chips/lat_36_02_lon_-5_36/im...,0.0
4,/home/jovyan/data/chips/lat_36_02_lon_-5_36/im...,0.0
...,...,...
2208,/home/jovyan/data/chips/lat_57_07_lon_24_01/im...,0.0
2209,/home/jovyan/data/chips/lat_57_07_lon_24_01/im...,0.0
2210,/home/jovyan/data/chips/lat_57_07_lon_24_01/im...,0.0
2211,/home/jovyan/data/chips/lat_57_07_lon_24_01/im...,0.0
