In [None]:
# Check container OS version (for correct UbuntuGIS package version).
# The APT packages pinned in the GDAL installation cell carry a "~bionic0"
# suffix, so compare the release printed here against that pin.
!lsb_release -a

In [None]:
# Dark magic happens here: installing dependencies for GDAL 3.0.4
# build process via APT and install GDAL itself via PyPI.
# The steps are chained with `&&`, so the first failing step stops the rest:
#   1. register the ubuntugis-unstable PPA;
#   2. install python3-gdal pinned to 3.0.4+dfsg-1~bionic0;
#   3. purge that python3-gdal package again (with --autoremove);
#   4. pip-install the gdal 3.0.4 Python package;
#   5. install gdal-bin pinned to the same APT version.
# `time` reports how long the whole group took.
# NOTE(review): only add-apt-repository is given -y; the apt steps may wait
# for confirmation during a non-interactive "Run all" — confirm, add -y if so.
# NOTE(review): the pinned versions target the "bionic" release — verify
# against the OS reported by `lsb_release -a`.
!time (add-apt-repository -y ppa:ubuntugis/ubuntugis-unstable && \
 apt install python3-gdal=3.0.4+dfsg-1~bionic0 && \
 apt purge --autoremove python3-gdal && \
 pip install gdal==3.0.4 && \
 apt install gdal-bin=3.0.4+dfsg-1~bionic0)

# Importing osgeo doubles as a smoke test of the installation above
from osgeo import gdal; print(f"\nGDAL version {(gdal.__version__)}")

In [None]:
# Google Drive: attach the user's Drive under /content/drive (Colab only)

from os import path as osp

from google.colab import drive

PATH_DRIVE = osp.join('/', 'content', 'drive')

# An existing mount point means Drive is attached already, so skip mounting
if osp.exists(PATH_DRIVE):
    print("Google Drive has been already mounted!")
else:
    print("Mounting Google Drive...")
    drive.mount(PATH_DRIVE)

In [None]:
from os import path as osp

# Arbitrary subpath in Google Drive where inputs and outputs are kept
PATH_STORAGE = osp.join('ods', 'soc')

# If PATH_DRIVE is defined (the Google Drive cell has been run), persistent
# directories go under <drive>/MyDrive/<PATH_STORAGE>; otherwise the prefix is
# empty and they end up directly under /content.
PREFIX_DRIVE = (
    osp.join(osp.basename(PATH_DRIVE), 'MyDrive', PATH_STORAGE)
    if 'PATH_DRIVE' in locals() else ''
)

# Scratch locations that always live on the local disk
PATH_TEMP = osp.join('/', 'content', 'temp')
PATH_RESOURCES = osp.join('/', 'content', 'resources')
PATH_SNAPSHOTS = osp.join('/', 'content', 'snapshots')
# Uncomment to save the source Copernicus archives in Google Drive (extra space)
# PATH_SNAPSHOTS = osp.join('/', 'content', PREFIX_DRIVE, 'snapshots')

# Locations that follow the Drive prefix when it is set
PATH_INPUT = osp.join('/', 'content', PREFIX_DRIVE, 'input')
PATH_OUTPUT = osp.join('/', 'content', PREFIX_DRIVE, 'output')

print(PATH_STORAGE, PATH_TEMP, PATH_INPUT, PATH_OUTPUT,
      PATH_RESOURCES, PATH_SNAPSHOTS, sep='\n')

In [None]:
# Install the MaritimeAI `copernicus` package straight from GitHub and clone
# the `resources` repository into PATH_RESOURCES.
# NOTE(review): neither command pins a tag or commit, so the result depends on
# the current state of the remote default branches.
# NOTE(review): `git clone` is expected to fail when PATH_RESOURCES already
# exists and is not empty, i.e. when this cell is re-run — confirm acceptable.
!pip install git+https://github.com/MaritimeAI/copernicus
!git clone https://github.com/MaritimeAI/resources.git {PATH_RESOURCES}

In [None]:
# Shapefile from the cloned resources; passed as `shapes` to process_sentinel1
FILE_SHAPEFILE = osp.join(
    PATH_RESOURCES,
    'clustering',
    'cutline',
    'Start_Ice_Map_UTMz40WGS84f_r.shp',
)

In [None]:
# Which split to work on. Every snapshot gets 'split' = ISO week number % 5 + 1
# (values 1..5); only rows whose 'split' equals SPLIT are downloaded, and the
# value is also part of the snapshots CSV file name.
SPLIT = 1

In [None]:
# List what is currently stored in the input directory
%ls {PATH_INPUT}

In [None]:
# %%time
# Search the Copernicus Open Access Hub for Sentinel-1 GRD snapshots that
# intersect the Pechora area between 2021-10-01 and the current moment.
# Produces `snapshots` (search results) and `config` (used later by download).

import os
import json

from getpass import getpass
from datetime import datetime, timedelta, timezone

from copernicus import Config
from copernicus import DataHub
from copernicus import download
from copernicus import Polygons


FORMAT_COPERNICUS_DATETIME = '%Y-%m-%dT%H:%M:%S.%f'
# Anchored to PATH_RESOURCES (where the resources repository is cloned) so the
# lookup does not depend on the current working directory.
PATH_AREA_SEARCH = osp.join(PATH_RESOURCES, 'copernicus',
                            'areas', 'pechora.geojson')

# Ask for credentials interactively only when the environment lacks them
if os.getenv('COPERNICUS_USERNAME', None) is None:
    os.environ['COPERNICUS_USERNAME'] = getpass('Username:')
if os.getenv('COPERNICUS_PASSWORD', None) is None:
    os.environ['COPERNICUS_PASSWORD'] = getpass('Password:')

config = Config()
config.username = os.environ['COPERNICUS_USERNAME']  # <-- set Copernicus Open Access Hub username here
config.password = os.environ['COPERNICUS_PASSWORD']  # <-- set Copernicus Open Access Hub password here

data_hub = DataHub(config)

# Context manager closes the file handle instead of leaking it
with open(PATH_AREA_SEARCH) as file_area:
    area = json.load(file_area)
# The first feature's properties are the base of the search query
search = area['features'][0]['properties'].copy()

# Uncomment to search snapshots for specific date
# now = datetime.now(timezone.utc).replace(month=5, day=4, hour=0, minute=0,
#                                          second=0, microsecond=0)
# The timestamps below carry a 'Z' (UTC) suffix, so take the current time in
# UTC rather than in the machine's local time zone.
now = datetime.now(timezone.utc)
# day = timedelta(hours=23, minutes=59,
#                 seconds=59, microseconds=999999)

# [:-3] trims microseconds down to milliseconds before appending 'Z'
date_start = datetime(2021, 10, 1).strftime(FORMAT_COPERNICUS_DATETIME)[:-3] + 'Z'
date_stop = now.strftime(FORMAT_COPERNICUS_DATETIME)[:-3] + 'Z'

# Restrict the query to Sentinel-1 GRD products acquired in the date range.
# 'filenames' is dropped from the query; pop() tolerates a GeoJSON without it.
# NOTE(review): presumably 'filenames' is not a valid search field — confirm.
search.pop('filenames', None)
search.update({
    'start': 0,
    'platformName': 'Sentinel-1',
    'productType': 'GRD',
    'beginPosition': f"[{date_start} TO {date_stop}]",
    # 'beginPosition': f"[NOW-2DAYS TO NOW]",
})
# print(f"DEBUG: search = {search}")

polygon, properties = Polygons.read_geojson(PATH_AREA_SEARCH)
snapshots = data_hub.search(search, area=polygon)
# print(f"DEBUG: snapshots = {snapshots}")

# Directory the archives are written to when download() is called later
config.output = PATH_SNAPSHOTS

print('Snapshots count =', len(snapshots))
# Uncomment to download every found snapshot right away (all splits):
# for i, snapshot in enumerate(snapshots):
    # print(f"{i:03d}", snapshot.__dict__)
    # download(snapshot.link, config)
    # print()

In [None]:
import pandas as pd


# One column per attribute of the first snapshot, one row per snapshot,
# ordered by the start of acquisition.
frame_snapshots = pd.DataFrame({
    name: [vars(item)[name] for item in snapshots]
    for name in vars(snapshots[0])
}).sort_values(['begin_position'])

# Let the notebook render the whole table instead of truncating it
pd.set_option('display.max_rows', len(frame_snapshots) + 1)

frame_snapshots = (
    frame_snapshots
    .assign(
        # geometries become WKT text; values that are already str stay as is
        polygon=lambda frame: frame['polygon'].apply(
            lambda value: value if type(value) is str else value.wkt),
        # ISO week number of the acquisition start
        week=lambda frame: frame['begin_position'].apply(
            lambda moment: moment.isocalendar()[1]),
        # weeks are dealt round-robin into splits 1..5
        split=lambda frame: frame['week'].apply(lambda week: week % 5 + 1),
    )
    .set_index('uuid')
)

frame_snapshots

In [None]:
# Keep only the snapshots of the selected split. The explicit copy makes the
# result an independent frame, so the 'coverage' column added to it later is
# not written to a slice of frame_snapshots (SettingWithCopyWarning).
frame_snapshots_split = frame_snapshots[frame_snapshots['split'] == SPLIT].copy()

# Download every archive of the split, printing a 1-based running number
for index, (uuid, snapshot) in enumerate(frame_snapshots_split.iterrows(),
                                         start=1):
    print(f"{index:03d}", snapshot['title'])
    download(snapshot['link'], config)
    print()

In [None]:
# Reinstall the `maritimeai` package from the master branch on GitHub;
# uninstalling first forces pip to fetch it again.
# NOTE(review): tracking `master` is not reproducible — consider pinning a
# tag or commit.
!pip uninstall -y maritimeai
!pip install git+https://github.com/MaritimeAI/maritimeai@master#egg=maritimeai

In [None]:
# %%time
# Run process_sentinel1 over every *.zip archive in PATH_SNAPSHOTS and gather
# whatever it returns into `filenames`.

from glob import glob
from zipfile import BadZipFile

from maritimeai import process_sentinel1

filenames = []

# Guard: there is nothing to process without the archive directory
if not osp.isdir(PATH_SNAPSHOTS):
    raise FileNotFoundError(f"Path '{PATH_SNAPSHOTS}' must exist!")

for filename in glob(osp.join(PATH_SNAPSHOTS, '*.zip')):
    try:
        print(f"Processing {filename}...")
        filenames.extend(
            process_sentinel1(
                filename,
                PATH_TEMP,
                area='default',
                shapes=[FILE_SHAPEFILE],
                negative=False,
            )
        )
    except BadZipFile:
        # A broken archive is reported and skipped; the rest still runs
        print(f"ERROR: {filename} is damaged!")

In [None]:
# Show the items collected from process_sentinel1 in the previous cell
filenames

In [None]:
from glob import glob

import cv2 as cv


# Coverage = share of non-zero pixels in the snapshot's 'hh' raster found
# under PATH_TEMP. Snapshots without a readable raster keep coverage 0.0.
# .assign() returns a new frame, so the column is never written to a slice of
# frame_snapshots (no SettingWithCopyWarning) and re-running resets the column.
frame_snapshots_split = frame_snapshots_split.assign(coverage=0.0)

for i, row in frame_snapshots_split.iterrows():
    try:
        path = glob(osp.join(PATH_TEMP, 'default*', 'hh',
                             f"{row['title']}.tiff"))[0]
        image = cv.imread(path, cv.IMREAD_LOAD_GDAL)
        if image is None:
            # cv.imread signals an unreadable file by returning None
            raise ValueError(f"cannot read '{path}'")
        frame_snapshots_split.loc[i, 'coverage'] = (image > 0).mean()
    except IndexError:
        # glob() matched nothing: the snapshot has no processed raster
        print(f"WARNING: no 'hh' raster for {row['title']}, coverage = 0.0")
    except Exception as error:
        # Stay best-effort, but report the problem instead of hiding it;
        # unlike a bare `except`, this lets KeyboardInterrupt stop the loop.
        print(f"WARNING: {row['title']}: {error!r}, coverage = 0.0")

frame_snapshots_split

In [None]:
# Minimal 'coverage' (share of non-zero pixels in the 'hh' raster) a snapshot
# needs to be selected for copying to the input directory.
THRESHOLD_COVERAGE = 0.1  # ratio 0.08 gives small pieces

In [None]:
import os

from glob import glob
from shutil import copy2 as copy

import cv2 as cv

from matplotlib import pyplot as plt


# For every snapshot whose coverage reaches THRESHOLD_COVERAGE: optionally
# copy its source archive and processed files to PATH_INPUT, then preview the
# 'negative' raster.
COPY = True

index_threshold = frame_snapshots_split['coverage'] >= THRESHOLD_COVERAGE
frame_threshold = frame_snapshots_split[index_threshold]

for i, row in frame_threshold.iterrows():
    try:
        # NOTE(review): the preview is taken from the 'negative' folder while
        # process_sentinel1 is called with negative=False — confirm that this
        # folder is actually produced, otherwise nothing is copied.
        filename = glob(osp.join(PATH_TEMP, 'default*', 'negative',
                                f"{row['title']}.tiff"))[0]
        image = cv.imread(filename, cv.IMREAD_LOAD_GDAL)
        print(osp.basename(filename), row['coverage'])
        if COPY:
            # Source archive: PATH_SNAPSHOTS/<title>.zip -> PATH_INPUT/snapshots/
            path_source = f"{osp.join(PATH_SNAPSHOTS, row['title'])}.zip"
            path_target = path_source.replace(PATH_SNAPSHOTS,
                                              osp.join(PATH_INPUT, 'snapshots'))
            os.makedirs(osp.dirname(path_target), exist_ok=True)
            copy(path_source, path_target)
            print(f"{path_source} -> {path_target}")
            # Processed files of this snapshot keep their layout relative to
            # PATH_TEMP when mirrored into PATH_INPUT
            for path_source in filenames:
                if osp.splitext(osp.basename(path_source))[0] == row['title']:
                    path_target = path_source.replace(PATH_TEMP, PATH_INPUT)
                    os.makedirs(osp.dirname(path_target), exist_ok=True)
                    copy(path_source, path_target)
                    print(f"{path_source} -> {path_target}")

        # Convert before opening a figure so a failed conversion does not
        # leave an empty figure behind
        image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        plt.figure(figsize=(15, 15))
        plt.axis('off')

        plt.imshow(image_rgb)
        plt.show()
    except KeyboardInterrupt:
        break
    except IndexError:
        # glob() matched nothing for this title
        print(f"WARNING: no 'negative' raster for {row['title']}, skipped")
    except Exception as error:
        # Keep going with the next snapshot, but make the failure visible
        print(f"WARNING: {row['title']} skipped: {error!r}")

In [None]:
import os

# Merge the current split table into the per-split CSV registry kept in
# PATH_INPUT; on duplicated index values the freshest row wins.
path_snapshots_db = osp.join(PATH_INPUT, f"snapshots.{SPLIT}.csv")

try:
    frame_snapshots_read = pd.read_csv(path_snapshots_db, index_col=0)
except FileNotFoundError:
    # First run for this split: start from an empty table with the same
    # columns (previously an unconditional `raise` made this unreachable).
    frame_snapshots_read = pd.DataFrame([], columns=frame_snapshots_split.columns)

frame_snapshots_split_ = pd.concat([frame_snapshots_read, frame_snapshots_split])
# keep='last' marks the earlier occurrences, i.e. the rows read from the CSV
frame_snapshots_split_ = frame_snapshots_split_[~frame_snapshots_split_.index\
                                                .duplicated(keep='last')]
# The directory may not exist yet when nothing has been copied into it
os.makedirs(osp.dirname(path_snapshots_db), exist_ok=True)
frame_snapshots_split_.to_csv(path_snapshots_db)

In [None]:
# Preview the 'hh' raster of every snapshot that did NOT reach the coverage
# threshold, to eyeball what is being left out.
frame_unthreshold = frame_snapshots_split[~index_threshold]

for i, row in frame_unthreshold.iterrows():
    try:
        filename = glob(osp.join(PATH_TEMP, 'default*', 'hh',
                                f"{row['title']}.tiff"))[0]
        image = cv.imread(filename, cv.IMREAD_LOAD_GDAL)
        print(osp.basename(filename), row['coverage'])

        # Convert before opening a figure so a failed conversion does not
        # leave an empty figure behind
        image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        plt.figure(figsize=(15, 15))
        plt.axis('off')

        plt.imshow(image_rgb)
        plt.show()
    except KeyboardInterrupt:
        break
    except IndexError:
        # glob() matched nothing: the snapshot has no processed raster
        print(f"WARNING: no 'hh' raster for {row['title']}, skipped")
    except Exception as error:
        # Keep going with the next snapshot, but make the failure visible
        print(f"WARNING: {row['title']} skipped: {error!r}")

In [None]:
# Print the snapshots CSV written for the current split
%cat {path_snapshots_db}