In [14]:
import os
import cv2
import json
import random
import numpy as np
from datetime import datetime, timedelta
from osgeo import gdal
from matplotlib import pyplot as plt

In [2]:
# Root folder of the S1/S2 fusion data. The default is the original local
# volume; set the FUSION_S1_S2_DATA_ROOT environment variable to point the
# notebook at another location without editing this cell.
# os.path.join(..., "") guarantees a trailing separator, which code that
# builds paths by plain string concatenation onto DATA_ROOT_DIR relies on.
DATA_ROOT_DIR = os.path.join(
    os.environ.get("FUSION_S1_S2_DATA_ROOT") or "/Volumes/X/Data/fusion-s1-s2/",
    "",
)

In [3]:
def closest_date(target_date, date_array):
    """Return the entry of ``date_array`` that is nearest in time to ``target_date``.

    Args:
        target_date: Reference date as a ``'YYYYMMDD'`` string.
        date_array: Iterable of candidate dates, each a ``'YYYYMMDD'`` string.

    Returns:
        The nearest candidate, formatted as ``'YYYYMMDD'``. When two candidates
        are equally distant, the one that appears first in ``date_array`` wins.

    Raises:
        ValueError: If any string does not parse as ``'%Y%m%d'`` or if
            ``date_array`` is empty.
    """
    fmt = '%Y%m%d'
    reference = datetime.strptime(target_date, fmt)
    candidates = [datetime.strptime(raw, fmt) for raw in date_array]

    def gap(candidate):
        # Absolute distance in time, regardless of direction.
        return abs(reference - candidate)

    nearest = min(candidates, key=gap)
    return nearest.strftime(fmt)

In [4]:
def date_difference(date1, date2):
    """Express the absolute gap between two dates as a ``'YYYYMMDD'``-shaped string.

    The gap in whole days is split using fixed 365-day years and 30-day
    months, so the result is an approximation rather than a calendar-exact
    difference (a remainder of 360-364 days yields a month field of ``12``).

    Args:
        date1: First date as a ``'YYYYMMDD'`` string.
        date2: Second date as a ``'YYYYMMDD'`` string.

    Returns:
        An 8-character string: 4-digit years, 2-digit months, 2-digit days.

    Raises:
        ValueError: If either string does not parse as ``'%Y%m%d'``.
    """
    fmt = '%Y%m%d'
    first = datetime.strptime(date1, fmt)
    second = datetime.strptime(date2, fmt)
    total_days = abs(first - second).days

    # Peel off 365-day years first, then 30-day months from what is left.
    years, leftover = divmod(total_days, 365)
    months, days = divmod(leftover, 30)

    return f'{years:04d}{months:02d}{days:02d}'

In [5]:
# Side length, in pixels, of each square crop.
CROP_SIZE = 1024
# Side length, in pixels, of the (square) input rasters.
INPUT_IMAGE_SIZE = 10980


def crop_offsets(image_size, crop_size):
    """Return the start offsets of every full, non-overlapping crop along one axis.

    Args:
        image_size: Length of the axis in pixels.
        crop_size: Length of one crop in pixels (also the stride).

    Returns:
        A ``range`` of offsets ``o`` with ``o + crop_size <= image_size``.
        Empty when the image is smaller than one crop.
    """
    # The stop is exclusive, so "+ 1" keeps the last crop that ends exactly on
    # the image border (e.g. image_size=2048, crop_size=1024 -> 0 and 1024).
    return range(0, image_size - crop_size + 1, crop_size)


length = crop_offsets(INPUT_IMAGE_SIZE, CROP_SIZE)
# Every (x, y) top-left corner of the crop grid, x-major.
ALL_XY_COORDINATES = [(x, y) for x in length for y in length]

In [8]:
# Load the Fmask statistics into `fmask`. The path is relative to the
# notebook's working directory. The encoding is given explicitly so decoding
# does not depend on the platform's locale default.
with open("data/fmask_stats.json", "r", encoding="utf-8") as f:
    fmask = json.load(f)

In [9]:
# Keep the entries that either have no "no_observation" field or report a
# value below 5 for it (presumably a percentage — TODO confirm the unit).
fmask_wo_no_obs = {
    scene_key: stats
    for scene_key, stats in fmask.items()
    if "no_observation" not in stats or stats["no_observation"] < 5
}

In [10]:
# Of the entries that passed the no-observation filter, keep those that have
# a "cloud" field with a value above 20 (presumably a percentage — TODO
# confirm the unit).
cloudy_fmask = {
    scene_key: stats
    for scene_key, stats in fmask_wo_no_obs.items()
    if "cloud" in stats and stats["cloud"] > 20
}

In [11]:
# Root folders of the Sentinel-2 and Sentinel-1 products under DATA_ROOT_DIR
# (DATA_ROOT_DIR must end with a path separator).
S2_ROOT_PATH = f"{DATA_ROOT_DIR}s2/sre-10m/"
S1_ROOT_PATH = f"{DATA_ROOT_DIR}s1db/32VNH/threeband/044/"

# os.listdir returns entries in arbitrary order; sorting makes the nearest-date
# tie-breaking and the dataset indices reproducible across machines and runs.
# NOTE(review): assumes every entry name has at least three "_"-separated
# fields with the YYYYMMDD date third from the end; a stray entry would raise
# IndexError here or fail date parsing later — confirm the folder contents.
s1_dates = sorted(date.split("_")[-3] for date in os.listdir(S1_ROOT_PATH))

# Scene names flagged as cloudy: each fmask key with its last "_"-separated
# field removed. Built once as a set instead of once per directory entry.
cloudy_scene_names = {"_".join(key.split("_")[:-1]) for key in cloudy_fmask}
s2_to_explore = sorted(
    path for path in os.listdir(S2_ROOT_PATH) if path in cloudy_scene_names
)
data = []
# Band suffixes that must all be present for a scene to be used.
required_s2_bands = ["B02", "B03", "B04"]

In [80]:
# Map a running integer index to the list of input rasters for one sample:
# the required S2 band files followed by the S1 file nearest in time.
dataset = {}
idx = 0

for date_name in s2_to_explore:
    # The trailing "_"-separated token of the scene name is handed to
    # closest_date, which parses it as a YYYYMMDD date.
    date_str = date_name.split("_")[-1]
    s1_closest_date = closest_date(target_date=date_str, date_array=s1_dates)
    # TODO(original author): s2_closest_date_with_clouds = ????

    scene_dir = f"{S2_ROOT_PATH}{date_name}/"
    s2_bands_path = os.listdir(scene_dir)

    data_input = []

    # Skip scenes that lack any of the required band files.
    missing_bands = [
        band
        for band in required_s2_bands
        if f"{date_name}_{band}.tif" not in s2_bands_path
    ]
    if missing_bands:
        continue

    data_input = [
        f"{S2_ROOT_PATH}{date_name}/{date_name}_{band}.tif"
        for band in required_s2_bands
    ]
    data_input.append(f"{S1_ROOT_PATH}{s1_closest_date}/{s1_closest_date}_A_044.tif")

    dataset[idx] = data_input
    idx += 1


In [81]:
dataset

{0: ['/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190108/S2_32VNH_20190108_B02.tif',
  '/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190108/S2_32VNH_20190108_B03.tif',
  '/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190108/S2_32VNH_20190108_B04.tif',
  '/Volumes/X/Data/fusion-s1-s2/s1db/32VNH/threeband/044/20190103/20190103_A_044.tif'],
 1: ['/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190118/S2_32VNH_20190118_B02.tif',
  '/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190118/S2_32VNH_20190118_B03.tif',
  '/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190118/S2_32VNH_20190118_B04.tif',
  '/Volumes/X/Data/fusion-s1-s2/s1db/32VNH/threeband/044/20190115/20190115_A_044.tif'],
 2: ['/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190217/S2_32VNH_20190217_B02.tif',
  '/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190217/S2_32VNH_20190217_B03.tif',
  '/Volumes/X/Data/fusion-s1-s2/s2/sre-10m/S2_32VNH_20190217/S2_32VNH_20190217_B04.tif',
  '/Volumes/X/