In [None]:
import os
import json
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt

from osgeo import gdal

In [None]:
# Root folder of the raw data; every other path in this cell is derived from it.
DATA_ROOT_DIR = "/Volumes/X/Data/fusion-s1-s2/"
# Sub-folder holding the s2 products.
S2_ROOT_PATH = DATA_ROOT_DIR + "s2/sre-10m/"
# Orbit identifier used as the last component of the s1 folder.
ORBIT = "044"
S1_ROOT_PATH = DATA_ROOT_DIR + "s1db/32VNH/threeband/" + ORBIT + "/"

In [None]:
def closest_date(target_date, date_array):
    """Return the entry of ``date_array`` that is nearest in time to ``target_date``.

    Note: a later cell of this notebook defines another ``closest_date``; once
    that cell has run, it replaces this definition.

    Args:
        target_date: Reference date as a 'YYYYMMDD' string.
        date_array: Iterable of candidate dates, each a 'YYYYMMDD' string.

    Returns:
        The nearest candidate, formatted as 'YYYYMMDD'. On a tie the candidate
        that appears first in ``date_array`` is returned.

    Raises:
        ValueError: If a string does not match 'YYYYMMDD' or ``date_array`` is empty.
    """
    fmt = '%Y%m%d'
    reference = datetime.strptime(target_date, fmt)
    parsed = list(map(lambda text: datetime.strptime(text, fmt), date_array))

    def gap(candidate):
        # Absolute distance in time between a candidate and the reference.
        return abs(reference - candidate)

    return min(parsed, key=gap).strftime(fmt)

In [None]:
def date_difference(date1, date2):
    """Express the gap between two dates as a pseudo 'YYYYMMDD' string.

    The gap in days is split with fixed 365-day years and 30-day months, so
    the result is an approximation and the month field can reach 12
    (e.g. 364 days -> '00001204').

    Args:
        date1: First date as a 'YYYYMMDD' string.
        date2: Second date as a 'YYYYMMDD' string.

    Returns:
        An 8-character string: 4 digits of years, 2 of months, 2 of days.
        The order of the two arguments does not matter.
    """
    fmt = '%Y%m%d'
    elapsed_days = abs(datetime.strptime(date1, fmt) - datetime.strptime(date2, fmt)).days

    # Peel off whole "years" first, then whole "months" from what is left.
    years, remainder = divmod(elapsed_days, 365)
    months, days = divmod(remainder, 30)

    return f'{years:04d}{months:02d}{days:02d}'

In [None]:
# Load the candidate pairs. The dataset-building cell below iterates over
# candidates.items() and reads the "cloudy" and "cloud_free" entries of each value.
with open("data/candidates_filtered_water_MAXED.json", "r") as f:
    candidates = json.load(f)

In [None]:
def closest_date(target_date, date_array):
    """Return the entry of ``date_array`` that is nearest in time to ``target_date``.

    Entries containing the substring "Store" are ignored; the callers build
    ``date_array`` from a directory listing, so this presumably drops macOS
    ``.DS_Store`` entries.

    Args:
        target_date: Reference date as a 'YYYYMMDD' string.
        date_array: Iterable of candidate date strings ('YYYYMMDD').

    Returns:
        The nearest candidate, formatted as 'YYYYMMDD'. On a tie the candidate
        that appears first in ``date_array`` is returned.

    Raises:
        ValueError: If a remaining entry does not match 'YYYYMMDD', or if no
            candidate is left after filtering.
    """
    fmt = '%Y%m%d'
    target = datetime.strptime(target_date, fmt)
    kept = [d for d in date_array if "Store" not in d]
    parsed = [datetime.strptime(d, fmt) for d in kept]
    if not parsed:
        # Same exception type min() would raise, but with a message that
        # points at the real cause.
        raise ValueError(
            f"closest_date: date_array holds no usable dates for target {target_date}"
        )
    nearest = min(parsed, key=lambda x: abs(target - x))
    return nearest.strftime(fmt)

In [None]:
def closest_date_before(target_date, date_array):
    """Return the latest entry of ``date_array`` that is on or before ``target_date``.

    Entries containing the substring "Store" are ignored.

    Args:
        target_date: Reference date as a 'YYYYMMDD' string.
        date_array: Iterable of candidate date strings ('YYYYMMDD').

    Returns:
        The latest candidate <= target, formatted as 'YYYYMMDD'. When no such
        candidate exists, ``target_date`` itself is returned.

    Raises:
        ValueError: If the target or a remaining entry does not match 'YYYYMMDD'.
    """
    fmt = '%Y%m%d'
    target = datetime.strptime(target_date, fmt)
    kept = [d for d in date_array if "Store" not in d]
    parsed = [datetime.strptime(d, fmt) for d in kept]
    # default=target keeps the original "fall back to the target" behaviour
    # without a bare except that would also hide unrelated errors.
    latest = max((d for d in parsed if d <= target), default=target)
    return latest.strftime(fmt)

def closest_date_after(target_date, date_array):
    """Return the earliest entry of ``date_array`` that is strictly after ``target_date``.

    Entries containing the substring "Store" are ignored.

    Args:
        target_date: Reference date as a 'YYYYMMDD' string.
        date_array: Iterable of candidate date strings ('YYYYMMDD').

    Returns:
        The earliest candidate > target, formatted as 'YYYYMMDD'. When no such
        candidate exists, ``target_date`` itself is returned.

    Raises:
        ValueError: If the target or a remaining entry does not match 'YYYYMMDD'.
    """
    fmt = '%Y%m%d'
    target = datetime.strptime(target_date, fmt)
    kept = [d for d in date_array if "Store" not in d]
    parsed = [datetime.strptime(d, fmt) for d in kept]
    # default=target keeps the original "fall back to the target" behaviour
    # without a bare except that would also hide unrelated errors.
    earliest = min((d for d in parsed if d > target), default=target)
    return earliest.strftime(fmt)

In [37]:
# Containers filled by later cells: the training split, the test split and
# the full set of samples. Each one starts as its own empty dict.
DATASET_TRAIN = {}
DATASET_TEST = {}
DATASET = {}

In [None]:
# Band identifiers that appear in the cropped s2 file names, in output order.
S2_BANDS = ["B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B11", "B12"]


def _s2_band_path(product, band):
    """Build the cropped-patch path of one band from a candidate file name.

    The first three '_'-separated fields of ``product`` name the scene; the
    remaining fields identify the patch and are appended after the band.
    """
    fields = product.split("_")
    scene = "_".join(fields[:3])
    patch = "_".join(fields[3:])
    return f"data/cropped/s2/{scene}/{scene}_{band}/{scene}_{band}_{patch}"


def _s1_path(date, polarisation, patch):
    """Build the cropped-patch path of one s1 polarisation for one date."""
    scene = f"S1_32VNH_{date}"
    return f"data/cropped/s1/{scene}/{scene}_{polarisation}/{scene}_{polarisation}_{patch}"


# Dates available in the s1 folder: last '_'-separated field of each entry name.
s1_dates = [d.split("_")[-1] for d in os.listdir("data/cropped/s1/")]

# Start from an empty mapping so that re-running this cell cannot keep
# samples from a previous run.
DATASET = {}
idx = 0

for k, v in candidates.items():
    cloudy = v["cloudy"]
    cloud_free = v["cloud_free"]
    date_cloudy = cloudy.split("_")[2]
    # Patch identifier shared by the s2 and s1 crops of this sample.
    patch_suffix = "_".join(cloudy.split("_")[3:])

    s1_date = closest_date(date_cloudy, s1_dates)
    # Both helpers return date_cloudy itself when there is no acquisition on
    # that side; the file check below then decides whether the sample is kept.
    after_date = closest_date_after(date_cloudy, s1_dates)
    before_date = closest_date_before(date_cloudy, s1_dates)

    TEMP_DATASET = {}
    for band in S2_BANDS:
        TEMP_DATASET[f"s2_cloudy_{band}"] = _s2_band_path(cloudy, band)
    for band in S2_BANDS:
        TEMP_DATASET[f"s2_cloud_free_{band}"] = _s2_band_path(cloud_free, band)

    TEMP_DATASET["s1_hv"] = _s1_path(s1_date, "HV", patch_suffix)
    TEMP_DATASET["s1_vv"] = _s1_path(s1_date, "VV", patch_suffix)
    # Fixes: the *_vv_* entries used to point at HV files, and the -1/+1
    # entries used the s1_date folder with another date's sub-folder.
    # NOTE(review): assumes every acquisition date has its own
    # S1_32VNH_<date> folder, as the s1_hv/s1_vv paths imply - confirm on disk.
    TEMP_DATASET["s1_hv_-1"] = _s1_path(before_date, "HV", patch_suffix)
    TEMP_DATASET["s1_hv_+1"] = _s1_path(after_date, "HV", patch_suffix)
    TEMP_DATASET["s1_vv_-1"] = _s1_path(before_date, "VV", patch_suffix)
    TEMP_DATASET["s1_vv_+1"] = _s1_path(after_date, "VV", patch_suffix)

    # Keep the sample only when every referenced file exists.
    if all(os.path.isfile(tv) for tv in TEMP_DATASET.values()):
        DATASET[idx] = TEMP_DATASET
        idx += 1

with open("data/dataset_filtered_water_timeseries.json", "w") as f:
    json.dump(DATASET, f)


In [38]:
# Separate dataset into a training part and a test part.
# The input file is the one written by the manual-filtering cell at the end
# of this notebook, so that cell must have been run at least once beforehand.
SPLIT_SEED = 42        # fixed seed: the same split is produced on every run
TRAIN_FRACTION = 0.9   # share of the samples that goes to the training split

with open("data/dataset_filtered_water_train_filtered.json", "r") as f:
    DATASET = json.load(f)

dataset_keys = list(DATASET.keys())
# A dedicated, seeded generator keeps the split reproducible and leaves
# NumPy's global random state untouched.
np.random.default_rng(SPLIT_SEED).shuffle(dataset_keys)
split_index = int(TRAIN_FRACTION * len(dataset_keys))

dataset_keys_train = dataset_keys[:split_index]
dataset_keys_test = dataset_keys[split_index:]

In [40]:
# Re-index each split from 0 and write it to its own JSON file.
# The dicts are rebuilt from scratch here rather than filled in place, so
# re-running this cell after the dataset has shrunk cannot leave stale
# samples from a previous run at the higher indices.
DATASET_TRAIN = {i: DATASET[k] for i, k in enumerate(dataset_keys_train)}

with open("data/dataset_filtered_water_filtered_train.json", "w") as f:
    json.dump(DATASET_TRAIN, f)

DATASET_TEST = {i: DATASET[k] for i, k in enumerate(dataset_keys_test)}

with open("data/dataset_filtered_water_filtered_test.json", "w") as f:
    json.dump(DATASET_TEST, f)


In [None]:
# Load the samples to inspect; the cells below plot each entry and then
# delete the rejected ones from this mapping.
# NOTE(review): no cell visible in this notebook writes this file - confirm
# where "dataset_filtered_water_train.json" comes from.
with open("data/dataset_filtered_water_train.json", "r") as f:
    dataset = json.load(f)

In [None]:
def _read_band(path):
    """Read one raster file with GDAL and return its content as an array."""
    raster = gdal.Open(path)
    if raster is None:
        # Without GDAL exceptions enabled, Open() signals failure with None.
        raise FileNotFoundError(f"GDAL could not open {path}")
    return raster.ReadAsArray()


def _read_rgb(sample, prefix):
    """Stack bands B04, B03, B02 of ``sample`` into one displayable image.

    ``prefix`` selects the image ("s2_cloudy" or "s2_cloud_free"). Values are
    divided by 2000 and clipped to [0, 1], the range imshow expects for float
    RGB data; imshow would otherwise do the same clipping with a warning.
    """
    bands = [_read_band(sample[f"{prefix}_{band}"]) for band in ("B04", "B03", "B02")]
    return np.clip(np.stack(bands, axis=-1) / 2000, 0, 1)


# The preview images are written here; create the folder if it is missing.
os.makedirs("filtering", exist_ok=True)
# Show once where the relative paths are resolved from.
print(os.getcwd())

for k, v in dataset.items():
    print(k)
    c = _read_rgb(v, "s2_cloudy")
    g = _read_rgb(v, "s2_cloud_free")

    fig, (ax_input, ax_truth) = plt.subplots(1, 2)

    # Left panel: the cloudy image.
    ax_input.set_title("input")
    ax_input.imshow(c)
    ax_input.axis('off')

    # Right panel: the cloud-free image.
    ax_truth.set_title("truth")
    ax_truth.imshow(g)
    ax_truth.axis('off')

    # File name: sample key followed by the B02 file name without extension.
    fig.savefig(f"filtering/{k}_{v['s2_cloudy_B02'].split('.')[0].split('/')[-1]}.png")

    # Close the figure so that figures do not pile up in memory.
    plt.close(fig)
    

In [31]:
# Keys of the samples that the removal cell below deletes from `dataset`.
to_remove = [
    "14", "17", "50", "72", "88", "107", "116", "141", "186", "245", "275", "355", "357", "381", "421", "462", "526", "527",
    "530", "534", "551", "557", "562", "576", "655", "656", "661", "704", "723", "726", "737", "744", "769", "781",
    "881", "892", "916", "921", "946", "976", "1051", "1055", "1065", "1078", "1127", "1133", "1139", "1145", "1151", "1154",
    "1159", "1182", "1183", "1186", "1205", "1216", "1232", "1252", "1257", "1267", "1294", "1299", "1301", "1318", "1348",
    "1366", "1369", "1385", "1406", "1414",
]

# Undecided samples. A comma was missing between "162" and "173", which made
# Python join them into the single key "162173".
maybe = ["20", "30", "36", "51", "52", "53", "57", "65", "73", "94", "118", "139", "150", "151", "163", "162",
         "173", "186", "201", "830", "829"]

# NOTE(review): presumably French for thin / medium / thick - confirm what
# these three lists are meant to hold.
fin = [""]
moyen = ["209"]
gros = []

to_look_into = "590_S2_32VNH_20190505_B02_952_515340_6241120_256 and 591_S2_32VNH_20200812_B02_952_515340_6241120_256"

In [32]:
# Drop every rejected sample from `dataset`.
# `paths` is not used in this cell; it is kept in case other cells refer to it.
paths = []
for sample_id in to_remove:
    # pop() with a default instead of `del`: running this cell a second time,
    # or listing a key that is not in the dataset, no longer raises KeyError.
    # The loop variable is no longer called `id`, which hid the builtin.
    dataset.pop(sample_id, None)

In [33]:
# Save `dataset` as it stands after the removal cell above. The train/test
# split cell earlier in this notebook reads this same file.
with open("data/dataset_filtered_water_train_filtered.json", "w") as f:
    json.dump(dataset, f)