In [1]:
import importlib
from osgeo import gdal
import pprint
import json
import numpy as np
import matplotlib.pyplot as plt
import itertools
from collections import Counter

# Fmask class codes:
#   0 => clear land pixel
#   1 => clear water pixel
#   2 => cloud shadow
#   3 => snow
#   4 => cloud
# 255 => no observation

In [2]:
import os

# Root folder holding one sub-folder per scene; replace with the path to your directory.
directory = "/Volumes/X/Data/fusion-s1-s2/s2/fmask/"

# Expected layout: <directory>/<scene>/<scene>_fmask.tif
fmask_paths = [
    f"{directory}{folder_name}/{folder_name}_fmask.tif"
    # Sorted so the processing order is reproducible across runs and machines.
    for folder_name in sorted(os.listdir(directory))
    # Skip stray files (e.g. ".DS_Store"): only sub-folders can hold a scene.
    if os.path.isdir(os.path.join(directory, folder_name))
]

In [3]:
# Human-readable label for every Fmask class code; the labels become the keys
# of the per-crop statistics, so they must match the class legend exactly.
MAP_CLOUD = {
    0: "clear_land",
    1: "clear_water",  # was mislabelled "cloud_water"; code 1 is a clear water pixel
    2: "cloud_shadow",
    3: "snow",
    4: "cloud",
    255: "no_observation",
}

In [58]:
def fmask_double(input_path, output_path):
    """Write a VRT that resamples an Fmask raster to twice its width and height.

    Args:
        input_path: Path to the Fmask GeoTIFF. If no file exists at this path,
            a file named ``fmask.tif`` in the same folder is used instead.
        output_path: Path of the VRT file to create.

    Raises:
        RuntimeError: If GDAL cannot open the source raster or cannot build
            the warped VRT.
    """
    if os.path.isfile(input_path):
        source_path = input_path
    else:
        # Fall back to the generically named file in the same folder.
        source_path = os.path.join(os.path.dirname(input_path), "fmask.tif")

    src_ds = gdal.Open(source_path)
    if src_ds is None:
        raise RuntimeError(f"GDAL could not open Fmask raster: {source_path}")

    # Calculate the new dimensions
    new_width = src_ds.RasterXSize * 2
    new_height = src_ds.RasterYSize * 2

    # The mask holds categorical class codes, so the values must be copied, not
    # interpolated: bilinear resampling would blend neighbouring codes into
    # classes that are not present (e.g. 0 and 4 averaging to 2).
    warp_options = gdal.WarpOptions(format="VRT",
                                    width=new_width,
                                    height=new_height,
                                    resampleAlg=gdal.GRA_NearestNeighbour)

    warped_ds = gdal.Warp(destNameOrDestDS=output_path,
                          srcDSOrSrcDSTab=src_ds,
                          options=warp_options)
    if warped_ds is None:
        raise RuntimeError(f"GDAL could not create warped VRT: {output_path}")

    # Dropping the references closes both datasets and flushes the VRT to disk.
    warped_ds = None
    src_ds = None

In [None]:
# Build one doubled VRT per scene, each in its own output folder.
for fmask_path in fmask_paths:
    # "<scene>_fmask.tif" -> "<scene>": drop the last "_"-separated token.
    file_name = "_".join(fmask_path.split("/")[-1].split("_")[:-1])
    output_dir = f"data/fmask_cropped/{file_name}/"
    output_path = f"{output_dir}{file_name}.vrt"
    # exist_ok avoids the check-then-create race and makes the cell re-runnable.
    os.makedirs(output_dir, exist_ok=True)
    fmask_double(fmask_path, output_path)

In [60]:
# Side length of one square crop.
# NOTE(review): presumably in pixels — the tiling cell multiplies it by 10 to
# get an extent in map units; confirm against the raster's pixel size.
CROP_SIZE = 256

In [61]:
# Cut each doubled Fmask VRT into a grid of square crop VRTs.

# Ground extent of one crop in map units.
# NOTE(review): the factor 10 is presumably the pixel size of the doubled
# raster — confirm against gt[1] of the input VRTs.
tile_extent = CROP_SIZE * 10

for fmask in os.listdir("data/fmask_cropped/")[:10]:
    print(fmask)
    image_path = f"data/fmask_cropped/{fmask}/{fmask}.vrt"
    image = gdal.Open(image_path)
    print(f"Processing {image_path}")
    if image is None:
        raise RuntimeError(f"GDAL could not open {image_path}")

    width = image.RasterXSize
    height = image.RasterYSize

    # gt[0]/gt[3] are the x/y origin, gt[1]/gt[5] the pixel width/height.
    # Assumes a north-up raster (gt[5] < 0), so y decreases downwards — TODO confirm.
    gt = image.GetGeoTransform()

    left = int(gt[0])
    top = int(gt[3])
    right = int(gt[0] + width * gt[1])
    bottom = int(gt[3] + height * gt[5])

    # Lower-left corners of the crops, row by row from the top of the raster.
    # A crop is generated only if it overlaps the raster: its left edge must be
    # left of `right`, and its upper edge (y + tile_extent) must be above
    # `bottom`. The previous stop values added CROP_SIZE (pixels) to map-unit
    # coordinates, which could emit crops lying entirely outside the raster.
    x_length = range(left, right, tile_extent)
    y_length = range(top - tile_extent, bottom - tile_extent, -tile_extent)

    ALL_XY_COORDINATES = [(x, y) for y in y_length for x in x_length]

    crop_dir = f"data/fmask_cropped/{fmask}/cropped/"
    os.makedirs(crop_dir, exist_ok=True)
    for idx, (x, y) in enumerate(ALL_XY_COORDINATES):
        # outputBounds is (min_x, min_y, max_x, max_y) in map units.
        vrt_options = gdal.BuildVRTOptions(
            resolution="highest",
            outputBounds=(x, y, x + tile_extent, y + tile_extent),
        )
        gdal.BuildVRT(f"{crop_dir}{fmask}_{idx}_{x}_{y}_{CROP_SIZE}.vrt", [image_path], options=vrt_options)

S2_32VNH_20200825
Processing data/fmask_cropped/S2_32VNH_20200825/S2_32VNH_20200825.vrt
S2_32VNH_20210429
Processing data/fmask_cropped/S2_32VNH_20210429/S2_32VNH_20210429.vrt
S2_32VNH_20190903
Processing data/fmask_cropped/S2_32VNH_20190903/S2_32VNH_20190903.vrt
S2_32VNH_20200214
Processing data/fmask_cropped/S2_32VNH_20200214/S2_32VNH_20200214.vrt
S2_32VNH_20191214
Processing data/fmask_cropped/S2_32VNH_20191214/S2_32VNH_20191214.vrt
S2_32VNH_20210624
Processing data/fmask_cropped/S2_32VNH_20210624/S2_32VNH_20210624.vrt
S2_32VNH_20200822
Processing data/fmask_cropped/S2_32VNH_20200822/S2_32VNH_20200822.vrt
S2_32VNH_20210623
Processing data/fmask_cropped/S2_32VNH_20210623/S2_32VNH_20210623.vrt
S2_32VNH_20191028
Processing data/fmask_cropped/S2_32VNH_20191028/S2_32VNH_20191028.vrt
S2_32VNH_20190131
Processing data/fmask_cropped/S2_32VNH_20190131/S2_32VNH_20190131.vrt


In [62]:
# Compute, for every crop, the percentage of pixels in each Fmask class and
# store the result as JSON keyed by crop file name.
json_data = {}

FMASK_DIRECTORY = "data/fmask_cropped/"

for fmask_path in os.listdir(FMASK_DIRECTORY)[:10]:
    print(f"Processing {fmask_path}")
    path_to_crops = f"{FMASK_DIRECTORY}{fmask_path}/cropped/"
    for im_path in os.listdir(path_to_crops):
        gdal_data = gdal.Open(f"{path_to_crops}{im_path}")
        data = gdal_data.ReadAsArray()

        total_pixels = data.size

        # Count pixels per class code in one vectorised pass instead of
        # iterating over every pixel in Python.
        codes, counts = np.unique(data, return_counts=True)

        # Codes missing from MAP_CLOUD are reported under an explicit
        # "unknown_<code>" label rather than aborting the whole run.
        class_percentages = {
            MAP_CLOUD.get(int(code), f"unknown_{int(code)}"): round((int(n) / total_pixels) * 100, 2)
            for code, n in zip(codes, counts)
        }

        # Most frequent class first.
        json_data[im_path] = dict(
            sorted(class_percentages.items(), key=lambda item: -item[1])
        )

with open("data/fmask_cropped_stats.json", "w") as f:
    json.dump(json_data, f)

Processing S2_32VNH_20200825
Processing S2_32VNH_20210429
Processing S2_32VNH_20190903
Processing S2_32VNH_20200214
Processing S2_32VNH_20191214
Processing S2_32VNH_20210624
Processing S2_32VNH_20200822
Processing S2_32VNH_20210623
Processing S2_32VNH_20191028
Processing S2_32VNH_20190131
