In [1]:
import sys
import os
from typing import List
from pprint import pprint
# Put the parent directory of the notebook's working directory at the front of
# the import path (presumably where `project_config` and `utils` live — confirm).
sys.path.insert(0, os.path.abspath('..'))

# IPython autoreload, mode 2: re-import all modules before each cell execution,
# so edits to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from google.cloud import storage
from project_config import GCP_PROJECT_NAME

# Cloud Storage client bound to the project named in project_config; it is
# passed to observation_factory() in the cells below.
gcp_client = storage.Client(project=GCP_PROJECT_NAME)

In [3]:
from utils.schemas import ObservationPointer
from rastervision.core.data import RasterioSource
from utils.data_management import observation_factory

## Mean and Std of S1 images

In [None]:
import numpy as np
from rastervision.core.data.raster_transformer.nan_transformer import NanTransformer

# Pool every pixel of the first two S1 bands across all observations so that
# per-band mean/std can be computed from `all_vv` and `all_vh`.
# NOTE(review): NanTransformer substitutes 0 for NaN, so any no-data pixels are
# counted as 0 in those statistics — confirm this is intended.
all_observations: List[ObservationPointer] = list(observation_factory(gcp_client))

all_vv_raveled = []
all_vh_raveled = []
for observation in all_observations:
    raster_source = RasterioSource(
        observation.uri_to_s1,
        raster_transformers=[NanTransformer()]  # replaces NaNs with 0
    )

    # Read the raster once and slice both bands from the same array; calling
    # get_image_array() once per band would load the full image twice.
    s1_img = raster_source.get_image_array()
    vv_img = s1_img[:, :, 0]  # band 0 is treated as VV
    vh_img = s1_img[:, :, 1]  # band 1 is treated as VH
    all_vv_raveled.append(vv_img.ravel())
    all_vh_raveled.append(vh_img.ravel())

# Join the per-observation pixel vectors into one 1-D array per band.
all_vv = np.hstack(all_vv_raveled)
all_vh = np.hstack(all_vh_raveled)


In [5]:
# Report mean and standard deviation of the pooled pixel values, one line per band.
vv_mean, vv_std = np.mean(all_vv), np.std(all_vv)
print(f"VV: Mean = {vv_mean}, Std = {vv_std}")
vh_mean, vh_std = np.mean(all_vh), np.std(all_vh)
print(f"VH: Mean = {vh_mean}, Std = {vh_std}")

VV: Mean = -11.950798465937643, Std = 3.319216134000598
VH: Mean = -18.939975061395252, Std = 3.840950717746793


## Area of observations

In [None]:
from utils.data_management import get_location_from_key

# Per-location overview: number of observations, raster patch size, and the
# area that patch covers.
dataset_summary = {}

all_observations: List[ObservationPointer] = list(observation_factory(gcp_client))

# Bucket the observations by the location derived from their name.
observations_per_locations = {}
for obs in all_observations:
    observations_per_locations.setdefault(get_location_from_key(obs.name), []).append(obs)

for loc, loc_observations in observations_per_locations.items():
    # Only the first observation of a location is inspected: all observations
    # of one location are expected to cover the same geographical extent.
    s2_source = RasterioSource(loc_observations[0].uri_to_s2, allow_streaming=False)
    patch_size = s2_source.shape[:2]
    # Each pixel covers 100 m^2, i.e. 1e-4 km^2, hence the division by 1e4.
    area_km2 = patch_size[0] * patch_size[1] / 1e4
    dataset_summary[loc] = {
        "Number of observations": len(loc_observations),
        "Patch size": patch_size,
        "Coverage area [km^2]": round(area_km2, 2),
    }

pprint(dataset_summary)
print(f"Total of {len(observations_per_locations)} locations")
