In [5]:
import sys
import os
from typing import List
# Put the parent of the current working directory at the front of the import path.
# NOTE(review): presumably this is what lets the `utils` package imported in the
# following cells resolve -- it relies on the kernel's working directory being the
# notebook's own folder; confirm.
sys.path.insert(0, os.path.abspath('..'))

# IPython autoreload extension in mode 2: imported modules are reloaded before a
# cell executes, so edits to project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [6]:
from utils.schemas import Split, ObservationPointer
from utils.data_management import observation_factory

In [7]:
from google.cloud import storage

# Google Cloud identifiers used by this notebook.
BUCKET_NAME = "sand_mining"
GCP_PROJECT_NAME = "gee-sand"

# Storage client bound to the project named above; handed to the data helpers.
gcp_client = storage.Client(
    project=GCP_PROJECT_NAME,
)

In [22]:
# `pprint` is imported here explicitly: the cell calls it below, and without this
# import it only works when the name happens to be left over in the kernel.
from pprint import pprint

from rastervision.core.data import RasterioSource
from utils.data_management import get_location_from_key

# Number of pixels in one square kilometre. The analysis takes each pixel to
# cover 100 m^2, so 1 km^2 = 1e6 m^2 / 100 m^2 = 1e4 pixels.
PIXELS_PER_KM2 = 1e4

all_observations: List[ObservationPointer] = list(observation_factory(gcp_client))

# Group the observations by the location derived from each observation's name.
# A plain dict is kept (rather than a defaultdict) so that later look-ups of an
# unknown location still raise KeyError.
observations_per_locations = {}
for observation in all_observations:
    location = get_location_from_key(observation.name)
    observations_per_locations.setdefault(location, []).append(observation)

# Build one summary entry per location.
dataset_summary = {}
for location, observation_list in observations_per_locations.items():
    # To determine the patch size, we only look at the first observation.
    # We expect that all observations of a location cover the same geographical extent.
    raster_source = RasterioSource(observation_list[0].uri_to_bs, allow_streaming=False)
    n_rows, n_cols = raster_source.shape[:2]
    coverage_area_km2 = n_rows * n_cols / PIXELS_PER_KM2
    summary_of_location = {
        "Number of observations": len(observation_list),
        "Patch size": raster_source.shape[:2],
        "Coverage area [km^2]": round(coverage_area_km2, 2),
    }
    dataset_summary[location] = summary_of_location

pprint(dataset_summary)
print(f"Total of {len(observations_per_locations)} locations")


2023-07-25 16:05:24:rastervision.pipeline.file_system.utils: INFO - Using cached file /data/tmp/cache/http/storage.googleapis.com/sand_mining/labels/Betwa_Hamirpur_79-81_25-91/bs/Betwa_Hamirpur_79-81_25-91_2022-03-22_bs.tif.
2023-07-25 16:05:24:rastervision.pipeline.file_system.utils: INFO - Using cached file /data/tmp/cache/http/storage.googleapis.com/sand_mining/labels/Chambal_More_77-86_26-61/bs/Chambal_More_77-86_26-61_2022-03-02_bs.tif.
2023-07-25 16:05:24:rastervision.pipeline.file_system.utils: INFO - Using cached file /data/tmp/cache/http/storage.googleapis.com/sand_mining/labels/Kathajodi_Cuttack_85-85_20-44/bs/Kathajodi_Cuttack_85-85_20-44_2022-03-03_bs.tif.
2023-07-25 16:05:24:rastervision.pipeline.file_system.utils: INFO - Using cached file /data/tmp/cache/http/storage.googleapis.com/sand_mining/labels/Ken_Banda_80-35_25-68/bs/Ken_Banda_80-35_25-68_2022-03-09_bs.tif.
2023-07-25 16:05:24:rastervision.pipeline.file_system.utils: INFO - Using cached file /data/tmp/cache/http/s

{'Betwa_Hamirpur_79-81_25-91': {'Coverage area [km^2]': 254.39,
                                'Number of observations': 5,
                                'Patch size': (1043, 2439)},
 'Chambal_More_77-86_26-61': {'Coverage area [km^2]': 68.61,
                              'Number of observations': 4,
                              'Patch size': (795, 863)},
 'Kathajodi_Cuttack_85-85_20-44': {'Coverage area [km^2]': 105.79,
                                   'Number of observations': 4,
                                   'Patch size': (653, 1620)},
 'Ken_Banda_80-35_25-68': {'Coverage area [km^2]': 25.8,
                           'Number of observations': 5,
                           'Patch size': (451, 572)},
 'Narmada_Sehore_77-32_22-56': {'Coverage area [km^2]': 20.32,
                                'Number of observations': 6,
                                'Patch size': (278, 731)},
 'Sone_Rohtas_84-21_24-91': {'Coverage area [km^2]': 582.03,
                             'Nu