In [None]:
# set GEO_BENCH_DIR to the root of the geobench repository
import os
os.environ["GEO_BENCH_DIR"] = "/mnt/data/cc_benchmark"

import geobench

if geobench.config._GEO_BENCH_DIR_ENV is None:
    print("Note: GEO_BENCH_DIR is not set. If this is not intended, this might be related on how jupyter loads environment variables.")
    print(f"Using GEO_BENCH_DIR={geobench.GEO_BENCH_DIR}")

from make_benchmark.dataset_converters import inspect_tools
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = [10, 6]

classification_benchmark = "classification_v0.8.5"
segmentation_benchmark = "segmentation_v0.4"
%load_ext autoreload
%autoreload 2

# Classification benchmark


In [None]:
# Build the overview for the classification benchmark and show it as the
# cell's output (the bare name on the last line triggers the rich display).
classification_overview = inspect_tools.benchmark_data_frame(classification_benchmark)
classification_overview

# Segmentation Benchmark

In [None]:
# Build the overview for the segmentation benchmark and show it as the
# cell's output (the bare name on the last line triggers the rich display).
segmentation_overview = inspect_tools.benchmark_data_frame(segmentation_benchmark)
segmentation_overview

# Classification Samples

In [None]:
# Preview 8 samples of the classification benchmark, with img_width=200.
inspect_tools.ipyplot_benchmark(
    benchmark_name=classification_benchmark,
    n_samples=8,
    img_width=200,
)

# Segmentation Samples

In [None]:
# Preview 4 samples of the segmentation benchmark (img_width left at the
# tool's default).
inspect_tools.ipyplot_benchmark(
    benchmark_name=segmentation_benchmark,
    n_samples=4,
)

In [None]:
# Produce the small figures used in the paper.
# NOTE: this changes the global matplotlib figure size, so it also applies to
# any figure created by cells executed after this one.
paper_figsize = [4, 2]
plt.rcParams['figure.figsize'] = paper_figsize
inspect_tools.plot_benchmark(
    benchmark_name=classification_benchmark,
    n_samples=2,
)

# Inspect content of benchmark


This script detects many possible inconsistencies in the benchmark's datasets:
* checks partition integrity
  * make sure it is a partition (no redundancy and no overlap)
  * make sure it points to a sample that exists
* checks samples integrity
  * assert that band_info is the same as in task_specs
  * run assert_valid on each band_info
  * assert shape is valid
  * display statistics for each band
  * verify if geospatial information is available


Known issues:
* so2sat doesn't have geospatial information (not provided in the original dataset)

In [None]:
# Run the integrity checks on the classification benchmark using 100 samples.
# NOTE(review): rewrite_if_necessary=True presumably lets the tool rewrite
# data it finds inconsistent -- confirm before running on a shared copy.
inspect_tools.verify_benchmark_integrity(
    benchmark_name=classification_benchmark,
    n_samples=100,
    rewrite_if_necessary=True,
    display_band_stats=False,
)

In [None]:
# Run the integrity checks on the segmentation benchmark using 100 samples.
# NOTE(review): rewrite_if_necessary=True presumably lets the tool rewrite
# data it finds inconsistent -- confirm before running on a shared copy.
inspect_tools.verify_benchmark_integrity(
    benchmark_name=segmentation_benchmark,
    n_samples=100,
    rewrite_if_necessary=True,
    display_band_stats=False,
)