![image/png](https://cdn-uploads.huggingface.co/production/uploads/6304c06eeb6d777a838eab63/BJKsLwX0GG4W3-gdf40TJ.png)
# 🗺️ Major-TOM Filtering
[![HF](https://img.shields.io/badge/%F0%9F%A4%97-Datasets-yellow)](https://www.huggingface.co/Major-TOM) [![paper](https://img.shields.io/badge/arXiv-2402.12095-D12424)](https://www.arxiv.org/abs/2402.12095) [![GitHub stars](https://img.shields.io/github/stars/ESA-PhiLab/Major-TOM?style=social&label=Star&maxAge=2592000)](https://github.com/ESA-PhiLab/Major-TOM/)

This app provides a way of exploring samples present in the MajorTOM-Core dataset.
It contains nearly every piece of Earth captured by the ESA Sentinel-2 satellites.

In [1]:
import sys
import os
import leafmap
from MajorTOM.grid import *

# Prefer a local copy of the metadata table; otherwise read it straight from
# the Hugging Face dataset repository.
if not os.path.isfile('metadata.parquet'):
    DATASET_NAME = 'Major-TOM/Core-S2L2A'
    meta_path = 'https://huggingface.co/datasets/{}/resolve/main/metadata.parquet'.format(DATASET_NAME)
else:
    meta_path = 'metadata.parquet'

# Global Major-TOM grid used to translate lat/lon into grid cell names.
# NOTE(review): the first argument (10) is presumably the cell size in km - confirm against MajorTOM.grid.
grid = Grid(10, latitude_range=(-90,90), longitude_range=(-180,180))

# Metadata table with one row per sample; `pd` is not imported in this cell,
# so it presumably comes in via the star import from MajorTOM.grid - TODO confirm.
df = pd.read_parquet(meta_path)

2024-12-12 11:09:27.443023: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-12 11:09:27.468719: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-12 11:09:27.468739: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-12 11:09:27.468758: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-12 11:09:27.474415: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: A

In [3]:
from fsspec.parquet import open_parquet_file
import fsspec
import pyarrow.parquet as pq
from io import BytesIO
from PIL import Image

def gridcell2ints(grid_string):
    """Convert a grid cell name such as ``'12U_34R'`` into signed integers.

    The name is split on ``'_'``; each of the first two tokens is a number
    followed by a one-letter direction suffix.

    Returns:
        ``(up, right)``: the first number is positive when its suffix is
        ``'U'`` and negative otherwise; the second is positive when its
        suffix is ``'R'`` and negative otherwise.
    """
    def _signed(token, positive_suffix):
        # Everything except the last character is the magnitude; the last
        # character decides the sign.
        magnitude = int(token[:-1])
        return magnitude if token[-1] == positive_suffix else -magnitude

    tokens = grid_string.split('_')
    up = _signed(tokens[0], 'U')  # +ve if up
    right = _signed(tokens[1], 'R')  # +ve if R

    return up, right

def row2image(parquet_url, parquet_row, fullrow_read=True):
    """Load the thumbnail stored in one row group of a parquet file as a PIL image.

    Args:
        parquet_url: Location of the parquet file, passed to ``fsspec``.
        parquet_row: Index passed to ``ParquetFile.read_row_group``.
            NOTE(review): this assumes one sample per row group - confirm.
        fullrow_read: When True the file is opened with ``fsspec.open``;
            when False it is opened with ``open_parquet_file`` restricted to
            the ``thumbnail`` column.

    Returns:
        The ``PIL.Image`` opened from the thumbnail bytes.
    """
    if fullrow_read:
        # option 1
        source = fsspec.open(parquet_url)
    else:
        # option 2
        source = open_parquet_file(parquet_url, columns=["thumbnail"])

    # Open the source through a context manager so the underlying handle is
    # released once the row group is in memory; ParquetFile does not close a
    # file object that was handed to it.
    with source as parquet_file:
        with pq.ParquetFile(parquet_file) as pf:
            row_group = pf.read_row_group(parquet_row, columns=['thumbnail'])

    # The thumbnail bytes are already in memory, so the image can be decoded
    # after the remote file has been closed.
    stream = BytesIO(row_group['thumbnail'][0].as_py())
    return Image.open(stream)

def row2s2(parquet_url, parquet_row, s2_bands = ["B04", "B03", "B02"]):
    """Read the requested band columns of one row group from a parquet file.

    Args:
        parquet_url: Location of the parquet file, passed to ``open_parquet_file``.
        parquet_row: Index passed to ``ParquetFile.read_row_group``.
        s2_bands: Column names to fetch; defaults to ``B04``, ``B03``, ``B02``.

    Returns:
        The object returned by ``read_row_group`` for those columns.
    """
    # Only the requested columns are fetched, and both handles are closed
    # before returning.
    with open_parquet_file(parquet_url, columns=s2_bands) as remote_file, \
            pq.ParquetFile(remote_file) as parquet:
        band_table = parquet.read_row_group(parquet_row, columns=s2_bands)

    return band_table

def cell2row(grid_string, meta_df, return_row = False):
    """Look up where the sample for a grid cell is stored.

    Args:
        grid_string: Grid cell name understood by ``gridcell2ints``.
        meta_df: Metadata frame with ``grid_row_u``, ``grid_col_r``,
            ``parquet_url`` and ``parquet_row`` columns.
        return_row: When True, the matching metadata rows are appended to
            the returned tuple.

    Returns:
        ``(parquet_url, parquet_row)`` or ``(parquet_url, parquet_row, rows)``
        when a match exists, otherwise ``None``.
    """
    row_U, col_R = gridcell2ints(grid_string)
    matches = meta_df.query('grid_row_u == {} & grid_col_r == {}'.format(row_U, col_R))

    # No sample covers this cell.
    if matches.empty:
        return None

    # .item() requires exactly one matching row and raises otherwise.
    location = (matches.parquet_url.item(), matches.parquet_row.item())
    return location + (matches,) if return_row else location

def map_to_image(map):
    """Fetch the thumbnail of the sample under the centre of the map view.

    Uses the module-level ``grid`` and ``df``. When a sample is found, the
    map is re-centred on the sample's ``centre_lon`` / ``centre_lat`` at
    zoom 12 and the thumbnail image is returned; otherwise ``None`` is
    returned and the map is left untouched.
    """
    # 1. Centre of the current view.
    # NOTE(review): assumes get_bbox() is ordered [west, south, east, north] - confirm.
    bbox = map.get_bbox()
    view_lat = (bbox[3]+bbox[1])/2
    view_lon = (bbox[2]+bbox[0])/2

    # 2. Translate the coordinate to a Major-TOM grid cell.
    rows, cols = grid.latlon2rowcol([view_lat], [view_lon])

    # 3. Translate the grid cell to its location in the parquet files.
    match = cell2row("{}_{}".format(rows[0],cols[0]), df, return_row = True)
    if match is None:
        return None

    # 4. Acquire the image, then move the map onto the sample's centre.
    parquet_url, parquet_row, meta_row = match
    img = row2image(parquet_url, parquet_row)
    lat, lon = meta_row.centre_lat.item(), meta_row.centre_lon.item()
    map.set_center(lon, lat, zoom=12)

    return img

### Instructions
To find a sample, navigate on the map to a place of interest. Click `Find Sample` to find a dataset sample that contains the central pixel of your current view.

In [None]:
from IPython.display import display
from ipywidgets import Button, HBox, VBox, Layout, Output, Dropdown

# Interactive map shown on the left; all optional controls except the
# attribution control are switched off.
m = leafmap.Map(
    height=559,
    width=559,
    zoom=4,
    draw_control=False,
    measure_control=False,
    fullscreen_control=False,
    toolbar_control=False,
    attribution_control=True,
)

# Button that triggers the sample lookup, and the initial (empty) output area.
button = Button(description="Find Sample", style={"font_size" : "30px"}, layout=Layout(width='50%', height='80px'))
output = Output()

# Page layout: map + button in the left half, result output in the right half.
left_box = VBox([m, button], layout=Layout(width='50%',display='flex',flex_flow='column',align_items='center'))
right_box = VBox([output], layout=Layout(width='50%'))
full_page = HBox([left_box, right_box])

display(full_page)

def on_button_clicked(b):
    """Display the sample for the current map view in the right-hand box.

    A new ``Output`` widget is created on every click, and it replaces the
    previous contents of ``right_box``.
    """
    result_panel = Output()
    with result_panel:
        display(map_to_image(m))
    right_box.children = [result_panel]

button.on_click(on_button_clicked)

HBox(children=(VBox(children=(Map(center=[20, 0], controls=(AttributionControl(options=['position', 'prefix'],…

<details><summary><strong>🏝 Couldn't find a sample? See this figure of global coverage:</strong></summary>
    <img src='https://cdn-uploads.huggingface.co/production/uploads/6304c06eeb6d777a838eab63/2KTarfsM0a1dNYEbXriUH.png' />
</details>