In [47]:
import rasterio
from rasterio.windows import Window
from pathlib import Path
from pyproj import Transformer
import pandas as pd
import geopandas as gpd
from tqdm.auto import tqdm
from rasterio.crs import CRS


In [15]:
# Root directory of the confirmed products, given relative to the current working directory.
confirmed_products_dir = Path('confirmed_products')

In [3]:
# Every immediate sub-directory of the products root is treated as one MGRS tile,
# with the directory name used as the tile ID.
# Directories are selected with an explicit is_dir() check: before Python 3.11 a
# trailing '/' in a pathlib glob pattern did not restrict matches to directories,
# so stray files could be picked up as "tiles". Sorting makes the tile order (and
# therefore any positional pick from `mgrs_tiles`) deterministic across machines.
subdirs = sorted(path for path in confirmed_products_dir.glob('*') if path.is_dir())
mgrs_tiles = [subdir.name for subdir in subdirs]
mgrs_tiles[:3]

['50NPL']

In [20]:
# The notebook processes a single tile: the first discovered tile ID.
# Raises IndexError if `mgrs_tiles` is empty.
MGRS_TILE_ID = mgrs_tiles[0]
MGRS_TILE_ID

'50NPL'

In [33]:
import rasterio
from rasterio.windows import Window
from pyproj import Transformer

def get_pixel_value(tif, lon, lat):
    """Sample band 1 of a raster at a longitude/latitude location.

    The point is reprojected from EPSG:4326 into the raster's own CRS, mapped
    to a (row, col) pixel index, and only that single pixel is read from disk.

    Parameters
    ----------
    tif : path-like or str
        Raster to open with ``rasterio.open``.
    lon, lat : float
        Longitude and latitude of the point in EPSG:4326.

    Returns
    -------
    The value of band 1 at the pixel containing the point.

    Raises
    ------
    ValueError
        If the point maps to a pixel index outside the raster extent.
    """
    with rasterio.open(tif) as src:
        # always_xy=True keeps the (lon, lat) -> (x, y) argument order.
        to_raster_crs = Transformer.from_crs("EPSG:4326", src.crs, always_xy=True)
        x_utm, y_utm = to_raster_crs.transform(lon, lat)

        row, col = src.index(x_utm, y_utm)

        inside_raster = 0 <= row < src.height and 0 <= col < src.width
        if not inside_raster:
            raise ValueError("Point is outside the bounds of the raster")

        # A 1x1 window avoids loading the whole band just to sample one pixel.
        single_pixel = src.read(1, window=Window(col, row, 1, 1))
        return single_pixel[0, 0]

In [None]:
# Product directories of the selected tile, sorted by name.
# is_dir() is checked explicitly because, before Python 3.11, a trailing '/' in a
# pathlib glob pattern did not limit matches to directories; a stray file whose
# name starts with 'OPERA' would otherwise be treated as a product directory.
mgrs_ts_dir = confirmed_products_dir / MGRS_TILE_ID
mgrs_ts_prods = sorted(path for path in mgrs_ts_dir.glob('OPERA*') if path.is_dir())
mgrs_ts_prods[:2]


[PosixPath('confirmed_products/50NPL/OPERA_L3_DIST-ALERT-S1_T50NPL_20240105T215803Z_20250910T164908Z_S1_30_v0.1'),
 PosixPath('confirmed_products/50NPL/OPERA_L3_DIST-ALERT-S1_T50NPL_20240112T214955Z_20250910T165629Z_S1_30_v0.1')]

In [None]:
# Inside each product directory, the status raster is named after the directory
# itself with a '_GEN-DIST-STATUS.tif' suffix.
mgrs_ts_status = [
    prod_dir.joinpath(f'{prod_dir.name}_GEN-DIST-STATUS.tif')
    for prod_dir in mgrs_ts_prods
]
mgrs_ts_status[:2]

[PosixPath('confirmed_products/50NPL/OPERA_L3_DIST-ALERT-S1_T50NPL_20240105T215803Z_20250910T164908Z_S1_30_v0.1/OPERA_L3_DIST-ALERT-S1_T50NPL_20240105T215803Z_20250910T164908Z_S1_30_v0.1_GEN-DIST-STATUS.tif'),
 PosixPath('confirmed_products/50NPL/OPERA_L3_DIST-ALERT-S1_T50NPL_20240112T214955Z_20250910T165629Z_S1_30_v0.1/OPERA_L3_DIST-ALERT-S1_T50NPL_20240112T214955Z_20250910T165629Z_S1_30_v0.1_GEN-DIST-STATUS.tif')]

In [None]:
# The fifth '_'-separated token of a product name is parsed as a timestamp;
# only its calendar date is kept.
acq_tokens = [prod_dir.name.split('_')[4] for prod_dir in mgrs_ts_prods]
mgrs_ts_dates = [pd.to_datetime(token).date() for token in acq_tokens]
mgrs_ts_dates[:2]

[datetime.date(2024, 1, 5), datetime.date(2024, 1, 12)]

# DIST-S1 Label Tables per Reference Site

In [18]:
# Reference time series table. The columns used further down are
# 'ID', 'Long', 'Lat' and 'MGRS'; the preview also shows one label column per day.
df_val = pd.read_csv('../tables/reference_data/referenceTimeSeriesInterpolated16_16_goodFirst.csv')
df_val.head()

Unnamed: 0,ID,overallLabel,Long,Lat,changetype,MGRS,2024-01-01,2024-01-02,2024-01-03,2024-01-04,...,2024-12-23,2024-12-24,2024-12-25,2024-12-26,2024-12-27,2024-12-28,2024-12-29,2024-12-30,2024-12-31,2025-01-01
0,34405_1,VLmaj,118.036706,5.357735,Plantation clearing,50NPL,noObs,noChange,noChange,noChange,...,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs
1,34405_2,VLmaj,118.082496,5.378278,Plantation clearing,50NPL,noObs,noChange,noChange,noChange,...,VLsub,VLsub,VLsub,VLsub,VLsub,VLsub,VLsub,VLsub,noObs,noObs
2,34405_3,VLmaj,118.024016,5.378379,Plantation clearing,50NPL,noObs,noChange,noChange,noChange,...,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs
3,34405_4,VLmaj,118.032355,5.346074,Plantation clearing,50NPL,noObs,noChange,noChange,noChange,...,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs,noObs
4,34405_5,VLmaj,118.077235,5.311264,Plantation clearing,50NPL,noObs,noObs,noObs,noObs,...,noChange,noChange,noChange,noChange,noChange,noChange,noChange,noChange,noObs,noObs


In [26]:
# Keep only the reference sites that belong to the tile being processed and
# expose them as a list of row dictionaries for the per-site loop.
in_tile = df_val['MGRS'].eq(MGRS_TILE_ID)
df_val_mgrs = df_val.loc[in_tile].reset_index(drop=True)
sites_in_mgrs_tile = df_val_mgrs.to_dict(orient='records')

In [None]:
# Build one table per site: the status value sampled at the site location in
# every status raster of the tile, alongside the corresponding product dates.
tables = []
for site in tqdm(sites_in_mgrs_tile, desc='Processing sites'):
    site_id, site_lon, site_lat = site['ID'], site['Long'], site['Lat']

    labels = []
    for status_tif in mgrs_ts_status:
        labels.append(get_pixel_value(status_tif, site_lon, site_lat))

    n_obs = len(labels)
    df_dist_s1_table_site = pd.DataFrame({
        'site_id': [site_id] * n_obs,
        'date': mgrs_ts_dates,
        'labels': labels,
    })
    tables.append(df_dist_s1_table_site)

tables[0].head()


Processing sites: 100%|██████████| 20/20 [00:02<00:00,  8.90it/s]


Unnamed: 0,site_id,date,labels
0,34405_1,2024-01-05,0
1,34405_1,2024-01-12,0
2,34405_1,2024-01-17,0
3,34405_1,2024-01-24,0
4,34405_1,2024-01-29,0


In [43]:
# Write each per-site table to '<table_dir>/<site_id>.csv'; the site ID is read
# back from the first row of the table's 'site_id' column.
table_dir = Path(f'dist_s1_label_tables/{MGRS_TILE_ID}')
table_dir.mkdir(parents=True, exist_ok=True)
for table in tables:
    site_id = table['site_id'].iloc[0]
    table.to_csv(table_dir / f'{site_id}.csv', index=False)


# Generate Site Vector Table

In [53]:
# Point layer of the tile's reference sites: one row per site ID with a point
# geometry built from the Long/Lat columns, tagged as EPSG:4326.
site_points = gpd.points_from_xy(df_val_mgrs['Long'], df_val_mgrs['Lat'])
df_site_geo = gpd.GeoDataFrame(
    df_val_mgrs[['ID']],
    geometry=site_points,
    crs=CRS.from_epsg(4326),
)
df_site_geo.head()


Unnamed: 0,ID,geometry
0,34405_1,POINT (118.03671 5.35773)
1,34405_2,POINT (118.0825 5.37828)
2,34405_3,POINT (118.02402 5.37838)
3,34405_4,POINT (118.03235 5.34607)
4,34405_5,POINT (118.07723 5.31126)


In [54]:

# Save the site points as GeoJSON in the same directory as the per-site CSV tables.
df_site_geo.to_file(table_dir / 'sites.geojson', driver='GeoJSON')

