Notebook for reading and wrangling Sentinel-1 data (VV band)

# Prepare environment

In [1]:
import os
import pandas as pd

import geopandas as gpd
import rioxarray as rxr

from tqdm import tqdm

In [2]:
import config
from utils import compute_frames

import warnings
warnings.filterwarnings('ignore')

# Data reading

In [3]:
# load legal amazon limits from the path configured in config.AMAZON_FRONTIER_DATA;
# below, only the overall bounding box (`total_bounds`) of this layer is used,
# as the first argument of `compute_frames`
am_bounds = gpd.read_file(config.AMAZON_FRONTIER_DATA)

In [4]:
# load frames idx detail (first CSV column becomes the index);
# the "x" and "y" columns are read below to get the min/max frame indices
frames_idx = pd.read_csv(config.TR_FRAMES_IDX, index_col=0)

In [5]:
# compute all frames from the Legal Amazon bounding box, the configured box
# side and the min/max x and y values found in the frame index table
frame_x = frames_idx["x"]
frame_y = frames_idx["y"]

frames = compute_frames(
    am_bounds.total_bounds,
    config.BOX_SIDE,
    frame_x.min(),
    frame_x.max(),
    frame_y.min(),
    frame_y.max(),
)

In [6]:
# function to compute per-frame values from the pixels of a tif file
def join_frames(df: pd.DataFrame) -> pd.DataFrame:
    """Average pixel values over the frames that contain them.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format pixel table with ``x``, ``y`` and ``value`` columns.
        The coordinates are tagged as EPSG:4326 without reprojection
        (assumes the source raster is already in lon/lat -- TODO confirm).

    Returns
    -------
    pd.DataFrame
        One row per ``frame_id`` with the mean ``value`` of the pixels that
        fall within that frame. Pixels outside every frame are discarded by
        the inner join.

    Notes
    -----
    Reads the notebook-level ``frames`` GeoDataFrame, which is expected to
    carry a ``frame_id`` column.
    """
    # transform into geodataframe (one point geometry per pixel)
    points = gpd.GeoDataFrame(
        df, geometry=gpd.points_from_xy(df.x, df.y), crs="EPSG:4326"
    ).drop(["x", "y"], axis=1)

    # join: `predicate=` superseded `op=` in geopandas 0.10 and `op=` was
    # removed in 1.0, so prefer the new keyword and fall back for old installs
    try:
        df_frames = gpd.sjoin(points, frames, how="inner", predicate="within")
    except TypeError:
        # geopandas < 0.10 only understands the legacy `op=` keyword
        df_frames = gpd.sjoin(points, frames, how="inner", op="within")

    return (
        df_frames
        .groupby("frame_id")["value"].mean()
        .reset_index()
    )

In [7]:
# compute the mean Sentinel-1 value per frame for each raster file
# (presumably one file per quarter -- see the "concat quarters" step below)
timeseries_sentinel = []

# sort the listing so the processing order (and therefore the row order of
# the saved output) is reproducible; skip hidden entries and sub-directories
# such as `.ipynb_checkpoints`, which are not raster files
sentinel_files = sorted(
    name for name in os.listdir(config.SENTINEL1_DATA)
    if not name.startswith(".")
    and os.path.isfile(os.path.join(config.SENTINEL1_DATA, name))
)

for f in tqdm(sentinel_files):

    # read the first band into pandas format; the context manager releases
    # the raster file handle as soon as the values are loaded
    with rxr.open_rasterio(os.path.join(config.SENTINEL1_DATA, f)) as dataarray:
        df = dataarray[0].to_pandas()

    # wide grid -> long table (one row per pixel), dropping empty pixels
    df = pd.melt(df, ignore_index=False)\
        .reset_index()\
        .dropna()

    # join with frames and add date, parsed from the file name
    # (text between the first "_" and the first following ".")
    df_frames = join_frames(df)
    df_frames["dt"] = pd.to_datetime(f.split("_")[1].split(".")[0])

    # append to time varying list
    timeseries_sentinel.append(df_frames)

100%|██████████| 28/28 [08:50<00:00, 18.94s/it]


In [8]:
# concat quarters into one table; ignore_index gives every row a unique label
# instead of repeating each file's 0..n-1 index (the CSV below is unaffected
# because it is written without the index)
timeseries_sentinel_df = pd.concat(timeseries_sentinel, ignore_index=True)

In [14]:
# sanity check: (rows, columns) of the concatenated per-frame time series
timeseries_sentinel_df.shape

(20517800, 3)

In [13]:
# save as csv file at the path configured in config.TR_SENTINEL1;
# the index is not written, only the data columns
timeseries_sentinel_df.to_csv(config.TR_SENTINEL1, index=False)