Notebook for reading and wrangling TPI raster data

# Prepare environment

In [None]:
import os
import pandas as pd

import geopandas as gpd
import rioxarray as rxr

from tqdm import tqdm

In [None]:
import config
from utils import compute_frames

import warnings
warnings.filterwarnings('ignore')

# Data reading

In [None]:
# read the legal amazon boundary layer into a GeoDataFrame
am_bounds = gpd.read_file(filename=config.AMAZON_FRONTIER_DATA)

In [None]:
# frames index table; the first csv column is used as the DataFrame index
frames_idx = pd.read_csv(config.TR_FRAMES_IDX, index_col=0)

# raster -> long table: take the first slice along the leading dimension
# as a 2-D grid, then melt it into one row per cell with the cell value
# stored in the "tpi" column
dataarray = rxr.open_rasterio(config.TPI_DATA)
band_grid = dataarray[0].to_pandas()
df = (
    band_grid
    .melt(ignore_index=False)
    .reset_index()
    .rename(columns={"value": "tpi"})
)

# long table -> point geodataframe; the x/y columns are dropped once they
# have been turned into the geometry column
cell_points = gpd.points_from_xy(df["x"], df["y"])
tpi = gpd.GeoDataFrame(df, geometry=cell_points, crs="EPSG:4326")
tpi = tpi.drop(columns=["x", "y"])

In [None]:
# build every frame from the amazon bounding box, the configured box side
# and the min/max of the x and y columns of the frames index table
idx_x = frames_idx["x"]
idx_y = frames_idx["y"]
frames = compute_frames(
    am_bounds.total_bounds,
    config.BOX_SIDE,
    idx_x.min(),
    idx_x.max(),
    idx_y.min(),
    idx_y.max(),
)

In [None]:
# spatial join: keep only the TPI points that lie within a frame geometry
# and attach that frame's columns to each matching point
# NOTE: `predicate` replaces the `op` keyword, which geopandas deprecated in
# 0.10 and removed in later releases (passing `op` now raises a TypeError)
tpi_frames = gpd.sjoin(tpi, frames, how="inner", predicate="within")

In [None]:
# per-frame summary statistics of tpi: mean, std, min and max
# (the dict-of-list spec yields two-level column labels: ("tpi", <stat>))
by_frame = tpi_frames.groupby("frame_id")
calc_tpi_frames = (
    by_frame
    .agg({"tpi": ["mean", "std", "min", "max"]})
    .reset_index()
)

In [None]:
# write the per-frame tpi statistics to csv, leaving out the row index
calc_tpi_frames.to_csv(path_or_buf=config.TR_TPI, index=False)