Notebook for reading and wrangling historical precipitation data

# Prepare environment

In [1]:
import os
import pandas as pd

import geopandas as gpd
import rioxarray as rxr

from tqdm import tqdm

In [2]:
import config
from utils import compute_frames

import warnings
warnings.filterwarnings('ignore')

# Data reading

In [3]:
# read the Legal Amazon boundary file into a GeoDataFrame
am_bounds = gpd.read_file(filename=config.AMAZON_FRONTIER_DATA)

In [4]:
# read the frames index table; the first CSV column becomes the row index
frames_idx = pd.read_csv(filepath_or_buffer=config.TR_FRAMES_IDX, index_col=0)

In [5]:
# build every frame from the Amazon bounding box, the configured box side
# and the min/max x and y values found in the frames index table
frame_xs = frames_idx["x"]
frame_ys = frames_idx["y"]
frames = compute_frames(
    am_bounds.total_bounds,
    config.BOX_SIDE,
    frame_xs.min(),
    frame_xs.max(),
    frame_ys.min(),
    frame_ys.max(),
)

In [6]:
# function to compute frames precipitation from tif file
def join_precip_frames(precip_df: pd.DataFrame) -> pd.DataFrame:
    """Average the precipitation of the points that fall within each frame.

    Parameters
    ----------
    precip_df : pd.DataFrame
        One row per point, with ``x`` and ``y`` coordinate columns and a
        ``precipitation`` column. The coordinates are tagged as EPSG:4326.

    Returns
    -------
    pd.DataFrame
        Two columns, ``frame_id`` and ``precipitation``, where
        ``precipitation`` is the mean over the points joined to that frame.
        Frames that receive no point are absent (inner join).

    Notes
    -----
    Reads the notebook-level ``frames`` GeoDataFrame, which must already be
    defined and is expected to carry a ``frame_id`` column.
    """
    # transform into geodataframe -- the point geometries are built from the
    # function argument, never from a notebook-level variable
    precip = gpd.GeoDataFrame(
        precip_df,
        geometry=gpd.points_from_xy(precip_df["x"], precip_df["y"]),
        crs="EPSG:4326",
    ).drop(columns=["x", "y"])

    # spatial join: keep only the points lying within a frame polygon
    # (`predicate` is the current name of the deprecated `op` keyword)
    precip_frames = gpd.sjoin(precip, frames, how="inner", predicate="within")

    # one row per frame with its mean precipitation
    return (
        precip_frames
        .groupby("frame_id")["precipitation"].mean()
        .reset_index()
    )

In [7]:
# compute precipitations by quarter (file)
timeseries_precipitations = []
# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore the row order of the final table) reproducible
for f in tqdm(sorted(os.listdir(config.RAIN_DATA))):

    # read file into pandas format; the context manager closes the raster
    # once its values have been copied into the DataFrame
    with rxr.open_rasterio(os.path.join(config.RAIN_DATA, f)) as dataarray:
        # first element along the leading dimension (presumably the band axis)
        df = dataarray[0].to_pandas()

    # reshape the grid to long format: one row per cell with a value
    df = (
        pd.melt(df, ignore_index=False)
        .reset_index()
        .dropna()
        .rename(columns={"value": "precipitation"})
    )
    # keep strictly positive values only (drops zeros and any negative values)
    df = df[df["precipitation"] > 0]

    # join with frames and add data
    precip_frames = join_precip_frames(df)
    # the date is the second "_"-separated token of the file name,
    # cut at its first "."
    precip_frames["dt"] = pd.to_datetime(f.split("_")[1].split(".")[0])

    # append to time varying list
    timeseries_precipitations.append(precip_frames)

100%|██████████| 28/28 [15:05<00:00, 32.33s/it]


In [8]:
# concat quarters; ignore_index renumbers the rows so the combined table does
# not repeat the row labels of the individual per-file tables
timeseries_precipitations = pd.concat(timeseries_precipitations, ignore_index=True)

# save as csv file (row labels are not written)
timeseries_precipitations.to_csv(config.TR_RAIN_AVG, index=False)