Notebook for reading and wrangling historical landcover data

# Prepare environment

In [1]:
import os
import numpy as np
import pandas as pd

import geopandas as gpd
import rioxarray as rxr

from tqdm import tqdm

In [2]:
import config
from utils import compute_frames

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation notices raised by
# the libraries used in the cells below — consider narrowing it.
warnings.filterwarnings('ignore')

# Data reading

In [4]:
# Read the per-frame index table; the first CSV column becomes the DataFrame
# index (its values are matched against frames["frame_id"] further down).
frames_idx = pd.read_csv(
    config.TR_FRAMES_IDX,
    index_col=0,
)

In [5]:
# Build every frame from the study-area bounds, the box side length and the
# min/max of the x and y values listed in frames_idx.
# NOTE(review): `am_bounds` is not defined in any cell visible in this notebook
# (the execution counter skips In [3]) — confirm where it is loaded, otherwise
# this cell raises NameError on a fresh kernel.
x_idx = frames_idx["x"]
y_idx = frames_idx["y"]
frames = compute_frames(
    am_bounds.total_bounds,
    config.BOX_SIDE,
    x_idx.min(),
    x_idx.max(),
    y_idx.min(),
    y_idx.max(),
)

In [6]:
# Raster -> long table: take the first band, convert it to a DataFrame, then
# melt it so that every cell becomes one row holding its y, x and landcover value.
dataarray = rxr.open_rasterio(config.LANDCOVER_DATA)
df = (
    dataarray[0]
    .to_pandas()
    .melt(ignore_index=False)  # keep the row index so it survives as a column below
    .reset_index()
    .rename(columns={"value": "landcover"})
)

In [7]:
# Transform into a GeoDataFrame: one point geometry per row, built from the
# x/y columns, which are dropped afterwards since the geometry replaces them.
landcover = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["x"], df["y"]),
    crs="EPSG:4326",
).drop(columns=["x", "y"])

In [9]:
# Drop the notebook's reference to the intermediate table; the cells below
# only use `landcover` (presumably done to release memory — TODO confirm).
del df

In [10]:
# Number of frame groups, one spatial join per group.
# NOTE(review): presumably chosen so each join stays within memory — confirm
# before tuning.
N_FRAME_GROUPS = 300

all_frames_landcover = []

# Only the frame ids (the index of frames_idx) are needed to select frames, so
# split the index values instead of the whole DataFrame. np.array_split on a
# DataFrame goes through DataFrame.swapaxes, which is deprecated in recent pandas.
frame_id_groups = np.array_split(frames_idx.index.to_numpy(), N_FRAME_GROUPS)

# apply transformation for groups of frames
for frame_ids in tqdm(frame_id_groups):

    # frames belonging to the current group
    iframes = frames[frames["frame_id"].isin(frame_ids)]

    # Keep the landcover points lying within a frame, then count them per
    # (frame_id, landcover) pair. `predicate=` replaces the deprecated `op=`
    # keyword of sjoin. The count column keeps the name "geometry" because it
    # is counted from that column.
    group_counts = (
        gpd.sjoin(landcover, iframes, how="inner", predicate="within")
        .groupby(["frame_id", "landcover"])["geometry"]
        .count()
        .reset_index()
    )
    all_frames_landcover.append(group_counts)

  0%|          | 0/300 [00:00<?, ?it/s]

100%|██████████| 300/300 [50:49<00:00, 10.16s/it]


In [12]:
# Stack the per-group count tables and write them to the CSV path configured
# in config.TR_LANDCOVER, without the row index.
landcover_counts = pd.concat(all_frames_landcover)
landcover_counts.to_csv(config.TR_LANDCOVER, index=False)