Notebook for reading and wrangling historical deforestation data

# Prepare environment

In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import geopandas as gpd
from shapely.geometry import box

from tqdm import tqdm

In [2]:
import config

import warnings
# NOTE(review): 'ignore' with no category silences every warning for the rest
# of the session, so any warning the cells below would raise is hidden.
# Consider narrowing the filter to the specific warnings that are expected.
warnings.filterwarnings('ignore')

# Data reading

In [3]:
# legal amazon limits: its bounding box and shape define the grid built below
am_bounds = gpd.read_file(config.AMAZON_FRONTIER_DATA)

In [4]:
# prodes deforestation alerts (the `image_date` column is parsed into `date` below)
prodes = gpd.read_file(config.PRODES_DATA)

In [5]:
# deter deforestation alerts (the `VIEW_DATE` column is parsed into `date` below)
deter = gpd.read_file(config.DETER_DATA)

In [6]:
# deforestation status on initial time (2007); used as the starting
# accumulated state when the temporal series is generated
acc_deforestation_init = gpd.read_file(config.INITIAL_DEFORESTATION)

## Date format

In [7]:
# parse each source's own date column into a common `date` column
for alerts, source_column in ((deter, "VIEW_DATE"), (prodes, "image_date")):
    alerts["date"] = pd.to_datetime(alerts[source_column])

# Create full grid

In [8]:
# bounding rectangle (min_x, min_y, max_x, max_y) of the whole region
region_bounds = am_bounds.total_bounds
min_x, min_y, max_x, max_y = region_bounds
print(*region_bounds)
# to try the pipeline on a small window, override the bounds instead:
# min_x, min_y, max_x, max_y = [-69.0, -11.0, -68.0, -10.0]

-73.9909722199879 -18.04176667000001 -43.95182736291479 5.27222500000003


In [9]:
# generate grid intersection with desired area
side = config.BOX_SIDE

# number of cells needed to cover the bounding rectangle along each axis
matrix_size_x = int(np.ceil((max_x - min_x) / side))
matrix_size_y = int(np.ceil((max_y - min_y) / side))

# (ix, iy) of every cell of the full rectangle, with iy varying fastest;
# the position of a cell in this sequence is its frame id
grid_positions = list(np.ndindex(matrix_size_x, matrix_size_y))

kept_boxes, kept_ids = [], []
for cell_id, (ix, iy) in enumerate(grid_positions):
    frame = box(
        min_x + ix * side,
        min_y + iy * side,
        min_x + (ix + 1) * side,
        min_y + (iy + 1) * side,
    )
    # only cells intersecting the region are kept as frames; their ids still
    # refer to positions in the full rectangle
    if am_bounds.intersects(frame).any():
        kept_boxes.append(frame)
        kept_ids.append(cell_id)

frames = gpd.GeoDataFrame({"geometry": kept_boxes}, crs=am_bounds.crs)
frames["frame_id"] = kept_ids
frame_idx = np.array(grid_positions)  # row number == frame id, columns == (ix, iy)

In [10]:
# persist the geometry of every kept grid cell
frames.to_file(config.TR_FRAMES)

# persist the (x, y) grid position of every cell of the full rectangle;
# the row index written to the csv is the frame id
frame_positions = pd.DataFrame(frame_idx, columns=["x", "y"])
frame_positions.to_csv(config.TR_FRAMES_IDX)

In [11]:
def cut_frames(x_lim: int, y_lim: int, square_size: int=10):
    """Select the frames lying inside one square window of the grid.

    The window spans grid columns ``x_lim`` (inclusive) to
    ``x_lim + square_size`` (exclusive) and grid rows ``y_lim`` (inclusive) to
    ``y_lim + square_size`` (exclusive). Reads the notebook-level ``frames``
    table and ``frame_idx`` array.

    Parameters
    ----------
    x_lim, y_lim : int
        Grid position of the window's lower corner.
    square_size : int
        Number of grid cells along each side of the window.

    Returns
    -------
    The rows of ``frames`` whose ``frame_id`` falls inside the window.
    """
    grid_x, grid_y = frame_idx[:, 0], frame_idx[:, 1]
    inside_x = (grid_x >= x_lim) & (grid_x < x_lim + square_size)
    inside_y = (grid_y >= y_lim) & (grid_y < y_lim + square_size)
    # the row number in frame_idx is the frame id
    window_ids = np.flatnonzero(inside_x & inside_y)
    return frames[frames["frame_id"].isin(window_ids)]

# Compute temporal evolution

In [12]:
# first and last day of every month in the configured period; the two ranges
# are paired month by month when the temporal series is generated
# NOTE(review): recent pandas releases deprecate the "M" alias in favour of
# "ME" - confirm against the pandas version this notebook is pinned to
monthly_first = pd.date_range(start=config.DT_INIT, end=config.DT_FIM, freq="MS")
monthly_last = pd.date_range(start=config.DT_INIT, end=config.DT_FIM, freq="M")

In [13]:
def generate_temporal_series(frames):
    """Build the month-by-month accumulated deforestation share of each frame.

    The starting state is the intersection of ``frames`` with
    ``acc_deforestation_init``. Then, for every (first day, last day) pair
    taken from ``monthly_first`` / ``monthly_last``, the PRODES and DETER
    polygons dated inside that month are cut by the frames and merged into the
    accumulated state, one dissolved geometry per ``frame_id``.

    Parameters
    ----------
    frames : GeoDataFrame
        Grid cells to process, with ``frame_id`` and ``geometry`` columns.

    Returns
    -------
    list
        One table per month, in chronological order. Each holds the cell
        geometry (``geometry_x``), the accumulated deforested geometry
        (``geometry_y``) and ``area``, the ratio between their areas. The
        merge is an inner one, so only frames present in the accumulated state
        appear.
    """

    def clip_month(events, month_start, month_end):
        # polygons dated inside the month, cut by the frames they overlap
        in_month = (events["date"] >= month_start) & (events["date"] <= month_end)
        return gpd.overlay(
            frames,
            events[in_month][["geometry"]],
            how="intersection",
            keep_geom_type=False,
        )

    def per_frame(pieces):
        # collapse all pieces of a frame into a single geometry
        return pieces.dissolve("frame_id").reset_index()

    def area_share(state):
        # ratio of deforested area to frame area
        # (a warning about the area computation is expected here)
        merged = pd.merge(frames, state, on="frame_id")
        merged["area"] = merged.geometry_y.area / merged.geometry_x.area
        return merged.copy()

    # initial deforestation state
    initial_pieces = gpd.overlay(
        frames,
        acc_deforestation_init[["geometry"]],
        how="intersection",
        keep_geom_type=False,
    )
    deforestation_state = per_frame(initial_pieces)

    # accumulate month after month, keeping a snapshot of every state
    snapshots = []
    for dti, dtf in tqdm(list(zip(monthly_first, monthly_last))):
        month_pieces = pd.concat(
            [clip_month(prodes, dti, dtf), clip_month(deter, dti, dtf)]
        )
        month_deforestation = per_frame(month_pieces)
        # deforestation state until this date
        deforestation_state = per_frame(
            pd.concat([deforestation_state, month_deforestation])
        )
        snapshots.append(deforestation_state.copy())

    return [area_share(state) for state in snapshots]

In [14]:
# grid dimensions, in number of cells along x and y
matrix_size_x, matrix_size_y

(338, 262)

In [15]:
# save regions iterating squares
# Create the output folder up front: without it, a missing directory would
# abort the run only after the first (minutes-long) chunk had been computed.
if config.TR_DEFORESTATION:
    os.makedirs(config.TR_DEFORESTATION, exist_ok=True)

# walk the grid in STEP x STEP windows, writing one history file per window
for x_lim in range(0, matrix_size_x, config.STEP):
    for y_lim in range(0, matrix_size_y, config.STEP):
        # get cut
        lim_frames = cut_frames(x_lim, y_lim, config.STEP)
        if len(lim_frames) == 0:
            # window lies entirely outside the region: nothing to export
            continue

        print(f"\nGerando output do corte x={x_lim} / {matrix_size_x}, y={y_lim} / {matrix_size_y}")
        temporal_data = generate_temporal_series(lim_frames)

        # write history file: one (frame_id, area) table per month
        history_path = os.path.join(
            config.TR_DEFORESTATION, f"hist_{x_lim}_{y_lim}.pkl"
        )
        with open(history_path, "wb") as file:
            pickle.dump(
                [ds[["frame_id", "area"]] for ds in temporal_data],
                file
            )

        # release the per-month geometries before processing the next window
        del temporal_data


Gerando output do corte x=0 / 338, y=90 / 262


100%|██████████| 74/74 [03:11<00:00,  2.59s/it]



Gerando output do corte x=0 / 338, y=120 / 262


100%|██████████| 74/74 [00:38<00:00,  1.93it/s]



Gerando output do corte x=0 / 338, y=150 / 262


100%|██████████| 74/74 [00:16<00:00,  4.58it/s]



Gerando output do corte x=30 / 338, y=60 / 262


100%|██████████| 74/74 [02:22<00:00,  1.92s/it]



Gerando output do corte x=30 / 338, y=90 / 262


100%|██████████| 74/74 [05:00<00:00,  4.06s/it]



Gerando output do corte x=30 / 338, y=120 / 262


100%|██████████| 74/74 [00:38<00:00,  1.91it/s]



Gerando output do corte x=30 / 338, y=150 / 262


100%|██████████| 74/74 [00:32<00:00,  2.26it/s]



Gerando output do corte x=30 / 338, y=180 / 262


100%|██████████| 74/74 [00:28<00:00,  2.62it/s]



Gerando output do corte x=30 / 338, y=210 / 262


100%|██████████| 74/74 [00:22<00:00,  3.30it/s]



Gerando output do corte x=60 / 338, y=60 / 262


100%|██████████| 74/74 [02:43<00:00,  2.21s/it]



Gerando output do corte x=60 / 338, y=90 / 262


100%|██████████| 74/74 [05:15<00:00,  4.27s/it]



Gerando output do corte x=60 / 338, y=120 / 262


100%|██████████| 74/74 [00:33<00:00,  2.22it/s]



Gerando output do corte x=60 / 338, y=150 / 262


100%|██████████| 74/74 [00:46<00:00,  1.61it/s]



Gerando output do corte x=60 / 338, y=180 / 262


100%|██████████| 74/74 [00:34<00:00,  2.13it/s]



Gerando output do corte x=60 / 338, y=210 / 262


100%|██████████| 74/74 [00:25<00:00,  2.86it/s]



Gerando output do corte x=90 / 338, y=60 / 262


100%|██████████| 74/74 [03:12<00:00,  2.60s/it]



Gerando output do corte x=90 / 338, y=90 / 262


100%|██████████| 74/74 [06:17<00:00,  5.10s/it]



Gerando output do corte x=90 / 338, y=120 / 262


100%|██████████| 74/74 [00:34<00:00,  2.12it/s]



Gerando output do corte x=90 / 338, y=150 / 262


100%|██████████| 74/74 [01:14<00:00,  1.01s/it]



Gerando output do corte x=90 / 338, y=180 / 262


100%|██████████| 74/74 [00:28<00:00,  2.62it/s]



Gerando output do corte x=90 / 338, y=210 / 262


100%|██████████| 74/74 [00:21<00:00,  3.38it/s]



Gerando output do corte x=90 / 338, y=240 / 262


100%|██████████| 74/74 [00:15<00:00,  4.63it/s]



Gerando output do corte x=120 / 338, y=30 / 262


100%|██████████| 74/74 [00:47<00:00,  1.56it/s]



Gerando output do corte x=120 / 338, y=60 / 262


100%|██████████| 74/74 [03:52<00:00,  3.14s/it]



Gerando output do corte x=120 / 338, y=90 / 262


100%|██████████| 74/74 [05:00<00:00,  4.06s/it]



Gerando output do corte x=120 / 338, y=120 / 262


100%|██████████| 74/74 [00:57<00:00,  1.29it/s]



Gerando output do corte x=120 / 338, y=150 / 262


100%|██████████| 74/74 [01:30<00:00,  1.22s/it]



Gerando output do corte x=120 / 338, y=180 / 262


100%|██████████| 74/74 [00:30<00:00,  2.41it/s]



Gerando output do corte x=120 / 338, y=210 / 262


100%|██████████| 74/74 [03:07<00:00,  2.54s/it]



Gerando output do corte x=120 / 338, y=240 / 262


100%|██████████| 74/74 [00:33<00:00,  2.20it/s]



Gerando output do corte x=150 / 338, y=0 / 262


100%|██████████| 74/74 [00:44<00:00,  1.66it/s]



Gerando output do corte x=150 / 338, y=30 / 262


100%|██████████| 74/74 [01:56<00:00,  1.57s/it]



Gerando output do corte x=150 / 338, y=60 / 262


100%|██████████| 74/74 [03:24<00:00,  2.77s/it]



Gerando output do corte x=150 / 338, y=90 / 262


100%|██████████| 74/74 [03:16<00:00,  2.66s/it]



Gerando output do corte x=150 / 338, y=120 / 262


100%|██████████| 74/74 [02:04<00:00,  1.69s/it]



Gerando output do corte x=150 / 338, y=150 / 262


100%|██████████| 74/74 [04:52<00:00,  3.95s/it]



Gerando output do corte x=150 / 338, y=180 / 262


100%|██████████| 74/74 [01:04<00:00,  1.14it/s]



Gerando output do corte x=150 / 338, y=210 / 262


100%|██████████| 74/74 [02:32<00:00,  2.06s/it]



Gerando output do corte x=150 / 338, y=240 / 262


100%|██████████| 74/74 [00:16<00:00,  4.37it/s]



Gerando output do corte x=180 / 338, y=0 / 262


100%|██████████| 74/74 [01:03<00:00,  1.16it/s]



Gerando output do corte x=180 / 338, y=30 / 262


100%|██████████| 74/74 [02:19<00:00,  1.89s/it]



Gerando output do corte x=180 / 338, y=60 / 262


100%|██████████| 74/74 [04:58<00:00,  4.04s/it]



Gerando output do corte x=180 / 338, y=90 / 262


100%|██████████| 74/74 [03:40<00:00,  2.98s/it]



Gerando output do corte x=180 / 338, y=120 / 262


100%|██████████| 74/74 [03:23<00:00,  2.75s/it]



Gerando output do corte x=180 / 338, y=150 / 262


100%|██████████| 74/74 [04:59<00:00,  4.05s/it]



Gerando output do corte x=180 / 338, y=180 / 262


100%|██████████| 74/74 [02:16<00:00,  1.84s/it]



Gerando output do corte x=180 / 338, y=210 / 262


100%|██████████| 74/74 [00:21<00:00,  3.37it/s]



Gerando output do corte x=210 / 338, y=0 / 262


100%|██████████| 74/74 [00:26<00:00,  2.74it/s]



Gerando output do corte x=210 / 338, y=30 / 262


100%|██████████| 74/74 [01:20<00:00,  1.09s/it]



Gerando output do corte x=210 / 338, y=60 / 262


100%|██████████| 74/74 [04:16<00:00,  3.46s/it]



Gerando output do corte x=210 / 338, y=90 / 262


100%|██████████| 74/74 [02:26<00:00,  1.98s/it]



Gerando output do corte x=210 / 338, y=120 / 262


100%|██████████| 74/74 [02:20<00:00,  1.90s/it]



Gerando output do corte x=210 / 338, y=150 / 262


100%|██████████| 74/74 [08:11<00:00,  6.64s/it]



Gerando output do corte x=210 / 338, y=180 / 262


100%|██████████| 74/74 [03:14<00:00,  2.62s/it]



Gerando output do corte x=210 / 338, y=210 / 262


100%|██████████| 74/74 [00:48<00:00,  1.52it/s]



Gerando output do corte x=240 / 338, y=0 / 262


100%|██████████| 74/74 [00:22<00:00,  3.35it/s]



Gerando output do corte x=240 / 338, y=30 / 262


100%|██████████| 74/74 [01:01<00:00,  1.20it/s]



Gerando output do corte x=240 / 338, y=60 / 262


100%|██████████| 74/74 [02:46<00:00,  2.25s/it]



Gerando output do corte x=240 / 338, y=90 / 262


100%|██████████| 74/74 [04:13<00:00,  3.42s/it]



Gerando output do corte x=240 / 338, y=120 / 262


100%|██████████| 74/74 [06:53<00:00,  5.59s/it]



Gerando output do corte x=240 / 338, y=150 / 262


100%|██████████| 74/74 [10:16<00:00,  8.33s/it]



Gerando output do corte x=240 / 338, y=180 / 262


100%|██████████| 74/74 [02:23<00:00,  1.94s/it]



Gerando output do corte x=240 / 338, y=210 / 262


100%|██████████| 74/74 [01:31<00:00,  1.23s/it]



Gerando output do corte x=240 / 338, y=240 / 262


100%|██████████| 74/74 [00:22<00:00,  3.35it/s]



Gerando output do corte x=270 / 338, y=30 / 262


100%|██████████| 74/74 [00:17<00:00,  4.17it/s]



Gerando output do corte x=270 / 338, y=60 / 262


100%|██████████| 74/74 [00:28<00:00,  2.56it/s]



Gerando output do corte x=270 / 338, y=90 / 262


100%|██████████| 74/74 [01:20<00:00,  1.09s/it]



Gerando output do corte x=270 / 338, y=120 / 262


100%|██████████| 74/74 [05:08<00:00,  4.17s/it]



Gerando output do corte x=270 / 338, y=150 / 262


100%|██████████| 74/74 [10:42<00:00,  8.68s/it]



Gerando output do corte x=270 / 338, y=180 / 262


100%|██████████| 74/74 [03:50<00:00,  3.12s/it]



Gerando output do corte x=270 / 338, y=210 / 262


100%|██████████| 74/74 [00:16<00:00,  4.54it/s]



Gerando output do corte x=300 / 338, y=30 / 262


100%|██████████| 74/74 [00:23<00:00,  3.11it/s]



Gerando output do corte x=300 / 338, y=60 / 262


100%|██████████| 74/74 [00:30<00:00,  2.46it/s]



Gerando output do corte x=300 / 338, y=90 / 262


100%|██████████| 74/74 [00:20<00:00,  3.59it/s]



Gerando output do corte x=300 / 338, y=120 / 262


100%|██████████| 74/74 [04:25<00:00,  3.59s/it]



Gerando output do corte x=300 / 338, y=150 / 262


100%|██████████| 74/74 [07:11<00:00,  5.83s/it]



Gerando output do corte x=300 / 338, y=180 / 262


100%|██████████| 74/74 [02:15<00:00,  1.83s/it]



Gerando output do corte x=330 / 338, y=120 / 262


100%|██████████| 74/74 [03:33<00:00,  2.89s/it]



Gerando output do corte x=330 / 338, y=150 / 262


100%|██████████| 74/74 [07:14<00:00,  5.87s/it]



Gerando output do corte x=330 / 338, y=180 / 262


100%|██████████| 74/74 [00:17<00:00,  4.29it/s]
