Notebook for reading and wrangling historical deforestation data

# Prepare environment

In [1]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import geopandas as gpd
from shapely.geometry import box

In [2]:
import config

# Data reading

In [None]:
# Legal Amazon limits: region boundary read from the path in config.AMAZON_FRONTIER_DATA.
# Its CRS and total bounds are reused later when the grid is built.
am_bounds = gpd.read_file(config.AMAZON_FRONTIER_DATA)

In [None]:
# PRODES deforestation alerts, read from the path in config.PRODES_DATA.
# The "image_date" column is parsed into a "date" column further down.
prodes = gpd.read_file(config.PRODES_DATA)

In [None]:
# DETER deforestation alerts, read from the path in config.DETER_DATA.
# The "VIEW_DATE" column is parsed into a "date" column further down.
deter = gpd.read_file(config.DETER_DATA)

In [None]:
# Accumulated deforestation at the initial time (2007), read from the path in
# config.INITIAL_DEFORESTATION. Only its geometry is used to seed the
# per-frame deforestation state.
acc_deforestation_init = gpd.read_file(config.INITIAL_DEFORESTATION)

## Date format

In [None]:
# Parse each source's raw date column into a common datetime "date" column,
# so both alert tables can be filtered with the same expressions.
for alerts, raw_column in ((deter, "VIEW_DATE"), (prodes, "image_date")):
    alerts["date"] = pd.to_datetime(alerts[raw_column])

# Create full grid

In [None]:
# Bounds of the whole region; printed for reference only.
min_x, min_y, max_x, max_y = am_bounds.total_bounds
print(min_x, min_y, max_x, max_y)

# NOTE: the grid below is built from this hard-coded 1 x 1 window (in CRS
# units), which overrides the full-region bounds printed above. Remove or
# edit this line to process a different area.
min_x, min_y, max_x, max_y = [-69.0, -11.0, -68.0, -10.0]

In [None]:
# Side of each grid cell: approximately 1 km expressed in geographic coordinates.
box_side = 0.0089

# Number of cells needed to cover the bounding rectangle along each axis
# (rounded up, so the last row/column may extend past max_x / max_y).
km_x = (max_x - min_x) / box_side
km_y = (max_y - min_y) / box_side
matrix_size_x = int(np.ceil(km_x))
matrix_size_y = int(np.ceil(km_y))

# (ix, iy) index of every cell, x-major: iy varies fastest.
frame_idx = [
    (ix, iy)
    for ix in range(matrix_size_x)
    for iy in range(matrix_size_y)
]

# One square polygon per index, laid out from the lower-left corner (min_x, min_y).
all_boxes = [
    box(
        min_x + ix * box_side,
        min_y + iy * box_side,
        min_x + (ix + 1) * box_side,
        min_y + (iy + 1) * box_side,
    )
    for ix, iy in frame_idx
]

# Grid as a GeoDataFrame in the region's CRS; the positional index becomes "frame_id".
frames = gpd.GeoDataFrame({"geometry": all_boxes}, crs=am_bounds.crs)
frames = frames.reset_index().rename(columns={"index": "frame_id"})

# Compute temporal evolution

In [None]:
# Month boundaries between config.DT_INIT and config.DT_FIM:
# "MS" yields the first day of each month, "M" the last day.
# NOTE(review): the two ranges are later zipped together, so they are assumed
# to have matching lengths — confirm DT_INIT/DT_FIM fall on month boundaries.
date_span = {"start": config.DT_INIT, "end": config.DT_FIM}
monthly_first = pd.date_range(freq="MS", **date_span)
monthly_last = pd.date_range(freq="M", **date_span)

In [None]:
# Initial deforestation state: cut the initial accumulated-deforestation
# polygons by the grid cells, then merge the pieces so each frame_id ends up
# with a single geometry.
initial_pieces = gpd.overlay(
    frames,
    acc_deforestation_init[["geometry"]],
    how="intersection",
    keep_geom_type=False,
)
deforestation_state = initial_pieces.dissolve("frame_id").reset_index()

In [None]:
# Build one accumulated-deforestation snapshot per month.
deforestation_series = []

for month_start, month_end in zip(monthly_first, monthly_last):
    print(month_start)

    # Intersect the grid with each alert source (PRODES first, then DETER),
    # keeping only the alerts dated within this month (bounds inclusive).
    monthly_cases = []
    for alerts in (prodes, deter):
        in_month = (alerts["date"] >= month_start) & (alerts["date"] <= month_end)
        monthly_cases.append(
            gpd.overlay(
                frames,
                alerts[in_month][["geometry"]],
                how="intersection",
                keep_geom_type=False,
            )
        )

    # Merge this month's pieces into one geometry per frame.
    dt_deforestation = pd.concat(monthly_cases).dissolve("frame_id").reset_index()

    # Fold the month into the running state (deforestation up to this date).
    deforestation_state = (
        pd.concat([deforestation_state, dt_deforestation])
        .dissolve("frame_id")
        .reset_index()
    )

    # Store an independent copy so later iterations cannot alter this snapshot.
    deforestation_series.append(deforestation_state.copy())

In [None]:
# Attach every snapshot to the grid and compute, per frame, the deforested
# fraction of the cell (expect a warning about area accuracy to show up).
temporal_data = []
for snapshot in deforestation_series:
    cell_state = pd.merge(frames, snapshot, on="frame_id")
    # After the merge, geometry_x is the grid cell and geometry_y the
    # deforested geometry inside it.
    deforested_area = cell_state.geometry_y.area
    cell_area = cell_state.geometry_x.area
    cell_state["area"] = deforested_area / cell_area
    temporal_data.append(cell_state.copy())

# Visualizations

In [None]:
# Last time step: exact deforested outlines (red) drawn together with the
# per-cell approximation (cells shaded by their deforested fraction).
last_state = temporal_data[-1]
outlines = last_state["geometry_y"].boundary

fig, ax = plt.subplots()
outlines.plot(color="red", ax=ax, linewidth=0.2)
last_state.set_geometry("geometry_x").plot(column="area", cmap="Blues", ax=ax)
plt.show()

In [None]:
# Side-by-side maps of the first (left) and last (right) time steps.
fig, axes = plt.subplots(1, 2)

for panel, snapshot in zip(axes, (temporal_data[0], temporal_data[-1])):
    snapshot.set_geometry("geometry_x").plot(ax=panel, column="area", cmap="Blues")

plt.show()

# Total deforested fraction summed over all frames, first vs last step.
temporal_data[0]["area"].sum(), temporal_data[-1]["area"].sum()

# Matrix data

In [None]:
# Attach the (x, y) grid index of every frame. frame_idx was filled in the
# same loop (and therefore the same order) as the boxes that make up frames.
frames[["x", "y"]] = frame_idx

In [None]:
# Create the X matrix: one (x, y) slice of deforested fractions per time step.
grid_shape = (matrix_size_x, matrix_size_y)
X = np.zeros(grid_shape + (len(temporal_data),))
for t, snapshot in enumerate(temporal_data):
    # Left merge keeps every frame; frames missing from the snapshot get NaN.
    per_frame = pd.merge(
        frames,
        snapshot[["frame_id", "area"]],
        on="frame_id",
        how="left",
    )
    # Order by frame_id so the flat vector reshapes back onto the grid;
    # frames without a match count as area 0.
    areas = per_frame.sort_values("frame_id")["area"].fillna(0).values
    X[:, :, t] = areas.reshape(grid_shape)

In [None]:
# Inspect the matrix dimensions: (matrix_size_x, matrix_size_y, number of time steps).
X.shape

In [None]:
# Persist the deforestation history matrix X as a pickle at config.TR_DEFORESTATION.
with open(config.TR_DEFORESTATION, "wb") as out_file:
    pickle.dump(X, out_file)

In [None]:
# Save the frames (cell geometry plus frame_id and the x/y grid indices added
# above) to the path in config.TR_FRAMES.
frames.to_file(config.TR_FRAMES)