Notebook for reading and wrangling counties data

# Prepare environment

In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd

In [2]:
import config
from utils import compute_frames

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides any warnings raised by the
# geopandas steps further down (overlay, area) — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Data reading

In [3]:
# Load the Legal Amazon limits from the file referenced by
# config.AMAZON_FRONTIER_DATA. Later cells read its total_bounds and its
# "geometry" column.
am_bounds = gpd.read_file(config.AMAZON_FRONTIER_DATA)

In [4]:
# Load the frames index detail table; the first CSV column becomes the
# DataFrame index. Its "x" and "y" columns are used below to bound the
# frame generation.
frames_idx = pd.read_csv(config.TR_FRAMES_IDX, index_col=0)

In [5]:
# Load the per-frame deforestation area history (first CSV column is the index)
# and convert the "quarter_date" column to datetime in the same chain.
deforestation = (
    pd.read_csv(config.TR_DEFORESTATION, index_col=0)
    .assign(quarter_date=lambda hist: pd.to_datetime(hist["quarter_date"]))
)

In [6]:
# counties data
counties = gpd.read_file(config.COUNTIES_DATA)

# Keep only the counties that intersect the Amazon borders. `.item()` pulls the
# single geometry out of am_bounds and raises if there is not exactly one row.
amazon_geom = am_bounds["geometry"].item()
in_amazon = counties.intersects(amazon_geom)
counties = counties[in_amazon].copy()

## Merge county with frame and extract data

In [7]:
# Generate all frames: compute_frames receives the bounding box of the Amazon
# limits, config.BOX_SIDE, and the min/max "x" and "y" values found in frames_idx.
x_min, x_max = frames_idx["x"].min(), frames_idx["x"].max()
y_min, y_max = frames_idx["y"].min(), frames_idx["y"].max()
frames = compute_frames(
    am_bounds.total_bounds,
    config.BOX_SIDE,
    x_min,
    x_max,
    y_min,
    y_max,
)

In [8]:
# Quick sanity check on the generated frames: (number of rows, number of columns).
frames.shape

(777777, 4)

In [9]:
# Get the county for each frame: intersect every frame with the county polygons,
# carrying over the county attributes listed below.
county_cols = ["CD_MUN", "SIGLA_UF", "populacao", "densidade", "geometry"]
region = gpd.overlay(
    frames,
    counties[county_cols],
    how="intersection",
    keep_geom_type=False,
)

# Area of each frame/county intersection. It is only used to rank the
# intersections of a frame against each other and is dropped afterwards.
# NOTE(review): the value is expressed in the units of the layers' CRS — confirm
# that is acceptable for ranking.
region["aprox_ar"] = region.geometry.area

# Keep only the biggest intersection per frame id: sort by area (largest first),
# retain the first row of every frame_id, then reduce to the output columns.
region = (
    region
    .sort_values("aprox_ar", ascending=False)
    .drop_duplicates(subset="frame_id", keep="first")
    [["frame_id", "CD_MUN", "populacao", "densidade"]]
    .rename(columns={"CD_MUN": "county_id"})
    .reset_index(drop=True)
)

In [10]:
# Get the deforestation history for each county. First attach the history rows
# to every frame -> county record; validate="1:m" enforces that each frame_id
# appears once in `region` while it may match many history rows.
df = region.merge(
    deforestation,
    on="frame_id",
    how="left",
    validate="1:m",
)

# Sum the "area" column per county and quarter.
county_defor = df.groupby(["county_id", "quarter_date"], as_index=False)["area"].sum()

In [12]:
# Save the frame -> county table (frame_id, county_id, populacao, densidade)
# to the path in config.TR_COUNTIES; the DataFrame index is written as well.
region.to_csv(config.TR_COUNTIES)

# Save the per-county, per-quarter deforestation totals to the path in
# config.TR_COUNTIES_DEFOR.
county_defor.to_csv(config.TR_COUNTIES_DEFOR)