Notebook for reading and wrangling historical night-lights data

# Prepare environment

In [1]:
import os
import pandas as pd

import geopandas as gpd
import rioxarray as rxr

from tqdm import tqdm

In [2]:
import config
from utils import compute_frames

import warnings
# NOTE(review): with no category/module argument this silences every warning
# for the rest of the session, including deprecation notices raised by the
# libraries imported above. Consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Data reading

In [3]:
# Read the Legal Amazon boundary file configured in `config` into a
# GeoDataFrame; its total bounds are used further down to build the frames.
amazon_limits_path = config.AMAZON_FRONTIER_DATA
am_bounds = gpd.read_file(amazon_limits_path)

In [4]:
# Read the frame index table (CSV); the first column of the file becomes
# the DataFrame index.
frames_idx_path = config.TR_FRAMES_IDX
frames_idx = pd.read_csv(frames_idx_path, index_col=0)

In [5]:
# Build every frame from the study-area bounds, the configured box side and
# the min/max of the "x" and "y" columns of the frame index table.
frame_x = frames_idx["x"]
frame_y = frames_idx["y"]

frames = compute_frames(
    am_bounds.total_bounds,
    config.BOX_SIDE,
    frame_x.min(),
    frame_x.max(),
    frame_y.min(),
    frame_y.max(),
)

In [6]:
# function to compute frames light from tif file
def join_with_frames(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate point light values by the frame each point falls within.

    Parameters
    ----------
    df : pd.DataFrame
        One row per point. Must have ``x`` and ``y`` columns (used as point
        coordinates and tagged as EPSG:4326) and a ``light`` column.

    Returns
    -------
    pd.DataFrame
        One row per ``frame_id`` holding the mean and max of ``light``.
        The columns are two-level, as produced by the list-valued ``agg``:
        ``("frame_id", "")``, ``("light", "mean")``, ``("light", "max")``.

    Notes
    -----
    Uses the notebook-level ``frames`` object as the right side of the
    spatial join, so the cell that builds ``frames`` must run first.
    Presumably ``frames`` is a GeoDataFrame carrying the ``frame_id``
    column -- confirm against ``utils.compute_frames``.
    """

    # transform into geodataframe; x/y are folded into the geometry column
    points = gpd.GeoDataFrame(
        df, geometry=gpd.points_from_xy(df.x, df.y), crs="EPSG:4326"
    ).drop(["x", "y"], axis=1)

    # join: an inner join keeps only points that fall within some frame.
    # geopandas 0.10 renamed the `op` keyword to `predicate` and 1.0 removed
    # `op`, so try the current spelling first and fall back for old installs.
    try:
        df_frames = gpd.sjoin(points, frames, how="inner", predicate="within")
    except TypeError:
        # older geopandas whose sjoin only accepts `op`
        df_frames = gpd.sjoin(points, frames, how="inner", op="within")

    return (
        df_frames
        .groupby("frame_id")
        .agg({"light": ["mean", "max"]})
        .reset_index()
    )

In [7]:
# compute light by quarter (file)
timeseries = []
# os.listdir returns names in arbitrary order; sorting makes the row order of
# the resulting table reproducible from run to run.
for f in tqdm(sorted(os.listdir(config.NIGHT_LIGHT_DATA))):

    # read file into pandas format; the context manager closes the raster
    # handle once the first band has been copied into a DataFrame
    with rxr.open_rasterio(os.path.join(config.NIGHT_LIGHT_DATA, f)) as dataarray:
        df = dataarray[0].to_pandas()

    # wide grid -> long table with one row per cell, dropping missing cells
    df = pd.melt(df, ignore_index=False)\
        .reset_index()\
        .dropna()\
        .rename(columns={"value": "light"})
    # filter out zeros
    df = df[df["light"] > 0]

    # join with frames and add date, parsed from the file name: the text
    # between the first "_" and the following "."
    light_frames = join_with_frames(df)
    light_frames["dt"] = pd.to_datetime(f.split("_")[1].split(".")[0])

    # append to time varying list
    timeseries.append(light_frames)

100%|██████████| 28/28 [08:52<00:00, 19.03s/it]


In [8]:
# Stack the per-file aggregates into one table and replace the column
# header with four flat names.
timeseries_lights = pd.concat(timeseries).set_axis(
    ["frame_id", "avg_light", "max_light", "dt"], axis=1
)

# Persist the table as CSV, without the row index.
timeseries_lights.to_csv(config.TR_NIGHT_LIGHT, index=False)