In [108]:
import os
import numpy as np

import fiona
import rasterio
import rasterio.mask
from rasterio.windows import Window

import geopandas as gpd
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import colors

from tqdm import tqdm

import torch
from sklearn.model_selection import train_test_split

In [73]:
# load all CAR
path = "../data/car/acre/"
folders = [name for name in os.listdir(path) if "SHAPE" in name]
df = gpd.GeoDataFrame()
for folder in folders:
    df_new = gpd.read_file(path + folder + "/AREA_IMOVEL/AREA_IMOVEL.shp")
    df = pd.concat([df, df_new])

# load sample project area
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
path_pa = "../data/pa/ValparaisoProperty (Kml).kml"
pa = gpd.read_file(path_pa, driver='KML')
pa = pa.to_crs("EPSG:4674")

# function to load biomass data
def load_biomass_data(year, shape, resolution=250):
    path_bio = f"../data/biomass/{resolution}m/" + f"mapbiomas-brazil-collection-70-acre-{year}.tif"
    with rasterio.open(path_bio) as src:
        if shape is not None:
            bio_data, out_transform = rasterio.mask.mask(src, shape, crop=True)
        else:
            bio_data = src.read(1)
        bio_data = np.squeeze(bio_data)
        out_meta = src.meta
    return bio_data

# function to transform labels to 1=forest, 2=non_forest, 0=unknown
def transform_to_labels(bio_data):
    class_dict = {1:1, 3:1, 4:1, 5:1,49:1, # forest
                10:2,11:2,12:2,32:2,29:2,13:2, 13:2, 50:2, # natural
                14:2,15:2,18:2,19:2,39:2,20:2,40:2,61:2,41:2,36:2,46:2,47:2,48:2,9:2,21:2, # farming
                22:2,23:2,24:2,30:2,25:2, # urban
                26:2,33:2,31:2, # water
                27:0,0:0} # unobserved
    bio_data_new = np.zeros_like(bio_data)
    for key, value in class_dict.items():
        bio_data_new[bio_data == key] = value
    return np.array(bio_data_new, dtype=np.int8)

# calculate CAR area
df['area_6933'] = df.geometry.to_crs("EPSG:6933").area

# filter by area
pa_area = pa.to_crs("EPSG:6933").area.sum()
df = df[(df.area_6933 > 0.75 * pa_area) & (df.area_6933 < 1.25 * pa_area)]

# avoid overlap
for polygon in pa.geometry:
    df = df[np.invert(df.overlaps(polygon).values)]

# ensure sufficient forest cover
forest_cover_1985 = []
for car in df.iterrows():
    bio_data = load_biomass_data(1985, [car[1].geometry], resolution=250)
    bio_data = transform_to_labels(bio_data)
    forest_cover = np.count_nonzero(bio_data == 1) / np.count_nonzero(bio_data > 0)
    forest_cover_1985.append(forest_cover)
df['fc_1985'] = forest_cover_1985
df = df[df.fc_1985 >= 0.9]

  return lib.overlaps(a, b, **kwargs)


In [78]:
# load car images as tensors
car_tensors = []

start_train = 2006
start_target = 2016
start_test = 2019
horizon = 3

patch_size = 400
overlap = 100

for car in tqdm(df.iterrows()):
    years = np.arange(start_train, start_test+horizon)

    bio_data_dict = {}
    for year in years:
        bio_data = load_biomass_data(year, [car[1].geometry], resolution=30)
        bio_data = transform_to_labels(bio_data)
        bio_data_dict[year] = bio_data

    deforestation = np.zeros_like(bio_data_dict[start_target - 1], dtype=bool)
    for year in np.arange(start_target, start_target+horizon):
        deforested = bio_data_dict[start_target - 1] - bio_data_dict[year]
        deforestation = deforestation | (deforested < 0)
    bio_data_dict[1111] = deforestation
    
    car_tensors.append(np.array(list(bio_data_dict.values())))
shapes = [car_tensor.shape for car_tensor in car_tensors]
target_shape = np.max(shapes, axis=0)
residual = np.array([target_shape[0], patch_size, patch_size]) - target_shape%patch_size
target_shape = target_shape + residual
reshaped_car_tensors = []
for car_tensor in car_tensors:
    reshaped_car_tensors.append(np.pad(car_tensor, (
        (0, 0), 
        (0, target_shape[1] - car_tensor.shape[1]), 
        (0, target_shape[2] - car_tensor.shape[2]))
    ))
reshaped_car_tensors = np.array(reshaped_car_tensors)

# to torch and split in patches
car_tensors = torch.from_numpy(reshaped_car_tensors)
patches = car_tensors.unfold(2, patch_size, patch_size - overlap)
patches = patches.unfold(3, patch_size, patch_size - overlap)
patches = torch.moveaxis(patches, 1, -1)
patches = torch.flatten(patches, 0, 2)

37it [00:04,  7.91it/s]


In [79]:
patches[patches[:,:,:,0].sum(axis=1).sum(axis=1) > 0].shape

torch.Size([344, 400, 400, 17])

In [80]:
patches[patches[:,:,:,-1].sum(axis=1).sum(axis=1) > 0].shape

torch.Size([258, 400, 400, 17])

In [100]:
# load entire image

start_train = 2006
start_target = 2016
start_test = 2019
horizon = 3

patch_size = 400
overlap = 0

years = np.arange(start_train, start_test+horizon)

bio_data_dict = {}
for year in years:
    bio_data = load_biomass_data(year, None, resolution=30)
    bio_data = transform_to_labels(bio_data)
    bio_data_dict[year] = bio_data

deforestation = np.zeros_like(bio_data_dict[start_target - 1], dtype=bool)
for year in np.arange(start_target, start_target+horizon):
    deforested = bio_data_dict[start_target - 1] - bio_data_dict[year]
    deforestation = deforestation | (deforested < 0)
bio_data_dict[1111] = deforestation

site_tensor = np.array(list(bio_data_dict.values()))

In [101]:
target_shape = np.array(site_tensor.shape)
residual = np.array([target_shape[0], patch_size, patch_size]) - target_shape%patch_size
target_shape = target_shape + residual
reshaped_site_tensor = np.pad(site_tensor, (
        (0, 0), 
        (0, target_shape[1] - site_tensor.shape[1]), 
        (0, target_shape[2] - site_tensor.shape[2]))
    )

In [102]:
# to torch and split in patches
site_tensor = torch.from_numpy(reshaped_site_tensor)
patches = site_tensor.unfold(1, patch_size, patch_size - overlap)
patches = patches.unfold(2, patch_size, patch_size - overlap)
patches = torch.moveaxis(patches, 0, -1)
patches = torch.flatten(patches, 0, 1)

In [103]:
print('Non empty patch: ', patches[patches[:,:,:,0].sum(axis=1).sum(axis=1) > 0].shape)
print('Changing patch: ', patches[patches[:,:,:,-1].sum(axis=1).sum(axis=1) > 0].shape)

Non empty patch:  torch.Size([1277, 400, 400, 17])
Changing patch:  torch.Size([1091, 400, 400, 17])


In [122]:
patches = patches[patches[:,:,:,-1].sum(axis=1).sum(axis=1) > 0]
patches = patches[:,:,:,:-1]

In [124]:
# split the dataset into train and test sets
nr_years_train = start_target - start_train
train_patches, val_patches, train_labels, val_labels = train_test_split(patches[:,:,:,:nr_years_train], patches[:,:,:,nr_years_train:nr_years_train+horizon], test_size=0.2, random_state=42)
test_patches = patches[:,:,:,horizon:nr_years_train+horizon]
test_labels = patches[:,:,:,-horizon:]

In [127]:
# create the train and test datasets using the train_patches and test_patches tensors
train_dataset = torch.utils.data.TensorDataset(train_patches, train_labels)
val_dataset = torch.utils.data.TensorDataset(val_patches, val_labels)
test_dataset = torch.utils.data.TensorDataset(test_patches, test_labels)

# create the data loaders for the train and test datasets
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)