In [12]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

import rasterio

from tqdm.auto import tqdm
import plotnine
from plotnine import *

In [2]:
# Load the inputs shared by the cells below.
# The raster dataset handle is intentionally left open: it is passed on as the
# default `in_img` of check_if_exceed_trend, which calls its .index() method.
mosaic_path = 'Data/Mosaic_China_1990_2022.tif'
classification_img = rasterio.open(mosaic_path)
# in-memory pixel array of the mosaic (indexed as [band][row, col] further down)
classification_img_arry = classification_img.read()

# the tile boxes (one geometry per grid cell to be checked)
tile_box_path = 'Data/split_box_China.shp'
tile_box_China = gpd.read_file(tile_box_path)

### Iterate over each tile_box and check whether the latest urban area exceeds the confidence range of historical development
- A pixel count of 500 is approximately the 20th percentile of pixel_count across all grid cells

In [45]:
# Define the curve_func function to fit
def curve_func(x, a, b):
    """Logarithmic trend model ``a * ln(x) + b``.

    This is the default model fitted by ``check_pred``.

    Parameters
    ----------
    x : scalar or array-like
        Independent variable; must be positive for the natural log to be finite.
    a, b : float
        Slope (on the log scale) and intercept.
    """
    return a * np.log(x) + b

# the function to check if the prediction exceed the range of confidence interval
def check_pred(pix_val,pix_cusum_count,in_func=curve_func,se_gap=1):
    """Check whether the last observation falls outside the fitted trend band.

    All observations except the last one are fitted with ``in_func`` via
    ``scipy.optimize.curve_fit``.  Every fitted parameter is then shifted by
    ``+/- se_gap`` standard errors and the model is evaluated at the last x
    value for every combination of shifted parameters; the smallest and the
    largest of those predictions form the band.

    Note that only the diagonal of the covariance matrix is used, i.e. the
    correlation between the parameters is ignored.

    Parameters
    ----------
    pix_val : sequence
        x values; the last element is the point to be checked.
    pix_cusum_count : sequence
        y values aligned with ``pix_val``; the last element is the observed
        value compared against the band.
    in_func : callable, optional
        Model ``f(x, *params)`` to fit. Defaults to ``curve_func``.
    se_gap : float, optional
        Half-width of the parameter interval, in standard errors.

    Returns
    -------
    str
        ``'exceed'`` if the last y value lies outside the band,
        ``'no_exceed'`` otherwise (including when the parameter errors are
        not finite, in which case the band is unbounded).
    """
    from itertools import product

    # by default, the last value is the one to be checked
    X = pix_val[:-1]
    y = pix_cusum_count[:-1]

    # Fit the model (logarithmic with the default in_func) using curve_fit
    params, covariance = curve_fit(in_func, X, y)

    # Standard errors of the parameters (square root of the diagonal of the covariance matrix)
    param_errors = np.sqrt(np.diag(covariance))

    # If the covariance could not be estimated the errors are inf/nan:
    # the band is then unbounded and nothing can be said to exceed it.
    if not np.all(np.isfinite(param_errors)):
        return 'no_exceed'

    # Interval for each parameter
    conf_interval = se_gap * param_errors
    lower_bounds = params - conf_interval
    upper_bounds = params + conf_interval

    # Point to be checked. x is cast to float so the prediction is evaluated in
    # double precision (np.log on a small integer dtype returns a
    # reduced-precision float).
    actual_X = np.asarray(pix_val[-1], dtype=float)
    actual_y = pix_cusum_count[-1]

    # Evaluate the model at every corner of the parameter box. Pairing
    # "all lower" with "all upper" is only ordered when the model increases in
    # every parameter at actual_X (e.g. it is not when log(x) < 0), so take the
    # true min/max over all corners instead.
    corner_preds = [
        in_func(actual_X, *corner)
        for corner in product(*zip(lower_bounds, upper_bounds))
    ]
    pred_low = np.min(corner_preds)
    pred_upp = np.max(corner_preds)

    # check if the actual_y exceed the confidence range
    if (actual_y < pred_low) or (actual_y > pred_upp):
        return 'exceed'
    return 'no_exceed'

In [73]:
# function to check if the latest urban area exceed the confidence range of historical development
def check_if_exceed_trend(row,in_img=classification_img,in_array=classification_img_arry,
                          tile_size=1024,min_urban_pixels=500,min_classes=4):
    """Classify one tile as 'skipped', 'exceed' or 'no_exceed'.

    The tile is cut out of band 0 of ``in_array`` starting at the upper-left
    corner of the row's bounding box. The positive pixel values found in the
    tile are counted, the counts are accumulated in ascending value order and
    handed to ``check_pred``, which tests the last cumulative count against
    the trend fitted on the preceding ones.

    Parameters
    ----------
    row : object with a ``geometry`` attribute
        One record of the tile table; only ``row.geometry.bounds`` is used.
    in_img : raster dataset
        Object whose ``index(x, y)`` converts map coordinates to ``(row, col)``.
    in_array : array
        Pixel array indexed as ``in_array[0][row, col]``.
        NOTE(review): pixel values > 0 are treated as urban and presumably
        encode the period in which a pixel became urban -- confirm.
    tile_size : int, optional
        Edge length of the tile window in pixels.
    min_urban_pixels : int, optional
        Tiles with fewer positive pixels than this are skipped.
    min_classes : int, optional
        Minimum number of distinct positive pixel values required for the fit.

    Returns
    -------
    str
        ``'skipped'`` when the tile has too few urban pixels or too few
        distinct positive values, otherwise the result of ``check_pred``.
    """
    # upper-left corner of the bounding box: (minx, maxy)
    minx, _, _, maxy = row.geometry.bounds

    # lon/lat --> raster row/col (separate names so the `row` argument is not shadowed)
    top, left = in_img.index(minx, maxy)

    # subset array using col/row
    array_in_box = in_array[0][top:top + tile_size, left:left + tile_size]

    # 1) skip tiles with too few urban pixels
    if (array_in_box > 0).sum() < min_urban_pixels:
        return 'skipped'

    # 2) check if the classified urban pixels exceed the historical trajectory
    pix_val, pix_count = np.unique(array_in_box, return_counts=True)

    # Keep only the urban classes. Masking on the value (rather than dropping
    # the first entry) stays correct for tiles without any background pixel.
    is_urban = pix_val > 0
    pix_val = pix_val[is_urban]
    pix_count = pix_count[is_urban]

    # skip if there are too few classes to fit
    if len(pix_val) < min_classes:
        return 'skipped'

    # np.unique returns values sorted ascending, so the running sum is the
    # cumulative count up to each class
    return check_pred(pix_val, np.cumsum(pix_count))

In [None]:
# Run the trend check on every tile and collect one status string per tile
# ('skipped', 'exceed' or 'no_exceed'), in the same order as tile_box_China.
n_tiles = len(tile_box_China)
check_result = []
for idx, row in tqdm(tile_box_China.iterrows(), total=n_tiles):
    status = check_if_exceed_trend(row)
    check_result.append(status)

  0%|          | 0/13299 [00:00<?, ?it/s]