In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from itertools import repeat
from functools import partial

from geopandas.tools import sjoin
from tqdm.auto import tqdm

from concurrent.futures import ThreadPoolExecutor,as_completed,ProcessPoolExecutor
import multiprocessing as mp

In [2]:
# define regions
regions = ['dongbei','huabei','xibei','zhongnan'] # ['dongbei','huabei','huadong','xibei','xinan','zhongnan'] 

# define the sample size
sample_size = 7000

In [3]:
# read data
pts_urban = gpd.read_file(f'../../Reference_pts/Urban_pts/urban_all_points_subset.shp')
pts_non_urban = gpd.read_file(f'../../Reference_pts/Non_urban_pts/non_urban_all_points_subset.shp')

rect = gpd.read_file('./China_ROI_rect/China_ROI_rect.shp')
China = gpd.read_file('../../../../Process_1_Research_area_Zoning/Data_Boundary_SHP/geography_zone/qu-sheng_dissolved.shp')

In [4]:
# function to sample n points from each roi_rectangel
def sample_from_rect(roi_rect,built_pts):

    roi_df = roi_rect.set_crs(4326)
    sample_pts = sjoin(built_pts,roi_df,predicate='within')

    if len(sample_pts) >= 1: 
        sample_pts = sample_pts.sample(1)
    return sample_pts

In [5]:
# filter the region
for region in regions:
    
    # get built point from urban/non-urban area
    region_shp = China[China['region']==region]
    built_urban = sjoin(pts_urban,region_shp,predicate='within')
    built_non_urban = sjoin(pts_non_urban,region_shp,predicate='within')
    
    # filter built points and roi-rect using the region_shp
    built_merge = pd.concat([built_urban,built_non_urban]).reset_index(drop=True)[['geometry']]
    roi_rect_region = sjoin(rect,region_shp,predicate='within').reset_index(drop=True)[['geometry']]
    
    # convert each row of roi_rect_region to a gdf
    rect_row_dfs = [gpd.GeoDataFrame([{'geometry':row['geometry']}]) for _,row in roi_rect_region.iterrows()]
    
    
    # _____________________ Multiprocessing _________________________
    results = []
    n_workder = 5

    with ThreadPoolExecutor(max_workers= n_workder) as executor:

        # Submit tasks to the executor and associate each task with a progress bar
        futures = [executor.submit(partial(sample_from_rect,built_pts=built_merge), row) for row in rect_row_dfs]

        # Find the associated progress bar and update it
        with tqdm(total=len(futures)) as pbar: 
            # Use as_completed to track the progress of completed sub-processes
            for future in as_completed(futures):
                result = future.result()
                if len(result) >0: results.append(result[['geometry']])    
                pbar.update(1)
                
    # _____________________ Save to disk _________________________
    out_samples_df = pd.concat(results)
    out_num = min(len(out_samples_df),sample_size)
    out_samples_df.sample(out_num).to_file(f'../../03_Check_built_pts/03_sample_pts_urban_rural/sample_pts_built_{region}.shp')

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84" ...
Right CRS: EPSG:4326

  roi_rect_region = sjoin(rect,region_shp,predicate='within').reset_index(drop=True)[['geometry']]


  0%|          | 0/18215 [00:00<?, ?it/s]

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84" ...
Right CRS: EPSG:4326

  roi_rect_region = sjoin(rect,region_shp,predicate='within').reset_index(drop=True)[['geometry']]


  0%|          | 0/30194 [00:00<?, ?it/s]

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84" ...
Right CRS: EPSG:4326

  roi_rect_region = sjoin(rect,region_shp,predicate='within').reset_index(drop=True)[['geometry']]


  0%|          | 0/16575 [00:00<?, ?it/s]

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84" ...
Right CRS: EPSG:4326

  roi_rect_region = sjoin(rect,region_shp,predicate='within').reset_index(drop=True)[['geometry']]


  0%|          | 0/67497 [00:00<?, ?it/s]