# Join multiple rasters to many points, quickly

In [1]:
# Mask raster values > 360
# Build this so we can loop over a collection of rasters
# Do the aggregation operations per admin unit in Dask.DataFrames

import dask
import coiled
from dask.distributed import Client, LocalCluster, Lock
from dask.utils import SerializableLock
import dask.dataframe as dd

import pandas as pd
import geopandas as gpd
import spatialpandas as sp
import dask_geopandas as dg

import rioxarray as rx
import xarray as xr

import re
import os

from dask_control import *
from raster_vals_to_pts import *

import numpy as np
# Scratch directory dask uses for local disk.
# Dask's own DASK_TEMPORARY_DIRECTORY environment variable is honoured first so the
# notebook can run on another machine; the original user-specific path is the fallback.
dask.config.set({"temporary-directory": os.environ.get("DASK_TEMPORARY_DIRECTORY", "C:/Users/wb467985")})


<dask.config.set at 0x18dd3874910>

In [2]:
# Show the installed dask version so the run environment is recorded with the outputs.
dask.__version__

'2021.04.1'

In [3]:
# Admin boundary shapefiles (OCHA admin levels 1 and 3, per the file names)
bounds_1 = "P:/PAK/GEO/Boundaries/OCHA/pak_admbnda_adm1_ocha_pco_gaul_20181218.shp"
bounds_3 = "P:/PAK/GEO/Boundaries/OCHA/pak_admbnda_adm3_ocha_pco_gaul_20181218.shp"
# Directory scanned for the access rasters; earlier choices are kept commented out for reference
# access_dir = "P:/PAK/Code/kpgit_robert/data/access/current/master"
# access_dir = r'P:\PAK\Code\OSMEfficacy\access\211214' # switching in access dir for BeforeOSMInvestment files
access_dir = r'P:\PAK\Code\kpgit_robert\data\access\globalFS' # switching in access dir for Global Friction surface files
# CSV of points that the raster values are joined to
POINTS_URL = "../../data/interim/points_v15_full.csv"

In [4]:
#bounds_1 = "../../data/raw/geo/pak_adm_ocha_pco_gaul_20181218_SHP/pak_admbnda_adm1_ocha_pco_gaul_20181218.shp"
#bounds_3 = "../../data/raw/geo/pak_adm_ocha_pco_gaul_20181218_SHP/pak_admbnda_adm3_ocha_pco_gaul_20181218.shp"
#access_dir = "../../data/raw/access"
#POINTS_URL = "../../data/interim/points_v15_full.csv"


In [5]:
# Analysis thresholds
FRICTION_THRESHOLD = 150
DENSITY_THRESHOLD = 0.02

# Desired output projection -- edit this to reproject to something else
DEST_CRS = 'EPSG:32642'

# Integer EPSG code: the first run of digits found in DEST_CRS
dcrs_int = int(re.search('[0-9]+', DEST_CRS).group(0))
dcrs_int

32642

In [6]:
# define time period under study
# time_period = 'Current'
# time_period = 'BeforeOSMInvesment'
time_period = 'GlobalFS'

Instantiate Dask

In [7]:
# get_dask_client is not defined in this notebook -- presumably it comes from the
# wildcard import of dask_control; verify there what each argument does.
# Requested here: a local cluster with 4 worker processes of 8 threads each.
client=get_dask_client(cluster_type='local',n_workers=4,processes=True,threads_per_worker=8)
client



Client acquired. Restarting


0,1
Client  Scheduler: tcp://127.0.0.1:8786  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 32  Memory: 63.98 GiB


Read in CSV

In [9]:
# Preview the first rows of the points CSV to check its column layout.
# Reads POINTS_URL rather than repeating the path, so changing the constant changes the preview too.
pd.read_csv(POINTS_URL, nrows=3)

Unnamed: 0,ix,POP,x_4326,y_4326,x,y,ADM1_EN,ADM2_EN,ADM3_EN,Adm1_Code,Adm2_Code,Adm3_Code
0,2148470,5.587176,73.545833,36.873889,905257.23625,4090541.0,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
1,2148497,5.587176,73.654444,36.873056,914950.266604,4090916.0,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
2,2148611,5.587176,73.394444,36.868889,891779.067584,4089352.0,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602


In [8]:
# Small in-memory sample (first 50,000 rows) of the points file for quick experiments.
# Reads POINTS_URL rather than repeating the path, so the sample always matches the full load.
test_pts = pd.read_csv(POINTS_URL, nrows=50000)

In [30]:
# Load Points
# (position in file, column name, dtype) for every column that is read.
# File positions 2 and 3 (x_4326, y_4326) are deliberately not loaded.
point_columns = [
    (0, "ix", int),
    (1, "POP", float),
    (4, "x", float),
    (5, "y", float),
    (6, "ADM1_EN", str),
    (7, "ADM2_EN", str),
    (8, "ADM3_EN", str),
    (9, "Adm1_Code", str),
    (10, "Adm2_Code", str),
    (11, "Adm3_Code", str),
]

points_unindexed = dd.read_csv(
    POINTS_URL,
    usecols=[position for position, _, _ in point_columns],
    header=0,  # the file's own header row is replaced by `names`
    names=[name for _, name, _ in point_columns],
    dtype={name: kind for _, name, kind in point_columns},
    na_values=' ',
)

# sorted=True assumes the rows are already ordered by 'ix' -- TODO confirm for new input files
points = points_unindexed.set_index('ix', drop=True, sorted=True)
points.head()

Unnamed: 0_level_0,POP,x,y,ADM1_EN,ADM2_EN,ADM3_EN,Adm1_Code,Adm2_Code,Adm3_Code
ix,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2148470,5.587176,905257.23625,4090541.0,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
2148497,5.587176,914950.266604,4090916.0,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
2148611,5.587176,891779.067584,4089352.0,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
2148619,5.587176,891830.046673,4089324.0,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
2148666,5.587176,895484.006545,4089246.0,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602


Process rasters

In [9]:
# Build {short raster name: file path} for every "*_COG.tif" file in access_dir.
# The short name is the part of the file name between "multimodal_" and "_COG.tif".
raster_name_pattern = re.compile(r'multimodal_(.*?)_COG\.tif')

# Optional cap on how many rasters are registered (useful while debugging); None = no cap.
rlimit = None

rasters = {}
# sorted() keeps the key order (and so the later variable/column order) deterministic
for file in sorted(os.listdir(access_dir)):
    if rlimit is not None and len(rasters) >= rlimit:
        break
    if not file.endswith("_COG.tif"):
        continue
#     if file.startswith(f'{time_period}_'):
    name_match = raster_name_pattern.search(file)
    if name_match is None:
        # Previously a COG without "multimodal_" in its name crashed on .group(1) of None
        print(f"Skipping {file}: file name does not match 'multimodal_<name>_COG.tif'")
        continue
    rasters[name_match.group(1)] = f"{access_dir}/{file}"

rasters

{'all_District_HQs': 'P:\\PAK\\Code\\kpgit_robert\\data\\access\\globalFS/GlobalFS_multimodal_all_District_HQs_COG.tif',
 'all_education_allboys': 'P:\\PAK\\Code\\kpgit_robert\\data\\access\\globalFS/GlobalFS_multimodal_all_education_allboys_COG.tif',
 'all_education_allgirls': 'P:\\PAK\\Code\\kpgit_robert\\data\\access\\globalFS/GlobalFS_multimodal_all_education_allgirls_COG.tif',
 'all_education_boys': 'P:\\PAK\\Code\\kpgit_robert\\data\\access\\globalFS/GlobalFS_multimodal_all_education_boys_COG.tif',
 'all_education_boys_high': 'P:\\PAK\\Code\\kpgit_robert\\data\\access\\globalFS/GlobalFS_multimodal_all_education_boys_high_COG.tif',
 'all_education_boys_middle': 'P:\\PAK\\Code\\kpgit_robert\\data\\access\\globalFS/GlobalFS_multimodal_all_education_boys_middle_COG.tif',
 'all_education_boys_primary': 'P:\\PAK\\Code\\kpgit_robert\\data\\access\\globalFS/GlobalFS_multimodal_all_education_boys_primary_COG.tif',
 'all_education_girls': 'P:\\PAK\\Code\\kpgit_robert\\data\\access\\globalF

In [10]:
# Open every registered raster as a chunked DataArray, keyed by its short name.
# NOTE(review): no .persist() is called here even though the log line says "Persist".
# NOTE(review): xr.open_rasterio is deprecated in newer xarray releases in favour of
# rioxarray -- confirm the installed version before upgrading; the next cell relies on
# the `nodatavals` attribute that this reader sets.
loaded_rasters = {}
for key, raster_path in rasters.items():
    print(f"Persist raster: {key} at {raster_path}")
    loaded_rasters[key] = xr.open_rasterio(
        raster_path,
        chunks=("auto", "auto", "auto"),
        parse_coordinates=True,
    )

Persist raster: all_District_HQs at P:\PAK\Code\kpgit_robert\data\access\globalFS/GlobalFS_multimodal_all_District_HQs_COG.tif
Persist raster: all_education_allboys at P:\PAK\Code\kpgit_robert\data\access\globalFS/GlobalFS_multimodal_all_education_allboys_COG.tif
Persist raster: all_education_allgirls at P:\PAK\Code\kpgit_robert\data\access\globalFS/GlobalFS_multimodal_all_education_allgirls_COG.tif
Persist raster: all_education_boys at P:\PAK\Code\kpgit_robert\data\access\globalFS/GlobalFS_multimodal_all_education_boys_COG.tif
Persist raster: all_education_boys_high at P:\PAK\Code\kpgit_robert\data\access\globalFS/GlobalFS_multimodal_all_education_boys_high_COG.tif
Persist raster: all_education_boys_middle at P:\PAK\Code\kpgit_robert\data\access\globalFS/GlobalFS_multimodal_all_education_boys_middle_COG.tif
Persist raster: all_education_boys_primary at P:\PAK\Code\kpgit_robert\data\access\globalFS/GlobalFS_multimodal_all_education_boys_primary_COG.tif
Persist raster: all_education_gir

In [11]:
# THESE MUST BE COGS
def _mask_nodata(arr):
    """Return `arr` keeping only cells that differ from its first `nodatavals` entry."""
    return arr.where(arr != arr.nodatavals[0])

# One variable per raster, band 1 only, with nodata cells masked out
rasters_ds = xr.Dataset(loaded_rasters).sel(band=1).map(_mask_nodata)

In [12]:
# points.head()

Process points onto rasters (lazy execution)

In [24]:
# Inspect the combined dataset (one variable per raster)
rasters_ds

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [86]:
# Inspect a single raster variable from the combined dataset
rasters_ds['all_education_allgirls']

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.76 MiB 1.76 MiB Shape (808, 571) (808, 571) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",571  808,

Unnamed: 0,Array,Chunk
Bytes,1.76 MiB,1.76 MiB
Shape,"(808, 571)","(808, 571)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [15]:
# NOTE(review): this key looks like it belongs to a different access_dir / naming scheme
# than the keys built above (e.g. 'all_education_allgirls'); with the current settings it
# will presumably raise KeyError on a fresh run -- update or remove.
rasters_ds['BeforeOSMInvestment_childwalk_dry_education_allgirls']

Unnamed: 0,Array,Chunk
Bytes,1.35 GiB,31.94 MiB
Shape,"(21136, 17193)","(487, 17193)"
Count,177 Tasks,44 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.35 GiB 31.94 MiB Shape (21136, 17193) (487, 17193) Count 177 Tasks 44 Chunks Type float32 numpy.ndarray",17193  21136,

Unnamed: 0,Array,Chunk
Bytes,1.35 GiB,31.94 MiB
Shape,"(21136, 17193)","(487, 17193)"
Count,177 Tasks,44 Chunks
Type,float32,numpy.ndarray


In [118]:
# FIXME: points_xr is only assigned in a later cell, so this fails with NameError
# when the notebook is run top to bottom on a fresh kernel.
points_xr.head()

In [24]:
# Display the 50,000-row sample read earlier
test_pts

Unnamed: 0,ix,POP,x_4326,y_4326,x,y,ADM1_EN,ADM2_EN,ADM3_EN,Adm1_Code,Adm2_Code,Adm3_Code
0,2148470,5.587176,73.545833,36.873889,905257.236250,4.090541e+06,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
1,2148497,5.587176,73.654444,36.873056,914950.266604,4.090916e+06,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
2,2148611,5.587176,73.394444,36.868889,891779.067584,4.089352e+06,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
3,2148619,5.587176,73.395000,36.868611,891830.046673,4.089324e+06,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
4,2148666,5.587176,73.435833,36.866389,895484.006545,4.089246e+06,Khyber Pakhtunkhwa,Chitral,Mastuj,PK2,PK206,PK20602
...,...,...,...,...,...,...,...,...,...,...,...,...
49995,2274365,83.226013,73.224861,35.268611,884378.373393,3.911023e+06,Khyber Pakhtunkhwa,Kohistan,Dassu,PK2,PK212,PK21201
49996,2274366,41.613007,73.225370,35.268426,884425.610930,3.911005e+06,Khyber Pakhtunkhwa,Kohistan,Dassu,PK2,PK212,PK21201
49997,2274367,83.226013,73.225556,35.268611,884441.592421,3.911026e+06,Khyber Pakhtunkhwa,Kohistan,Dassu,PK2,PK212,PK21201
49998,2274368,41.613007,73.225833,35.268611,884466.880037,3.911027e+06,Khyber Pakhtunkhwa,Kohistan,Dassu,PK2,PK212,PK21201


In [34]:
# Wrap the sample points' projected coordinates in an xarray Dataset; both variables
# share the DataFrame's index as their dimension.
point_coords = test_pts[["x", "y"]]
points_xr = xr.Dataset.from_dataframe(point_coords)

# Select pixel values at points
# Because the x and y indexers share one dimension, .sel pairs them up and returns one
# nearest-pixel value per point rather than an x-by-y grid.
# NOTE(review): the result shown below is all NaN. method="nearest" still returns an edge
# pixel for coordinates outside the raster, so check that the points' x/y and this raster
# are in the same CRS -- TODO confirm.
health_secondary = rasters_ds['all_health_secondary']
pixel_values = health_secondary.sel(x=points_xr["x"], y=points_xr["y"], method="nearest")

In [35]:
# Drop the extra coordinates, then flatten the lookup result into a one-column frame named 'test'
values_only = pixel_values.reset_coords(drop=True)
pixel_values_df = values_only.to_dataframe(name='test')

In [36]:
# Largest sampled values first
pixel_values_df.sort_values(by='test', ascending=False)

Unnamed: 0_level_0,test
index,Unnamed: 1_level_1
0,
1,
2,
3,
4,
...,...
49995,
49996,
49997,
49998,


In [87]:
# map pixel values to the points by relating the master Xarray dataset (not the individual TIF rasters) to the points
# The whole dataset is passed: later cells read one column per raster (for example
# all_education_primary and every key of `rasters`), which a single variable cannot supply.
# Assumes pixel_values_at_points (from raster_vals_to_pts) accepts a Dataset -- TODO confirm.
df_pixels = pixel_values_at_points(rasters_ds, points)
# df_pixels.head()

In [88]:
# Force evaluation of the lazy join and display the result.
# NOTE(review): the recorded output is an empty list -- check that the join returned what was expected.
df_pixels.compute()

[]

In [None]:
# Keep only points whose all_education_primary value is below FRICTION_THRESHOLD
# (an earlier version filtered on dry_education_primary instead)
below_threshold = df_pixels['all_education_primary'] < FRICTION_THRESHOLD
df_pixels = df_pixels[below_threshold]

# Keep the first occurrence of each column name, then hold the result in cluster memory
first_occurrence = ~df_pixels.columns.duplicated()
df_pixels = df_pixels.loc[:, first_occurrence].persist()

Basic data manipulations

In [None]:
# Total POP of the retained points per Adm2_Code
adm2_pop_totals = df_pixels.groupby('Adm2_Code')['POP'].sum()
adm2_pop = adm2_pop_totals.to_frame("adm2_pop")

# Total POP of the retained points per Adm3_Code
adm3_pop_totals = df_pixels.groupby('Adm3_Code')['POP'].sum()
adm3_pop = adm3_pop_totals.to_frame("adm3_pop")

In [None]:
# Attach each point's admin-2 and admin-3 population totals as new columns.
# The totals frames are indexed by admin code, hence right_index=True.
for pop_totals, code_column in ((adm2_pop, "Adm2_Code"), (adm3_pop, "Adm3_Code")):
    df_pixels = dd.merge(df_pixels, pop_totals, how='left', left_on=code_column, right_index=True)

#points = points.persist()
df_pixels = df_pixels.persist()

In [None]:
# Show the lazy frame's structure (columns, dtypes, partitions)
df_pixels

In [None]:
# Peek at the first rows after the population totals were merged in
df_pixels.head()

In [None]:
# Population weight of each point within its enclosing admin area:
# e.g. a point with population 10 inside an admin unit of population 100 gets weight 0.1
for level in (2, 3):
    df_pixels[f'wt_adm_{level}'] = df_pixels['POP'] / df_pixels[f'adm{level}_pop']

Create two population-weighted columns per raster: the raster's value at each point multiplied by the point's admin-2 and admin-3 population weight

In [None]:
# Names of the population-weighted columns, one per raster and admin level
adm2_avg_cols = [f"{rkey}_avg_adm2" for rkey in rasters]
adm3_avg_cols = [f"{rkey}_avg_adm3" for rkey in rasters]

# Weighted value = the raster's value at the point times the point's population weight
for rkey, adm2_col, adm3_col in zip(rasters, adm2_avg_cols, adm3_avg_cols):
    df_pixels[adm2_col] = df_pixels[f"{rkey}"] * df_pixels['wt_adm_2']
    df_pixels[adm3_col] = df_pixels[f"{rkey}"] * df_pixels['wt_adm_3']

In [None]:
# List the weighted column names, admin-2 first and admin-3 second
for column_names in (adm2_avg_cols, adm3_avg_cols):
    print(",".join(column_names))

Sum the weighted columns within each admin unit to get population-weighted means

In [None]:
# Sum the weighted columns within each admin unit, then turn the admin code back into a column
adm2_sums = df_pixels.groupby(['Adm2_Code'])[adm2_avg_cols].sum()
adm2_final = adm2_sums.reset_index()

adm3_sums = df_pixels.groupby(['Adm3_Code'])[adm3_avg_cols].sum()
adm3_final = adm3_sums.reset_index()

Export

In [None]:
# Output locations; each file name is prefixed with the time period under study
adm2_output = f"../../data/outputs/{time_period}_adm2_mean.csv"
adm3_output = f"../../data/outputs/{time_period}_adm3_mean.csv"
df_pixels_out = f"../../data/outputs/{time_period}_df_pixels_final-*.csv" # keep the *: dask replaces it with the partition number when writing one CSV per partition

In [16]:
%time
adm2_final.to_csv(adm2_output, single_file=True)

Wall time: 0 ns


NameError: name 'adm2_final' is not defined

In [None]:
%time
adm3_final.to_csv(adm3_output, single_file=True)

In [26]:
# Regroup into partitions of about 100MB before export; the export cell writes one CSV per partition
df_pixels = df_pixels.repartition(partition_size="100MB")

In [27]:
%time
df_pixels.to_csv(df_pixels_out, single_file=False)

Wall time: 0 ns


['P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-00.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-01.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-02.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-03.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-04.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-05.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-06.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-07.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-08.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-09.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-10.csv',
 'P:/PAK/Code/kpgit/kpgit/data/outputs/BeforeOSMInvesment_df_pixels_final-11.csv',
 'P: