# Define some utility functions
### Provide consecutive windows by which a raster can be iterated over.

In [31]:
class ArrayIterator:
    """Step through a raster in consecutive, non-overlapping windows.

    A window has the form ``((row_start, row_stop), (col_start, col_stop))``.
    Windows are visited left to right within a band of rows, then the
    iterator moves down to the next band and starts again at column 0.

    Only the ``height`` and ``width`` attributes of ``raster`` are used.
    Windows are not clipped to the raster extent: if the raster size is not
    a multiple of the window size, the last window of a band extends past
    the edge.
    """

    def __init__(self, raster, window_height, window_width):
        self.raster = raster
        self.window_height = window_height
        self.window_width = window_width
        self.current_window = self._first_window()
        self.reached_end = False

    def _first_window(self):
        """Return the window anchored at the top-left corner."""
        return ((0, self.window_height), (0, self.window_width))

    def go_to_next(self):
        """Advance ``current_window`` by one step.

        When the current window is already the last one, the window is left
        unchanged and ``reached_end`` is set instead.
        """
        (_, row_stop), (_, col_stop) = self.current_window

        # Not yet at the end of the current band of rows: move right.
        if col_stop < self.raster.width:
            self.current_window = (
                self.current_window[0],
                (col_stop, col_stop + self.window_width),
            )
        # End of the band, but more rows remain: move down, back to column 0.
        elif row_stop < self.raster.height:
            self.current_window = (
                (row_stop, row_stop + self.window_height),
                (0, self.window_width),
            )
        # End of the raster.
        else:
            self.reached_end = True

    def pop_window(self):
        """Return the current window and advance to the next one."""
        current_window = self.current_window
        self.go_to_next()
        return current_window

    def has_reached_end(self):
        """Return True once the last window has been popped."""
        return self.reached_end

    def reset(self):
        """Rewind to the first window so the raster can be iterated again."""
        # The window size lives on the instance; the bare names
        # ``window_height`` / ``window_width`` are not defined in this scope.
        self.current_window = self._first_window()
        self.reached_end = False

#### Demonstrate ArrayIterator on an example

In [32]:
def print_window(window):
    """Print the row bounds and column bounds of ``window`` on labelled lines.

    ``window`` is indexed as ``window[0]`` for the row bounds and
    ``window[1]`` for the column bounds; each holds a (start, stop) pair.
    """
    row_bounds = window[0]
    print(f'rows:\t\t{row_bounds[0]} - {row_bounds[1]}')
    col_bounds = window[1]
    print(f'columns:\t{col_bounds[0]} - {col_bounds[1]}\n')

In [34]:
class fake_raster:
    """Minimal raster stand-in that exposes only ``height`` and ``width``."""

    def __init__(self, array):
        # The two entries of the array's shape become height and width.
        n_rows, n_cols = array.shape
        self.height = n_rows
        self.width = n_cols
        
# Example: a 10x10 array visited in 5x5 windows.
foo = np.arange(100).reshape(10, 10)

window_iterator = ArrayIterator(fake_raster(foo), 5, 5)

while True:
    if window_iterator.has_reached_end():
        break
    # Each window is shown as a 2x2 array: row bounds first, column bounds second.
    # (print_window(window_iterator.pop_window()) gives a labelled alternative.)
    print(np.array(window_iterator.pop_window()))
    print()
        

[[0 5]
 [0 5]]

[[ 0  5]
 [ 5 10]]

[[ 5 10]
 [ 0  5]]

[[ 5 10]
 [ 5 10]]



### Prepare data: replace NaNs with zeros and make binary

In [3]:
def prepare_data(data):
    """Convert raster values to a binary ``uint8`` mask.

    NaN entries are treated as zero. Every strictly positive value becomes 1
    and every other value (zero or negative) becomes 0. The input array is
    not modified.

    Parameters
    ----------
    data : array_like
        Numeric values, possibly containing NaN.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``data`` with dtype ``uint8`` and values
        in {0, 1}.
    """
    # Build the mask from the comparison itself rather than casting the
    # values: casting would wrap negative entries around to large positive
    # uint8 values, and the result would no longer be binary.
    return (np.nan_to_num(data) > 0).astype(np.uint8)

### For a window=((row_upper, row_lower), (col_left, col_right)), get all pixels contained in window

In [2]:
# https://stackoverflow.com/questions/1208118/using-numpy-to-build-an-array-of-all-combinations-of-two-arrays
def get_pixels(window):
    """Return every (row, col) index pair contained in ``window``.

    ``window`` is ``((row_start, row_stop), (col_start, col_stop))`` with
    exclusive stops. The result has one pair per line, ordered row by row
    (all columns of the first row, then the second row, and so on).
    """
    row_idx = np.arange(window[0][0], window[0][1])
    col_idx = np.arange(window[1][0], window[1][1])
    # 'ij' indexing puts rows on the first axis, so a plain ravel walks the
    # grid row by row.
    row_grid, col_grid = np.meshgrid(row_idx, col_idx, indexing='ij')
    return np.array([row_grid.ravel(), col_grid.ravel()]).T

# Pipeline
* prepare data
* initialize empty GRID_counter that is same as GRID_raster, but all zeros
* for each pixel FB_pixel in FB_raster:
    * find its geo-location
    * find the pixel GRID_pixel in GRID dataset that *contains* that geo-location
    * if FB_pixel == 1, increment GRID_counter[GRID_pixel]
* return GRID_counter

To keep this fast, process FB_raster one window at a time and vectorize the work within each window.

In [4]:
import rasterio
import numpy as np

In [35]:
# FB raster: path and open dataset handle
fb_path = "data/humdata/population_nga_2018-10-01.tif"
fb_raster = rasterio.open(fb_path)

# GRID raster: path and open dataset handle
grid_path = "data/grid3/NGA - population - v1.2 - mastergrid.tif"
grid_raster = rasterio.open(grid_path)

In [36]:
# show the shape of the GRID raster
grid_raster.shape

(11546, 14413)

Find appropriate window shape for FB raster:
* prime factorization of fb_raster.height = 34558: 2 * 37 * 467
* prime factorization of fb_raster.width = 43172: 2 * 2 * 43 * 251
* choose window shape: (467,251)
* then window size: 467 * 251 = 117217

About memory:
* fb_raster.read(1) has size ca. 11 GB, therefore we don't want to handle it in-memory
* grid_raster.shape = (11546, 14413)
* therefore `np.zeros(grid_raster.shape, dtype=np.uint8)` occupies 11546 * 14413 = 166,412,498 bytes (ca. 0.155 GiB), so we can handle it in-memory

In [6]:
# counter array with the same shape as the GRID raster, starting at zero
result = np.zeros(shape=grid_raster.shape, dtype=np.uint8)

In [None]:
# iterate over the FB raster in windows of 467 rows by 251 columns
window_iterator = ArrayIterator(raster=fb_raster, window_height=467, window_width=251)

#### rasterio + numpy go brrr

In [None]:
# The vectorized wrappers do not depend on the current window, so build them
# once instead of on every iteration.
fb_raster_vxy = np.vectorize(fb_raster.xy)  # gets center
grid_raster_vindex = np.vectorize(grid_raster.index)

# For every window of the FB raster: find the populated FB pixels, map each
# one to a pixel of the GRID raster and add the per-GRID-pixel counts to
# `result`.
while not window_iterator.has_reached_end():

    window = window_iterator.pop_window()
    assert (window[0][1] - window[0][0]) == 467
    assert (window[1][1] - window[1][0]) == 251

    # read data from FB raster using current window
    data = fb_raster.read(1, window=window)
    assert data.shape == (467, 251)

    # replace nan with 0 and >0 with 1
    data = prepare_data(data)

    # get all pixels contained in window
    pixels = get_pixels(window)

    # keep only those pixels for which data has a 1 entry
    pixels = pixels[data.ravel() > 0]

    # nothing populated in this window: continue with the next one
    if pixels.size == 0:
        continue

    # use FB raster to get coordinates for each pixel
    xcoords, ycoords = fb_raster_vxy(pixels[:, 0], pixels[:, 1])

    # for each coordinate get corresponding pixel in the GRID raster
    # NOTE(review): coordinates outside the GRID raster are not filtered here;
    # presumably both rasters cover the same extent -- confirm.
    grid_pixels = np.vstack(grid_raster_vindex(xcoords, ycoords, op=round, precision=15)).T

    # get unique counts for pixels in GRID raster
    grid_pixels_unique, counts = np.unique(grid_pixels, return_counts=True, axis=0)

    # update result; the index pairs are unique within one window, so the
    # fancy-indexed += applies every count exactly once
    # NOTE(review): the counts are cast to uint8, so more than 255 FB pixels
    # landing in one GRID pixel would wrap around silently -- confirm this
    # cannot happen for these rasters.
    result[grid_pixels_unique[:, 0], grid_pixels_unique[:, 1]] += counts.astype(np.uint8)

# took 17 mins to run

In [None]:
# write to csv file
#result.tofile('nigeria-fb_to_grid_mapping.csv', ',')

In [None]:
# read from csv file
#myresult = np.genfromtxt('nigeria-fb_to_grid_mapping.csv', delimiter=',')

#### rasterio + numpy go brrr

In [8]:
# NOTE(review): this cell repeats an earlier cell with the same loop. If both
# are run in sequence without re-creating `window_iterator`, the iterator is
# already exhausted and this loop does nothing.

# The vectorized wrappers do not depend on the current window, so build them
# once instead of on every iteration.
fb_raster_vxy = np.vectorize(fb_raster.xy)  # gets center
grid_raster_vindex = np.vectorize(grid_raster.index)

# For every window of the FB raster: find the populated FB pixels, map each
# one to a pixel of the GRID raster and add the per-GRID-pixel counts to
# `result`.
while not window_iterator.has_reached_end():

    window = window_iterator.pop_window()
    assert (window[0][1] - window[0][0]) == 467
    assert (window[1][1] - window[1][0]) == 251

    # read data from FB raster using current window
    data = fb_raster.read(1, window=window)
    assert data.shape == (467, 251)

    # replace nan with 0 and >0 with 1
    data = prepare_data(data)

    # get all pixels contained in window
    pixels = get_pixels(window)

    # keep only those pixels for which data has a 1 entry
    pixels = pixels[data.ravel() > 0]

    # nothing populated in this window: continue with the next one
    if pixels.size == 0:
        continue

    # use FB raster to get coordinates for each pixel
    xcoords, ycoords = fb_raster_vxy(pixels[:, 0], pixels[:, 1])

    # for each coordinate get corresponding pixel in the GRID raster
    # NOTE(review): coordinates outside the GRID raster are not filtered here;
    # presumably both rasters cover the same extent -- confirm.
    grid_pixels = np.vstack(grid_raster_vindex(xcoords, ycoords, op=round, precision=15)).T

    # get unique counts for pixels in GRID raster
    grid_pixels_unique, counts = np.unique(grid_pixels, return_counts=True, axis=0)

    # update result; the index pairs are unique within one window, so the
    # fancy-indexed += applies every count exactly once
    # NOTE(review): the counts are cast to uint8, so more than 255 FB pixels
    # landing in one GRID pixel would wrap around silently -- confirm this
    # cannot happen for these rasters.
    result[grid_pixels_unique[:, 0], grid_pixels_unique[:, 1]] += counts.astype(np.uint8)

# took 17 mins to run

In [10]:
# write to csv file
#result.tofile('nigeria-fb_to_grid_mapping.csv', ',')

In [None]:
# read from csv file
#myresult = np.genfromtxt('nigeria-fb_to_grid_mapping.csv', delimiter=',')