In [1]:
import os, sys
import numpy as np
import geopandas as gpd
# sys.path.append('../OnStove')

In [2]:
# %load_ext autoreload

In [3]:
# %autoreload 2
from onstove import OnStove, DataProcessor, RasterLayer, VectorLayer
import time

# Data processing

## 1. Create a data processor
First, we will create an instance of the `DataProcessor` object, which will be used to add raw datasets, process them and generate the required output datapackages for the CCE and OnStove. The `DataProcessor` object accepts three optional arguments: `project_crs`, `cell_size` and `output_directory`. The first sets the Coordinate Reference System (CRS) to use in the project, meaning that all datasets will be reprojected to match that CRS. The second defines the desired cell size (i.e. width and height in meters) of the output datasets, ensuring that all output datasets match that cell size. The third is the output folder in which all results will be written; this parameter defaults to `output`.

In [4]:
# Wall-clock reference point; the last cell subtracts it from the end time
# to report how long the whole notebook took.
start = time.time()

# Folder that receives every processed output of this notebook.
output_directory = '../5. Results'

# project_crs: CRS that all datasets are reprojected to.
# cell_size: (width, height) of the output cells, in meters.
data = DataProcessor(project_crs=3857, cell_size=(1000, 1000))
data.output_directory = output_directory

## 2. Add a mask layer (country boundaries) and a base layer
A vector layer can be added as a mask, which will later be used to mask/clip all output datasets to the layer boundaries. For this, we use the `add_mask_layer` method, providing a `category` and a `name` for the layer and the `path` from which to read the data (here, a shapefile with the administrative boundaries).

In [5]:
# Administrative boundaries used to mask/clip the output datasets.
# The path uses forward slashes only: backslashes are path separators on
# Windows alone, so a mixed "/" + "\" path fails to resolve on Linux/macOS,
# whereas forward slashes are accepted on every platform.
adm_path = "../2. Data/Other/Administrative boundaries/NPL_adm1_Province0.shp"
data.add_mask_layer(category='Administrative', name='Country_boundaries', path=adm_path)


A raster base layer is needed to make every output match its grid and extent. For this, two additional options need to be passed to the `add_layer` method:
* `base_layer`: if `True` the added layer will be considered as the base layer. 
* `resample`: this is the resampling method to be used when resampling this layer to the desired `cell_size` if a `cell_size` is provided.

## 3. Add GIS layers
Similarly, we can add data layers using the `add_layer` method. A layer `name`, a `path` and a `postgres` connection also need to be provided (the `postgres` connection defaults to `False`). In addition, the following arguments can be passed:
* `category`: this is used to group all datasets into a category in the final output, e.g. `demand` or `supply`. 
* `layer_type`: this argument is required and has two possible options, `raster` or `vector`; pass the one that matches the dataset you are adding. 
* `resample`: this defines what resampling method to use when changing the resolution of the raster. The change of resolution happens when the layer gets aligned with the base layer.

### Demographics

In [6]:
# Paths use forward slashes only so they resolve on Windows and POSIX alike
# (the previous mixed "/" + "\" form only worked on Windows).

# Population raster, resampled with 'sum' when aligned to the base layer.
pop_path = "../2. Data/Demand/Population/HRSL/population_npl_2018-10-01_geotiff/population_npl_2018-10-01.tif"
data.add_layer(category='Demographics', name='Population',
               path=pop_path, layer_type='raster', resample='sum')

# Urban/rural classification raster, resampled with 'mode'.
ghs_path = "../2. Data/Other/Urban - Rural divide/GHS.tif"
data.add_layer(category='Demographics', name='Urban_rural_divide',
               path=ghs_path, layer_type='raster', resample='mode')



### Biomass

In [7]:
# Paths use forward slashes only so they resolve on Windows and POSIX alike
# (the previous mixed "/" + "\" form only worked on Windows).
forest_path = "../2. Data/Other/Forest cover/Forest_height.tif"
data.add_layer(category='Biomass', name='Forest',
               path=forest_path, layer_type='raster', resample='sum')

# Work on a single reference to the layer instead of repeating the lookup.
forest = data.layers['Biomass']['Forest']

# Turn the raster into a 0/1 mask: values below 5 become 0, the rest become 1.
# NOTE(review): the file name suggests the values are forest heights, which
# would make 5 a height threshold -- confirm the unit against the data source.
forest.data[forest.data < 5] = 0
forest.data[forest.data >= 5] = 1
forest.meta['nodata'] = 0

# `factor` is the ratio between the squared output cell size and the squared
# first coefficient of the layer's default transform in the project CRS
# (presumably the source pixel width, i.e. the number of source pixels per
# output cell -- confirm). It is used by the canopy cover cell further down.
transform = forest.calculate_default_transform(data.project_crs)[0]
factor = (data.cell_size[0] ** 2) / (transform[0] ** 2)


# Walking friction surface; registered as the base layer of the project.
friction_path = "../2. Data/Other/Walking friction/2020_walking_only_friction_surface.geotiff"
data.add_layer(category='Biomass', name='Friction', path=friction_path, distance_method='travel_time',
               layer_type='raster', resample='average', window=True, base_layer=True)



### Electricity

#### Medium voltage lines

In [8]:
# Medium voltage network. Forward slashes keep the path valid on Windows and
# POSIX alike (the previous mixed "/" + "\" form only worked on Windows).
mv_path = "../2. Data/Supply/Power network/MV-network/Nepal_DL0.shp"
# The query keeps only the lines whose Status marks them as in service.
data.add_layer(category='Electricity', name='MV_lines',
               path=mv_path, layer_type='vector',
               query="Status == 'In Service' | Status == 'In Service (Private)'")

#### Night time lights

In [9]:
# Night time lights raster. Forward slashes keep the path valid on Windows and
# POSIX alike (the previous mixed "/" + "\" form only worked on Windows).
ntl_path = "../2. Data/Other/Night Time Lights/VNL_v21_npp_2020_global_vcmslcfg_c202205302300.average_masked.dat.tif"
data.add_layer(category='Electricity', name='Night_time_lights',
               path=ntl_path, layer_type='raster', resample='average')



#### Mini grids

In [10]:
# Paths use forward slashes only so they resolve on Windows and POSIX alike
# (the previous mixed "/" + "\" form only worked on Windows).

# Point locations of the micro hydropower plants.
mg_points_path = "../2. Data/Supply/Power network/MG-hydro/micro_hydropower.shp"
data.add_layer(category='Electricity', name='MG_points',
               path=mg_points_path, layer_type='vector')

# Municipalities with mini-grid hydro access.
mg_access_path = "../2. Data/Supply/Power network/MG-hydro/Municipalities with MG hydro/mg_hydro.geojson"
data.add_layer(category='Electricity', name='MG_access',
               path=mg_access_path, layer_type='vector')

# Harmonise the column names and keep only the columns used downstream.
# A non-mutating rename followed by the selection replaces the previous
# `inplace=True` rename, so the table is rebuilt in a single assignment.
mg_access = data.layers['Electricity']['MG_access']
mg_column_names = {'Municipality': 'municipality',
                   'kW_constructed': 'capacity',
                   'HHs_constructed': 'households'}
mg_access.data = mg_access.data.rename(columns=mg_column_names)[
    ['municipality', 'capacity', 'households', 'geometry']
]

### LPG

In [11]:
# Paths use forward slashes only so they resolve on Windows and POSIX alike
# (the previous backslash-separated form only worked on Windows).

# Travel time raster used for LPG.
lpg_path = "../2. Data/Other/Traveltime/traveltime_to_urban_by_road.tif"
data.add_layer(category='LPG', name='LPG Traveltime',
               path=lpg_path, layer_type='raster', resample='average')

# Road network (vector).
roads_path = "../2. Data/Other/Roads/Road_Networks_of_Nepal_OSM0.shp"
data.add_layer(category='LPG', name='Roads',
               path=roads_path, layer_type='vector')

### Biogas

In [12]:
# Livestock rasters, one per species. The layer names (dictionary keys) are
# kept exactly as before because they identify the layers in `data.layers`.
# Paths are built with forward slashes only so they resolve on Windows and
# POSIX alike (the previous mixed "/" + "\" form only worked on Windows).
livestock_dir = "../2. Data/Supply/Global livestock"
livestock_files = {
    'buffaloes': "Buffaloes/5_Bf_2010_Da.tif",
    'cattles': "Cattle/5_Ct_2010_Da.tif",
    'poultry': "Chickens/5_Ch_2010_Da.tif",
    'goats': "Goats/5_Gt_2010_Da.tif",
    'pigs': "Pigs/5_Pg_2010_Da.tif",
    'sheeps': "Sheep/5_Sh_2010_Da.tif",
}

# Every species is added with identical options; only name and path differ.
for key, relative_path in livestock_files.items():
    data.add_layer(category='Biogas/Livestock', name=key,
                   path=f"{livestock_dir}/{relative_path}",
                   layer_type='raster', resample='nearest', window=True, rescale=True)

In [13]:
# Temperature raster for the Biogas category. Forward slashes keep the path
# valid on Windows and POSIX alike (the previous mixed "/" + "\" form only
# worked on Windows).
temperature = "../2. Data/Other/Temperature/TEMP.tif"
data.add_layer(category='Biogas', name='Temperature', path=temperature,
               layer_type='raster', resample='average', window=True)



### Relative Wealth Index

In [14]:
# Relative Wealth Index raster. Forward slashes keep the path valid on Windows
# and POSIX alike (the previous backslash-separated form only worked on
# Windows).
wealth_path = "../2. Data/Demand/Wealth Index/Relative Wealth Index.tif"
data.add_layer(category='Other', name='Wealth',
               path=wealth_path,
               layer_type='raster', normalization='MinMax', inverse=False, resample='nearest')

### GDP per capita

In [15]:
# GDP per capita raster. Forward slashes keep the path valid on Windows and
# POSIX alike (the previous backslash-separated form only worked on Windows).
gdp_path = "../2. Data/Demand/GDP/dryadfilled.tif"
data.add_layer(category='Other', name='GDP',
               path=gdp_path,
               layer_type='raster', normalization='MinMax', inverse=False, resample='nearest')

# Replace the file's nodata marker with NaN, both in the array and in the
# metadata, so the two stay consistent.
# NOTE(review): assigning NaN assumes the array has a floating-point dtype --
# confirm for this dataset.
gdp = data.layers['Other']['GDP']
nodata = gdp.meta['nodata']
gdp.data[gdp.data == nodata] = np.nan
gdp.meta['nodata'] = np.nan

## 4. Mask, reproject and align all required layers

In [16]:
# Align the layers added above; the change of raster resolution happens at
# this step, when a layer gets aligned with the base layer.
# NOTE(review): 'all' presumably selects every layer in `data.layers` -- confirm.
data.align_layers(datasets='all')

In [17]:
# Vector layers to reproject, grouped by category
# (presumably into the project CRS -- confirm).
layers_to_reproject = {
    'Electricity': ['MG_points', 'MG_access', 'MV_lines'],
    'LPG': ['Roads'],
}
data.reproject_layers(datasets=layers_to_reproject)

In [18]:
# Vector layers for which a distance raster is generated, grouped by category.
distance_sources = {
    'Electricity': ['MG_points', 'MV_lines'],
    'LPG': ['Roads'],
}
data.get_distance_rasters(datasets=distance_sources)

In [19]:
## Calculate canopy cover
# `factor` comes from the Biomass cell. The layer is rescaled by it and
# expressed as a percentage capped at 100.
# Caution: the layer is overwritten with a function of itself, so running this
# cell twice in the same session divides by `factor` twice.
forest = data.layers['Biomass']['Forest']
forest.data = (forest.data / factor) * 100
forest.data[forest.data > 100] = 100

In [21]:
# Save the processed datasets.
# NOTE(review): presumably written under `data.output_directory`, with 'all'
# selecting every layer -- confirm.
data.save_datasets(datasets='all')

## 5. Rasterize current cooking technologies data

In [26]:
# Census table with the share of each cooking technology per polygon.
cooking_techs = VectorLayer(path='../2. Data/Census 2021/cooking.geojson')

# The population raster supplies both the target CRS and the raster passed to
# `rasterize` below.
population = data.layers['Demographics']['Population']
cooking_techs.reproject(population.meta['crs'])

# Census column name -> technology name used for the output rasters.
tech_names = {
    'Percentage Wood/firewood': 'Traditional_Biomass',
    'Percentage Liquefied Petroleum Gas': 'LPG',
    'Percentage Electricity': 'Electricity',
    'Percentage Bio gas': 'Biogas',
}
cooking_techs.data = cooking_techs.data.rename(columns=tech_names)

# Cow dung is counted as traditional biomass together with wood/firewood.
cooking_techs.data['Traditional_Biomass'] += cooking_techs.data['Percentage Cow dung']

# One raster per technology, saved under the Demographics/Cooking folder.
for tech in tech_names.values():
    raster = cooking_techs.rasterize(raster=population, attribute=tech)
    raster.name = tech
    raster.save(f'{output_directory}/Demographics/Cooking/{tech}')

In [27]:
# Report the wall-clock time elapsed since `start` was taken in the first
# processing cell, as whole minutes plus remaining whole seconds.
end = time.time()

diff = end - start
minutes = int(diff // 60)
seconds = int(diff % 60)
print('Execution time:', f'{minutes} min {seconds} sec')

Execution time: 12 min 23 sec
