## 9_urban_rural_segregation
### Categorizes buildings based on their location into a fitting urbanization status, uses the pre-processed polygons from 8_rural_urban_json_segregation
### This notebook needs to be executed twice, each time together with 8_rural_urban_json_segregation: first to categorize buildings on an "overview" level (urban/suburban/rural categories), and a second time to categorize buildings into finer-grained categories using the "detailed" value of the SEGREGATION_STYLE option

### Initial configuration
#### To start working with this notebook, you need to provide the necessary credentials and settings
#### Below is a configuration template. Prepare it outside of this notebook, fill in your own values, and copy & paste everything between the triple quotes into the next cell's input field
    """
    {
    "COS_ENDPOINT_URL": "s3.private.eu-de.cloud-object-storage.appdomain.cloud",
    "COS_AUTH_ENDPOINT_URL": "https://iam.cloud.ibm.com/oidc/token",
    "COS_APIKEY": "xxx",
    "UTILS_BUCKET": "notebook-utils-bucket",
    "DB2_CONNECTION_STRING": "jdbc:db2://65beb513-5d3d-4101-9001-f42e9dc954b3.brt9d04f0cmqeb8u7740.databases.appdomain.cloud:30371/BLUDB:sslConnection=true;useJDBC4ColumnNameAndLabelSemantics=false;db2.jcc.charsetDecoderEncoder=3;",
    "DB2_USERNAME": "xxx",
    "DB2_PASSWORD": "xxx",
    "COUNTRY_TABLE": "FEATURES_DB_VIDA_EXTENDED",
    "COUNTRY_NAME": "Kenya",
    "SMOD_BUCKET": "xxx",
    "SEGREGATION_STYLE": "overview"
    }
    """


In [1]:
# Read the notebook configuration.
# The prepared JSON is requested through getpass because it holds credentials
# (API key, DB2 user and password) that should not be echoed into the notebook.
import getpass
import json

config_str = getpass.getpass(prompt='Enter your prepared config: ')
config = json.loads(s=config_str)

In [2]:
# Import necessary libraries
import pandas as pd
import jaydebeapi as jdbc
import jpype
import ibm_boto3
import gc
import io
import os
import shapely
import numpy as np
import rasterio as rio
from botocore.client import Config
from rasterio.plot import show
from tqdm import tqdm
from rasterio.mask import mask
import matplotlib.pyplot as plt
import rioxarray
from skimage import measure as M

In [None]:
# Country and segregation settings derived from the notebook configuration.
# Add new country-specific bounding box coordinates below if needed.
table_name = config["COUNTRY_TABLE"]
country = config["COUNTRY_NAME"]
output_SMOD_polygon_geojson =  country + "_segregated_cleaned.json"

# Bounding box used later to tile the "set default category" updates.
# It stays 0/0/0/0 for countries that have no entry below.
min_latitude = 0
max_latitude = 0
min_longitude = 0
max_longitude = 0
# The bounding box is keyed by the country NAME. The original compared the
# table name (COUNTRY_TABLE) with 'Kenya', which never matches the configured
# table, so the box stayed empty and no default category was ever written.
if country == 'Kenya':
    min_latitude = -4.7075268
    max_latitude = 5.017422
    min_longitude = 33.9110224
    max_longitude = 41.8914004

if min_latitude == max_latitude or min_longitude == max_longitude:
    print('Warning: no bounding box defined for country', country,
          '- the default category will not be assigned')

# segregation:            category key -> list of integer class codes of that category
# segregation_priorities: category keys, highest priority first
# default_category:       label for buildings that fall into no polygon
# db_col_name:            table column that receives the label
segregation = {}
segregation_priorities = []
default_category = ""
db_col_name = ""

segregation_style = config["SEGREGATION_STYLE"]
if segregation_style == "overview":
    segregation = {
        'URBAN': [22, 23, 30],
        'SUBURBAN': [21],
        'RURAL': [12, 13],
    }
    segregation_priorities = ['URBAN', 'SUBURBAN']
    default_category = 'Rural'
    db_col_name = 'URBAN_SPLIT'
elif segregation_style == "detailed":
    segregation = {
        'URBAN_CENTER': [30],
        'DENSE_URBAN': [23],
        'SEMI_DENSE_URBAN': [22],
        'SUBURBAN_PERI_URBAN': [21],
        'RURAL_CLUSTER': [13],
        'LOW_DENSITY_RURAL': [12],
    }
    segregation_priorities = ['URBAN_CENTER', 'DENSE_URBAN', 'SEMI_DENSE_URBAN', 'SUBURBAN_PERI_URBAN', 'RURAL_CLUSTER', 'LOW_DENSITY_RURAL']
    default_category = 'Very Low Density Rural'
    db_col_name = 'GHSL_SMOD'
else:
    # Without a known style db_col_name would stay empty and every later
    # UPDATE statement would be invalid, so fail early with a clear message.
    raise ValueError(
        'SEGREGATION_STYLE must be "overview" or "detailed", got: ' + repr(segregation_style)
    )

# Human-readable labels written to the database, keyed by category key.
segregation_names = {}
segregation_names['URBAN'] = 'Urban'
segregation_names['SUBURBAN'] = 'Suburban'
segregation_names['RURAL'] = 'Rural'
segregation_names['URBAN_CENTER'] = 'Urban Center'
segregation_names['DENSE_URBAN'] = 'Dense Urban Cluster'
segregation_names['SEMI_DENSE_URBAN'] = 'Semi-dense Urban Cluster'
# NOTE(review): 'Per-urban' looks like a typo for 'Peri-urban'. It is left
# unchanged because the label is stored in the database and may already be
# relied on by existing rows or downstream consumers - confirm before fixing.
segregation_names['SUBURBAN_PERI_URBAN'] = 'Suburban or Per-urban'
segregation_names['RURAL_CLUSTER'] = 'Rural Cluster'
segregation_names['LOW_DENSITY_RURAL'] = 'Low Density Rural'

In [3]:
# Copy every object of the utils bucket (e.g. db2jcc4.jar, utils.py, polygon json)
# to the local working directory and import the shared helpers afterwards
cos_client = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=config["COS_APIKEY"],
    ibm_auth_endpoint=config["COS_AUTH_ENDPOINT_URL"],
    config=Config(signature_version='oauth'),
    endpoint_url=config["COS_ENDPOINT_URL"],
)

response = cos_client.list_objects_v2(Bucket=config["UTILS_BUCKET"])

try:
    for bucket_entry in response['Contents']:
        object_key = bucket_entry['Key']
        object_body = cos_client.get_object(Bucket=config["UTILS_BUCKET"], Key=object_key)['Body']
        print("Copying to localStorage :  " + object_key)
        # The object is stored under its bucket key in the current directory
        with io.FileIO(object_key, 'w') as local_file:
            local_file.writelines(io.BytesIO(object_body.read()))

    # utils.py has just been downloaded above, so it can only be imported here
    from utils import *
    print('External utils succesfully imported')
except Exception as e:
    # Any download or import problem is reported but does not stop the notebook
    print('Error occured: ', e)

Copying to localStorage :  Kenya_urban_suburban.json
Copying to localStorage :  db2jcc4.jar
Copying to localStorage :  utils.py
External utils succesfully imported


In [4]:
# Helper that opens the IBM DB2 connection used by the following cells
def connect_to_db():
    '''
        Start the JVM with the DB2 JDBC driver on its class path and open a
        connection to the IBM DB2 database.

        Connection string and credentials are read from the global ``config``.
        An exception raised by ``jpype.startJVM`` is printed, not re-raised.

        Returns:
            A cursor created from the new jaydebeapi connection.
    '''
    driver_jar = 'db2jcc4.jar'
    os.environ['CLASSPATH'] = driver_jar

    jvm_option = f'-Djava.class.path={driver_jar}'
    default_jvm = jpype.getDefaultJVMPath()
    try:
        jpype.startJVM(default_jvm, jvm_option)
    except Exception as e:
        print('startJVM exception: ', e)

    # When a JVM is running but the current thread is not attached to it,
    # attach the thread and give it the system class loader.
    if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
        current_thread = jpype.java.lang.Thread.currentThread()
        current_thread.setContextClassLoader(jpype.java.lang.ClassLoader.getSystemClassLoader())

    connection = jdbc.connect(
        'com.ibm.db2.jcc.DB2Driver',
        config['DB2_CONNECTION_STRING'],
        [config["DB2_USERNAME"], config["DB2_PASSWORD"]],
        'db2jcc4.jar',
    )
    return connection.cursor()

curs = connect_to_db()

startJVM exception:  startJVM() got an unexpected keyword argument 'convert_strings'


In [None]:
# Load only the building coordinates, since those columns are indexed
columns = ['latitude', 'longitude']

# NOTE(review): the original hint here reads "slices if don't work by
# CLASSIFICATION_TYPE or TIFF_FILE" - presumably: if the full-table query does
# not work, fetch the table in slices by one of those columns. Confirm.
column_list = ', '.join(columns)
statement = f"""SELECT {column_list} FROM USER1.{table_name}"""

curs.execute(statement)
buildings = pd.DataFrame(curs.fetchall(), columns=columns)



In [7]:
#Function to update db2
def upd_split_db2(lat, lon, segment, cursor):
    """
    Write ``segment`` into the urbanization column of the building located
    exactly at (``lat``, ``lon``).

    The target table and column come from the globals ``table_name`` and
    ``db_col_name``. Database errors are printed, not raised, so that one
    failing row does not abort a long ingestion loop.

    Args:
        lat: latitude of the building, as read from the table.
        lon: longitude of the building, as read from the table.
        segment: label to store; bound as a statement parameter.
        cursor: open DB-API cursor that accepts qmark ("?") parameters.
    """
    # Built outside the try block so the error handler can always print it.
    # Identifiers cannot be bound as parameters, so table and column names are
    # interpolated. The coordinates stay literals on purpose: the decimal text
    # of a float round-trips exactly, which keeps the equality match on
    # LATITUDE/LONGITUDE identical to the values read from the table.
    sql = f"""
        UPDATE "USER1"."{table_name}"
            SET
                "{db_col_name}" = ?
            WHERE 
                ("LATITUDE" = {lat}) AND 
                ("LONGITUDE" = {lon})
        """
    try:
        # The label is bound as a parameter, so quotes or other special
        # characters in it cannot break or alter the statement.
        cursor.execute(sql, (segment,))
    except Exception as e:
        print(e, sql, segment)



In [None]:
# Download the geoJSON file with the segregated polygons into the working directory
streaming_body = cos_client.get_object(Bucket=config["SMOD_BUCKET"], Key=output_SMOD_polygon_geojson)['Body']
print("Downloading to local storage :  " + output_SMOD_polygon_geojson)
with io.FileIO(output_SMOD_polygon_geojson, 'w') as local_file:
    local_file.writelines(io.BytesIO(streaming_body.read()))

# Parse the downloaded polygons; each entry of geojson['features'] is handled one by one below
with open(output_SMOD_polygon_geojson) as polygon_file:
    geojson = json.load(polygon_file)

dfs_list = []

# Label the buildings that lie inside one polygon feature with that feature's category.
# A building covered by several polygons keeps the label of the feature processed last.
def process_buildings(feature):
    """
    Write the feature's category label to every building inside its polygon.

    Only the first coordinate ring of the geometry is used to build the polygon.
    Buildings are pre-filtered with the polygon's bounding box and then checked
    with an exact ``contains`` test; each hit is written to the database
    immediately through ``upd_split_db2``.

    Args:
        feature: geoJSON feature with ``geometry.coordinates`` and
            ``properties.seg_type`` (a key of ``segregation_names``).
    """
    outer_ring = feature['geometry']['coordinates'][0]
    polygon = shapely.Polygon(outer_ring)
    west, south, east, north = polygon.bounds

    # Cheap bounding-box pre-filter before the per-point polygon test
    inside_bbox = buildings.longitude.between(west, east) & buildings.latitude.between(south, north)
    candidates = buildings.where(inside_bbox).dropna()

    segment_label = segregation_names[feature['properties']['seg_type']]

    # Update db2 row by row; no labels are kept in the dataframe
    progress = tqdm(candidates.itertuples(), total=len(candidates), desc='ingestion_data')
    for building in progress:
        if not polygon.contains(shapely.Point(building.longitude, building.latitude)):
            continue
        upd_split_db2(building.latitude, building.longitude, segment_label, curs)


# Every processed feature overwrites the label of the buildings it contains, so
# where polygons overlap the LAST processed feature wins. segregation_priorities
# lists the categories from highest to lowest priority; processing the features
# in reverse priority order makes the highest-priority category win an overlap
# regardless of the order in which the features are stored in the geoJSON file.
def _overlap_rank(feature):
    """Return the feature's priority rank: 0 is the highest, unlisted categories rank last."""
    seg_type = (feature.get('properties') or {}).get('seg_type')
    if seg_type in segregation_priorities:
        return segregation_priorities.index(seg_type)
    return len(segregation_priorities)


# sorted() is stable, so features of the same category keep their file order
for feature in sorted(geojson['features'], key=_overlap_rank, reverse=True):
    process_buildings(feature)
    


In [None]:
# Set default urbanization values for all buildings that were not labelled above.
# The bounding box is updated tile by tile instead of with a single statement
# (presumably to keep each UPDATE small - confirm).
TILE_SIZE_DEG = 0.1  # edge length of one update tile, in degrees


def _tile_edges(lower, upper, step=TILE_SIZE_DEG):
    """
    Return increasing tile boundaries that cover the whole range [lower, upper].

    The last boundary is always ``upper`` itself. The original grid,
    ``np.arange(lower, upper, step)``, stopped at the last multiple of ``step``
    below ``upper``, so the remaining strip up to ``upper`` was never updated.
    An empty list is returned when the range is empty (no bounding box set).
    """
    if not upper > lower:
        return []
    edges = [float(value) for value in np.arange(lower, upper, step) if value < upper]
    edges.append(float(upper))
    return edges


latitudes = _tile_edges(min_latitude, max_latitude)
longitudes = _tile_edges(min_longitude, max_longitude)

if not latitudes or not longitudes:
    print('Bounding box is empty - no default urbanization values were set')

for lat_start, lat_end in zip(latitudes[:-1], latitudes[1:]):
    # Tiles are half-open [start, end) so that no row is updated twice; only
    # the last tile is closed so that rows exactly on the maximum are included.
    lat_upper_op = '<=' if lat_end == latitudes[-1] else '<'
    for lon_start, lon_end in zip(longitudes[:-1], longitudes[1:]):
        lon_upper_op = '<=' if lon_end == longitudes[-1] else '<'
        sql = f"""
            UPDATE "USER1"."{table_name}"
            SET "{db_col_name}" = '{default_category}'
            WHERE ("{db_col_name}" = '' OR "{db_col_name}" IS NULL) AND
            (LATITUDE >= {lat_start} AND LATITUDE {lat_upper_op} {lat_end}) AND
            (LONGITUDE >= {lon_start} AND LONGITUDE {lon_upper_op} {lon_end})
            """
        try:
            curs.execute(sql)
        except Exception as e:
            # Report the failing tile and continue with the next one
            print(e, sql)