## 7_building_height_calculation
### The source raster for the building height calculation is provided by DLR and is uploaded manually, in advance, to a dedicated bucket. Because this source is not public at the moment, it is handled manually; the code therefore reads the TIFF file uploaded to the bucket as its input
### This notebook estimates the heights of buildings based on the provided raster layer (10x10m per pixel resolution)

### Initial configuration
#### To start working with this notebook, you need to provide the necessary credentials and settings
#### Below is a template of the configuration. Prepare it outside of this notebook, then copy & paste the whole content between the triple quotes into the input field of the next cell
    """
    {
    "COS_ENDPOINT_URL": "s3.private.eu-de.cloud-object-storage.appdomain.cloud",
    "COS_AUTH_ENDPOINT_URL": "https://iam.cloud.ibm.com/oidc/token",
    "COS_APIKEY": "xxx",
    "UTILS_BUCKET": "notebook-utils-bucket",
    "HEIGHTS_TIFF_FILENAME": "WSF3Dv3_Kenya.tif",
    "DB2_CONNECTION_STRING": "jdbc:db2://65beb513-5d3d-4101-9001-f42e9dc954b3.brt9d04f0cmqeb8u7740.databases.appdomain.cloud:30371/BLUDB:sslConnection=true;useJDBC4ColumnNameAndLabelSemantics=false;db2.jcc.charsetDecoderEncoder=3;",
    "DB2_USERNAME": "xxx",
    "DB2_PASSWORD": "xxx",
    "COUNTRY_TABLE": "FEATURES_DB_VIDA_EXTENDED",
    "BUCKET_TIFF": "buildings-height-tiffs",
    "AREA_THRESHOLD": 20
    }
    """


In [1]:
# Read notebook configuration
import getpass
import json

# The configuration is a JSON document pasted by the user. It is read through
# getpass so that the pasted text (which holds the COS API key and the DB2
# password) is not echoed into the notebook output.
config_str = getpass.getpass('Enter your prepared config: ')
# json.loads raises json.JSONDecodeError if the pasted text is not valid JSON.
config = json.loads(config_str)

In [3]:
# Import necessary libraries
import io
from PIL import Image
import ibm_boto3
import jaydebeapi as jdbc
import jpype
from botocore.client import Config
import numpy as np
import configparser
import os
import sys
from ibm_cloud_sdk_core import ApiException
from ibmcloudant.cloudant_v1 import CloudantV1, Document, BulkDocs
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import pandas as pd
import geopandas as gpd
import random
import time
import base64
import shutil
import threading
from collections import Counter
from tqdm import tqdm
from datetime import datetime
import os
import rasterio
from rasterio.windows import Window
import gc
from matplotlib.path import Path
import matplotlib.pyplot as plt


# init S3 client in order to work with last tiff file version
# S3 clients expect the endpoint as a full URL; accept a bare host name (as
# shown in the configuration template) by defaulting to https.
cos_endpoint_url = config["COS_ENDPOINT_URL"]
if "://" not in cos_endpoint_url:
    cos_endpoint_url = "https://" + cos_endpoint_url

cos_client = ibm_boto3.client(service_name='s3',
                              ibm_api_key_id=config["COS_APIKEY"],
                              ibm_auth_endpoint=config["COS_AUTH_ENDPOINT_URL"],
                              config=Config(signature_version='oauth'),
                              endpoint_url=cos_endpoint_url)


# import external utils library
# Every object of the utils bucket (the DB2 JDBC driver jar and utils.py in the
# recorded run) is copied into the working directory so that it can be imported.
response = cos_client.list_objects_v2(Bucket=config["UTILS_BUCKET"])

try:
    for obj in response['Contents']:
        name = obj['Key']
        streaming_body_1 = cos_client.get_object(Bucket=config["UTILS_BUCKET"], Key=name)['Body']
        print("Copying to localStorage :  " + name)
        # Stream the object to disk in chunks instead of loading it fully into
        # memory and re-splitting the binary payload on newline bytes.
        with open(name, 'wb') as file:
            shutil.copyfileobj(streaming_body_1, file)

    from utils import *
    print('External utils succesfully imported')
except Exception as e:
    # Best effort: the failure is reported here; later cells that rely on the
    # utils helpers will fail if the import did not succeed.
    print('Error occured: ', e)

Copying to localStorage :  db2jcc4.jar
Copying to localStorage :  utils.py
External utils succesfully imported


In [4]:
# connect to the IBM DB2 function
def connect_to_db():
    """Open a JDBC connection to IBM DB2, starting the JVM on first use.

    The function is safe to call repeatedly (it is also used to reconnect):
    the JVM is started only when it is not running yet.

    Reads ``DB2_CONNECTION_STRING``, ``DB2_USERNAME`` and ``DB2_PASSWORD``
    from the global ``config`` and expects the driver jar ``db2jcc4.jar`` in
    the working directory.

    Returns:
        The connection object returned by ``jaydebeapi.connect``.

    Raises:
        Whatever ``jaydebeapi.connect`` raises when the connection cannot be
        established. A failure to start the JVM is only printed.
    """
    jar = 'db2jcc4.jar'
    os.environ['CLASSPATH'] = jar

    # Starting an already running JVM raises, so only start it on first use.
    if not jpype.isJVMStarted():
        args = '-Djava.class.path=%s' % jar
        jvm_path = jpype.getDefaultJVMPath()
        try:
            jpype.startJVM(jvm_path, args)
        except Exception as e:
            print('startJVM exception: ', e)

    # NOTE(review): the recorded cell output shows a warning raised on this
    # line - presumably a JPype deprecation notice; confirm against the
    # installed JPype version before switching APIs.
    if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
        jpype.java.lang.Thread.currentThread().setContextClassLoader(jpype.java.lang.ClassLoader.getSystemClassLoader())

    # create JDBC connection
    conn = jdbc.connect(
                'com.ibm.db2.jcc.DB2Driver',
                config['DB2_CONNECTION_STRING'],
                [config["DB2_USERNAME"], config["DB2_PASSWORD"]],
                jar)

    return conn

DB2_connection = connect_to_db()

  if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():


In [5]:
# Pull the settings used by the following cells out of the parsed config:
# TIFF bucket, buildings table, heights raster file name and the area
# threshold used in the SQL filters below.
BUCKET_TIFF, table_name, heights_tiff_name, area_threshold = (
    config[key]
    for key in ("BUCKET_TIFF", "COUNTRY_TABLE", "HEIGHTS_TIFF_FILENAME", "AREA_THRESHOLD")
)

In [6]:
# paths for local storing of tiff files
path_to_tif_folder = 'tiff/'
processed_tiff = 'processd_tiffs/'

# clear files in directories if exist
# (ignore_errors=True already suppresses removal failures, e.g. a missing folder)
shutil.rmtree(path_to_tif_folder, ignore_errors=True)
shutil.rmtree(processed_tiff, ignore_errors=True)

print("Recreate /tiff directories")
os.makedirs(os.path.dirname(path_to_tif_folder), exist_ok=True)
os.makedirs(os.path.dirname(processed_tiff), exist_ok=True)

# download heights tiff file
local_tiff_path = path_to_tif_folder + heights_tiff_name
streaming_body = cos_client.get_object(Bucket=BUCKET_TIFF, Key=heights_tiff_name)['Body']
with open(local_tiff_path, 'wb') as file:
    print("Copying to localStorage: " + local_tiff_path)
    # Stream in 16 MiB chunks: the raster can be very large, so it must not be
    # read into memory in one piece (nor split on newline bytes) before writing.
    shutil.copyfileobj(streaming_body, file, 16 * 1024 * 1024)

print('Successfully downloaded')

Recreate /tiff directories
Copying to localStorage: tiff/WSF3Dv3_Kenya.tif
Successfully downloaded


In [7]:
# Open the heights tiff only long enough to read its metadata; the tile loop
# further down re-opens the file itself.
with rasterio.open(os.path.join(path_to_tif_folder, heights_tiff_name)) as dat:
    profile = dat.profile.copy()
profile.update(compress='lzw')
print(profile)

# divide tiff to tiles
tiff_width = profile['width']
tiff_height = profile['height']

# Aim for a 10 x 10 grid; max(..., 1) keeps rasters smaller than 10 px from
# producing a zero tile size (and a division by zero below).
tile_width = max(tiff_width // 10, 1)
tile_height = max(tiff_height // 10, 1)

print(f'tile_width: {tile_width}, tile_height: {tile_height}')
# define overlap between tiles (in pixels, applied to the start edge of a tile)
overlap = 500

# ceiling division: one extra, narrower tile covers the remainder pixels
columns_amount = -(-tiff_width // tile_width)
rows_amount = -(-tiff_height // tile_height)
print(f'TIFf image wiil be divided to {rows_amount} rows and {columns_amount} cols')

# Every entry is [[row_start, row_stop], [col_start, col_stop]] in pixel
# coordinates of the full raster; entries are ordered column by column.
images_coords = []

for col_idx in range(columns_amount):
    x_start = max(tile_width * col_idx - overlap, 0)
    # the last column absorbs the remainder up to the raster edge
    x_stop = tiff_width if col_idx == columns_amount - 1 else tile_width * (col_idx + 1)

    for row_idx in range(rows_amount):
        y_start = max(tile_height * row_idx - overlap, 0)
        # compare against the number of ROWS here: comparing with the number
        # of columns is only correct when both happen to be equal
        y_stop = tiff_height if row_idx == rows_amount - 1 else tile_height * (row_idx + 1)

        images_coords.append([[y_start, y_stop], [x_start, x_stop]])
print(len(images_coords))

{'driver': 'GTiff', 'dtype': 'float64', 'nodata': None, 'width': 111543, 'height': 133808, 'count': 1, 'crs': CRS.from_epsg(4326), 'transform': Affine(8.983152841195211e-05, 0.0, 31.98999541430869,
       0.0, -8.983152841195211e-05, 6.010088576873247), 'blockysize': 1, 'tiled': False, 'compress': 'lzw', 'interleave': 'band'}
tile_width: 11154, tile_height: 13380
TIFf image wiil be divided to 11 rows and 11 cols
121


In [None]:
# Prepare GeoDataframe of buildings from bounding box
def fetch_builings_in_bbox(lon_min, lon_max, lat_min, lat_max):
    """Load the buildings of a bounding box into a GeoDataFrame.

    Selects from ``USER1.<table_name>`` all buildings whose LATITUDE/LONGITUDE
    fall inside the box (bounds inclusive) and whose ``AREA_IN_METERS`` is
    strictly greater than the global ``area_threshold``.

    Args:
        lon_min, lon_max: longitude bounds of the box.
        lat_min, lat_max: latitude bounds of the box.

    Returns:
        GeoDataFrame (EPSG:4326) with the columns ``doc_id`` (str),
        ``area_in_meters`` (float), ``geometry`` (parsed from the WKT stored
        in POLYGON_COORDINATES) and ``corresponding_tiff`` (always ``'NA'``).
        Rows containing missing values are dropped.

    Side effects:
        Sets ``gpd.options.display_precision`` to 7.
    """
    sql = f"""
        SELECT DISTINCT ID, POLYGON_COORDINATES, AREA_IN_METERS FROM USER1.{table_name} 
        WHERE 
            (LATITUDE >= {lat_min}) AND 
            (LATITUDE <= {lat_max}) AND 
            (LONGITUDE >= {lon_min}) AND 
            (LONGITUDE <= {lon_max}) AND
            AREA_IN_METERS > {area_threshold}
            """
    cursor = DB2_connection.cursor()
    try:
        cursor.execute(sql)
        data = cursor.fetchall()
    finally:
        # release the statement handle even when the query fails
        cursor.close()

    gpd.options.display_precision = 7

    # One name per selected column, in SELECT order. The area column was
    # missing here, which made the DataFrame constructor fail on the
    # three-column result set.
    df = pd.DataFrame(data=data, columns=['doc_id', 'polygon_coordinates', 'area_in_meters'])

    convert_dict = {
                    'doc_id': str,
                    'polygon_coordinates': str,
                    'area_in_meters': float
                    }

    df = df.astype(convert_dict)
    df['geometry'] = gpd.GeoSeries.from_wkt(df['polygon_coordinates'])
    df = df.drop(columns=['polygon_coordinates'])

    df = gpd.GeoDataFrame(
        df, geometry=df.geometry, crs="EPSG:4326"
    )

    # blank out rows without a geometry, then drop every row with a missing value
    df = df.where(~df['geometry'].isna()).dropna()

    df["corresponding_tiff"] = 'NA'

    return df

In [None]:
def upd_default_height_in_bbox(lon_min, lon_max, lat_min, lat_max):
    """Assign default height values to the small buildings of a bounding box.

    Every building of ``USER1.<table_name>`` inside the box (bounds inclusive)
    whose ``AREA_IN_METERS`` is at most the global ``area_threshold`` gets
    HEIGHT / HEIGHT_MEDIAN / HEIGHT_MEAN / HEIGHT_MAX = 3, FLOORS = 1 and
    GFA_IN_METERS equal to its AREA_IN_METERS.

    Args:
        lon_min, lon_max: longitude bounds of the box.
        lat_min, lat_max: latitude bounds of the box.

    Returns:
        None. The update is best effort: database errors are printed together
        with the statement and are not propagated.
    """
    sql = f"""
        UPDATE "USER1"."{table_name}"
            SET
                "HEIGHT" = 3,   
                "HEIGHT_MEDIAN" = 3, 
                "HEIGHT_MEAN" = 3, 
                "HEIGHT_MAX" = 3,
                "FLOORS" = 1,
                "GFA_IN_METERS" = "AREA_IN_METERS"
            WHERE 
                (LATITUDE >= {lat_min}) AND 
                (LATITUDE <= {lat_max}) AND 
                (LONGITUDE >= {lon_min}) AND 
                (LONGITUDE <= {lon_max}) AND
                AREA_IN_METERS <= {area_threshold}
        """
    try:
        cursor = DB2_connection.cursor()
        try:
            cursor.execute(sql)
        finally:
            # This function is called once per tile; close the cursor so the
            # statement handles do not pile up on the connection.
            cursor.close()
    except Exception as e:
        print(e, sql)

In [9]:
def upd_height_db2(lat, lon, height, height_max, height_mean, height_median, floors, GFA_in_meters, cursor):
    """Write the computed height statistics of one building to DB2.

    The row of ``USER1.<table_name>`` is addressed by exact equality on
    LATITUDE and LONGITUDE; the values are interpolated into the statement
    as given.

    Args:
        lat, lon: coordinates identifying the building row.
        height: value stored in HEIGHT.
        height_max, height_mean, height_median: values stored in HEIGHT_MAX,
            HEIGHT_MEAN and HEIGHT_MEDIAN.
        floors: value stored in FLOORS.
        GFA_in_meters: value stored in GFA_IN_METERS.
        cursor: open database cursor used to execute the statement.

    Returns:
        None. Any exception raised while executing the statement is caught
        here and printed together with the statement, so callers do not see
        database errors from this function.
    """
    try:
        statement = f"""
        UPDATE "USER1"."{table_name}"
            SET
                "HEIGHT" = {height},   
                "HEIGHT_MEDIAN" = {height_median}, 
                "HEIGHT_MEAN" = {height_mean}, 
                "HEIGHT_MAX" = {height_max},
                "FLOORS" = {floors},
                "GFA_IN_METERS" = {GFA_in_meters}
            WHERE 
                ("LATITUDE" = {lat}) AND 
                ("LONGITUDE" = {lon})
        """
        cursor.execute(statement)
    except Exception as err:
        # report and carry on with the next building
        print(err, statement)

### Loop through all tiles and calculate heights stats for all available buildings in tile

In [None]:
cursor = DB2_connection.cursor()
dfs = []
t1 = time.time()

# Columns written per building by the height calculation below. A building is
# pushed to DB2 only when all of them hold a value.
stat_columns = ['height_categorized', 'height_max_by_poly', 'height_mean_by_poly',
                'height_median_by_poly', 'floors', 'GFA_in_meters']

# loop through tiles coords; every entry is [[row_start, row_stop], [col_start, col_stop]]
for coords in images_coords:
    init_time = time.time()
    with rasterio.open(os.path.join(path_to_tif_folder, heights_tiff_name)) as src:

        print(coords)

        # geographic bounding box of the tile, from its upper-left and lower-right pixels
        lon_upper_left, lat_upper_left = src.xy(coords[0][0], coords[1][0])
        lon_down_right, lat_down_right = src.xy(coords[0][1], coords[1][1])

        lon_min, lon_max = sorted([lon_upper_left, lon_down_right])
        lat_min, lat_max = sorted([lat_upper_left, lat_down_right])

        areas_covered_by_tifs = create_bounds_dict(path_to_tifs=path_to_tif_folder)

        # set up default height (3m - 1 floor) for buildings not larger than area_threshold
        upd_default_height_in_bbox(lon_min, lon_max, lat_min, lat_max)

        # fetch all buildings larger than area_threshold to estimate their height
        df = fetch_builings_in_bbox(lon_min, lon_max, lat_min, lat_max)

        # pixel window of the current tile, extended by one pixel towards the upper-left
        col_off = max(coords[1][0] - 1, 0)
        row_off = max(coords[0][0] - 1, 0)
        width = min(coords[1][1] - coords[1][0] + 1, tiff_width)
        height = min(coords[0][1] - coords[0][0] + 1, tiff_height)

        # Read the tile band once. (The window used to be read a second time
        # before the database calls and that first result was thrown away.)
        tiff_data = src.read(1, window=Window(col_off, row_off, width, height))
        print(f"Images revealed: {len(df)}")

        skipped_buildings = 0
        last_error = None

        # loop through buildings inside tile
        for index, row in tqdm(df.iterrows(), total=len(df), desc='Height calculation'):
            try:

                # lat lon to pixel transformations, made relative to the tile window
                pixel_coordinates = get_pixel_coordinates(row.geometry, areas_covered_by_tifs, src)
                polygon_coordinates = [[pixel_coords[0] - row_off, pixel_coords[1] - col_off] for pixel_coords in pixel_coordinates]

                polygon_coordinates = offset_polygon_coords(polygon_coordinates)
                rowcolminmax = get_min_max_values_of_row_col(pixel_coordinates=polygon_coordinates)

                row_start, row_end = rowcolminmax['rowminmax'][0], rowcolminmax['rowminmax'][1]
                col_start, col_end = rowcolminmax['colminmax'][0], rowcolminmax['colminmax'][1]
                # NOTE: "width" is the extent along the row axis and "height"
                # the extent along the column axis (naming kept from the
                # original code).
                img_width = row_end - row_start
                img_height = col_end - col_start

                # cut building image from tile
                img_array_pre = np.array(tiff_data[row_start : row_end, col_start : col_end])

                # check if cropped image corresponds with the expected shape,
                # if not, place the cropped image on a NaN matrix of that shape
                if img_array_pre.shape != (img_width, img_height):
                        print('building polygon out of tiff')
                        np_nan_matrix = np.empty((img_width, img_height))
                        np_nan_matrix.fill(np.nan)
                        np_nan_matrix[:img_array_pre.shape[0], :img_array_pre.shape[1]] = img_array_pre
                        img_array_pre = np_nan_matrix

                # extract building by polygon coords
                # NOTE(review): the grid points below are generated as
                # (column index, row index) while the polygon vertices are
                # built as (row, column) - confirm against the utils helpers
                # that this ordering is intended.
                absolule_polygon_coordinates = [[pixel_coords[0] - row_start, pixel_coords[1] - col_start] for pixel_coords in polygon_coordinates]
                poly_path = Path(absolule_polygon_coordinates)
                x, y = np.mgrid[:img_height, :img_width]
                coors = np.hstack((x.reshape(-1, 1), y.reshape(-1, 1)))
                mask = poly_path.contains_points(coors).reshape(img_height, img_width).T

                # copy the pixels under the polygon mask onto a zero image
                img_masked = np.zeros((img_width, img_height), dtype=img_array_pre.dtype)
                img_masked[mask] = img_array_pre[mask]

                # keep the non zero values only
                flat_values = img_masked.flatten(order='C')
                heights = flat_values[flat_values != 0]

                # calculate statistics; a building without any height pixel
                # falls back to one 3 m floor
                if heights.size:
                    height_median = np.median(heights)
                    height_mean = np.mean(heights)
                    height_max = np.max(heights)
                    # round the median up to the next multiple of 3 m
                    height_categorized = height_median // 3 * 3 + 3
                else:
                    height_median = height_mean = height_max = 0
                    height_categorized = 3
                floors = int(height_categorized / 3)
                gfa = round(row.area_in_meters * floors, 5)

                df.at[index, 'height_mean_by_poly'] = height_mean
                df.at[index, 'height_median_by_poly'] = height_median
                df.at[index, 'height_max_by_poly'] = height_max
                df.at[index, 'height_categorized'] = height_categorized

                df.at[index, 'floors'] = floors
                df.at[index, 'GFA_in_meters'] = gfa

            except Exception as e:
                # best effort per building, but keep track of what was skipped
                skipped_buildings += 1
                last_error = e

        if skipped_buildings:
            print(f'Buildings skipped in this tile: {skipped_buildings}, last error: {last_error!r}')

        # Only buildings with a complete set of statistics are written back;
        # the statistic columns do not exist at all when no building succeeded.
        if all(column in df.columns for column in stat_columns):
            rows_to_update = df.dropna(subset=stat_columns)
        else:
            rows_to_update = df.iloc[0:0]

        # Update changed height values
        for row in tqdm(rows_to_update.itertuples(), total=len(rows_to_update), desc='ingestion_data'):
            lon, lat = row.doc_id.split(':')
            try:
                upd_height_db2(lat, lon, row.height_categorized, row.height_max_by_poly, row.height_mean_by_poly, row.height_median_by_poly, int(row.floors), row.GFA_in_meters, cursor)
            except Exception as e:
                # NOTE(review): upd_height_db2 as defined in this notebook
                # prints and swallows database errors itself, so this
                # reconnect path is only reached for errors raised outside
                # of it.
                print(f"Error of database: {e}")
                DB2_connection = connect_to_db()
                cursor = DB2_connection.cursor()
                upd_height_db2(lat, lon, row.height_categorized, row.height_max_by_poly, row.height_mean_by_poly, row.height_median_by_poly, int(row.floors), row.GFA_in_meters, cursor)
        print(f'Image tile processed, time took: {time.strftime("%H:%M:%S", time.gmtime(int(time.time() - init_time)))}')



print("")
print(f'All tiles processed, time took: {time.strftime("%H:%M:%S", time.gmtime(int(time.time() - t1)))}')
