## geoJSON_preparation_subdistrict
### This notebook generates the geoJSON files related to subdistricts used in the OBI tool in the "Select an area" option
### last_processed_idx can be used to resume a failed computation

### Initial configuration
#### To start working with this particular notebook, you need to provide necessary credential and settings
#### Below is an template of configuration, which is necessary prepare aside of this notebook and copy & paste all content in triple quotes to the next cell's input field
    """
    {
    "COS_ENDPOINT_URL": "s3.private.eu-de.cloud-object-storage.appdomain.cloud",
    "COS_AUTH_ENDPOINT_URL": "https://iam.cloud.ibm.com/oidc/token",
    "COS_APIKEY": "xxx",
    "PRECREATED_GEOJSON_BUCKET": "counties-geojsons",
    "DB2_CONNECTION_STRING": "jdbc:db2://65beb513-5d3d-4101-9001-f42e9dc954b3.brt9d04f0cmqeb8u7740.databases.appdomain.cloud:30371/BLUDB:sslConnection=true;useJDBC4ColumnNameAndLabelSemantics=false;db2.jcc.charsetDecoderEncoder=3;",
    "DB2_USERNAME": "xxx",
    "DB2_PASSWORD": "xxx",
    "COUNTRY_TABLE": "FEATURES_DB_VIDA_EXTENDED"
    }
    """
    

In [11]:
# Read notebook configuration
import getpass
import json

config_str = getpass.getpass('Enter your prepared config: ')
config = json.loads(config_str)

In [12]:
# Import necessary libraries
import geopandas as gpd

import pandas as pd
import numpy as np
import shapely
from collections import Counter
import jaydebeapi as jdbc
import jpype
import os
from tqdm import tqdm

from botocore.client import Config
from pyproj import Geod
import ibm_boto3

In [13]:
# init S3 client in order to work with last tiff file version
cos_client = ibm_boto3.client(service_name='s3',
                              ibm_api_key_id=config["COS_APIKEY"],
                              ibm_auth_endpoint=config["COS_AUTH_ENDPOINT_URL"],
                              config=Config(signature_version='oauth'),
                              endpoint_url=config["COS_ENDPOINT_URL"])
geod = Geod(ellps="WGS84")

In [24]:
def connect_to_db():

    jar = 'db2jcc4.jar'
    os.environ['CLASSPATH'] = jar

    args='-Djava.class.path=%s' % jar
    jvm_path = jpype.getDefaultJVMPath()
    try:
        jpype.startJVM(jvm_path, args)
    except Exception as e:
        print('startJVM exception: ', e)
        
    if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
        jpype.java.lang.Thread.currentThread().setContextClassLoader(jpype.java.lang.ClassLoader.getSystemClassLoader())
        
    # create JDBC connection
    conn = jdbc.connect(
                'com.ibm.db2.jcc.DB2Driver',
                config['DB2_CONNECTION_STRING'],
                [config["DB2_USERNAME"], config["DB2_PASSWORD"]],
                'db2jcc4.jar')
    
    return conn

conn = connect_to_db()
cursor = conn.cursor()

startJVM exception:  JVM is already started


In [None]:
shapefile = gpd.read_file(open('boundaries/MAHARASHTRA_SUBDISTRICTS.geojson'))
shapefile['country'] = ['Maharashtra' for _ in  range(len(shapefile))]
shapefile

In [16]:
districts = [
    'Ahmadnagar', 
    'Aurangabad', 
    'Kolhapur', 
    'Nagpur', 
    'Nashik', 
    'Pune', 
    'Satara', 
    'Thane',
    'Solapur'
]

selected_districts = shapefile[shapefile.dtname.isin(districts)].explode(index_parts=True)

In [18]:
def fetch_builings_in_bbox(lon_min, lon_max, lat_min, lat_max):

    columns = [
        'latitude',
        'longitude',
        'polygon_coordinates',
        'height',
        'ml_confidence',
        'area_in_meters',
        'urban_split',
        'classification_source',
        'classification_type',
        'footprint_source',
        'osm_id',
        'ghsl_smod',
        'floors',
        'osm_type',
        'gfa_in_meters',
        'building_faces',
        'perimeter_in_meters',
        'elec_access_percent',
        'elec_consumption_kwh_month',
        'elec_consumption_std_kwh_month'
    ]
    sql = f"""
        SELECT {', '.join(columns)} FROM USER1.FEATURES_DB_MAHARASHTRA
        WHERE 
            (LATITUDE >= {lat_min}) AND 
            (LATITUDE <= {lat_max}) AND 
            (LONGITUDE >= {lon_min}) AND 
            (LONGITUDE <= {lon_max})
            """
    # cursor = conn.cursor()
    cursor.execute(sql)
    data = cursor.fetchall()

    gpd.options.display_precision = 7 

    df = pd.DataFrame(data=data, columns=columns)

    convert_dict = {
                    'latitude': float,
                    'longitude': float,
                    'polygon_coordinates': str,
                    'height': float,
                    'ml_confidence': float,
                    'area_in_meters': float,
                    'urban_split': str,
                    'classification_source': str,
                    'classification_type': str,
                    'footprint_source': str,
                    'osm_id': int,
                    'ghsl_smod': str,
                    'osm_type': str,
                    'floors': int,
                    'gfa_in_meters': float,
                    'building_faces': int,
                    'perimeter_in_meters': float,
                    'elec_access_percent': float,
                    'elec_consumption_kwh_month': float,
                    'elec_consumption_std_kwh_month': float
                    }

    df = df.astype(convert_dict)
    # df['geometry'] = gpd.GeoSeries.from_wkt(df['polygon_coordinates'])
    # df = df.drop(columns=['polygon_coordinates'])

    # df = gpd.GeoDataFrame(
    #     df, geometry=df.geometry, crs="EPSG:4326"
    # )

    # df = df.where(~df['geometry'].isna()).dropna()

    df 
    
    return df


def create_geojson(df, county_metadata):

    if type(county_metadata.geometry) == shapely.geometry.multipolygon.MultiPolygon:
        geometry = shapely.concave_hull(county_metadata.geometry, ratio=1)
        county_coordinates = geometry.exterior.coords._coords.tolist()
        county_area = abs(geod.geometry_area_perimeter(geometry)[0])
    else:
        county_coordinates = county_metadata.geometry.exterior.coords._coords.tolist()
        county_area = abs(geod.geometry_area_perimeter(county_metadata.geometry)[0])

    res_nonres_stats = dict(Counter(df['classification_type']))
    rural_urban_stats = dict(Counter(df['urban_split']))

    county_properties = {
        'count_of_buildings': len(df),
        'count_of_buildings_res': res_nonres_stats['res'],
        'count_of_buildings_nonRes': res_nonres_stats['non-res'],
        'square_area_of_county': county_area,
        'square_area_of_buildings': df.area_in_meters.sum(),
        'square_area_res': df[df['classification_type'] == 'res'].area_in_meters.sum(),
        'square_area_nonRes': df[df['classification_type'] == 'non-res'].area_in_meters.sum(),
        'model_confidence_res': df[(df['classification_type'] == 'res') & (df['classification_source'] == 'classification_model')].ml_confidence.mean(),
        'model_confidence_nonRes': 1 - df[(df['classification_type'] == 'non-res') & (df['classification_source'] == 'classification_model')].ml_confidence.mean(),
        'height_avg': df.height.mean(),
        'height_avg_res': df[df['classification_type'] == 'res'].height.mean(),
        'height_avg_nonRes': df[df['classification_type'] == 'non-res'].height.mean(),
        'county_polygon_coordinates': county_coordinates
    }

        
    if 'Rural' in rural_urban_stats.keys():
        county_properties['rural'] = rural_urban_stats['Rural']

    if 'Urban' in rural_urban_stats.keys():
        county_properties['urban'] = rural_urban_stats['Urban']

    if 'Suburban' in rural_urban_stats.keys():
        county_properties['suburban'] = rural_urban_stats['Suburban']
    

    features = []
    for row in df.itertuples():
        try:
            polygon = shapely.from_wkt(row.polygon_coordinates)
            feature = {
                "type": "Feature",
                "properties": {
                    "latitude": row.latitude,
                    "longitude": row.longitude,
                    "height": row.height,
                    "area_in_meters": row.area_in_meters,
                    "classification_type": row.classification_type,
                    "classification_source": row.classification_source,
                    "footprint_source": row.footprint_source,
                    "urban_split": row.urban_split,
                    "ghsl_smod": row.ghsl_smod,
                    "floors": row.floors,
                    "osm_type": row.osm_type,
                    "gfa_in_meters": row.gfa_in_meters,
                    "building_faces": row.building_faces,
                    "perimeter_in_meters": row.perimeter_in_meters,
                    "elec_access_percent": row.elec_access_percent,
                    "elec_consumption_kwh_month": row.elec_consumption_kwh_month,
                    "elec_consumption_std_kwh_month": row.elec_consumption_std_kwh_month
                    },  
                "geometry": {
                    "coordinates": [polygon.exterior.coords._coords.tolist()],
                    "type": "Polygon"
                }
            }

            if row.classification_source == 'classification_model':
                if row.classification_type == 'res':
                    feature['properties']['ml_confidence'] = round(row.ml_confidence, 5)
                else:
                    feature['properties']['ml_confidence'] = round(1 - row.ml_confidence, 5)

            if row.footprint_source == 'osm':
                feature['properties']['osm_id'] = row.osm_id


            features.append(feature)

        except Exception as e:
            print(e)


    geojson = {
    "type": "FeatureCollection",
    "county_properties": county_properties,
    "features": features
    }

    filename = f'{county_metadata.country}_{county_metadata.district}.json'
    file_path = f'geojsons/{filename}'
    with open(file_path, "w") as outfile: 
        json.dump(geojson, outfile)

    cos_client.upload_file(
        Filename=file_path,
        Bucket=config["PRECREATED_GEOJSON_BUCKET"],
        Key=filename.replace("'", '').replace(" ", '_'),
        ExtraArgs={
                'ContentDisposition': 'attachment',
                }
        )
    print(f'File {filename} successfully uploaded to the COS {config["PRECREATED_GEOJSON_BUCKET"]} bucket')
    # return geojson

### Main for cycle for interating through district polygons

In [20]:
last_processed_idx = -1

In [21]:
conn = connect_to_db()
cursor = conn.cursor()

startJVM exception:  JVM is already started


In [None]:
for idx, county_metadata in enumerate(selected_districts.itertuples()):
    if idx > last_processed_idx:
        print()
        print(f'Processing subdistrict: {county_metadata.sdtname} {idx+1} of {len(selected_districts)}')
        # (minx, miny, maxx, maxy)
        min_lon, min_lat, max_lon, max_lat = county_metadata.geometry.bounds

        df = fetch_builings_in_bbox(min_lon, max_lon, min_lat, max_lat)

        buildings_in_polygon = []
        df['buildings_in_polygon'] = [county_metadata.geometry.contains(shapely.Point(row.longitude, row.latitude)) for row in tqdm(df.itertuples(), total=len(df), desc='Filtering buildings')]

        df = df[df.buildings_in_polygon == True]
        
        # fill None values in certain columns, in order not to get error on frontend side
        df['ml_confidence'] = df['ml_confidence'].fillna('-')
        df['height'] = df['height'].fillna('-')
        df['floors'] = df['floors'].fillna('-')
        df['osm_type'] = df['osm_type'].fillna('')
        df['gfa_in_meters'] = df['gfa_in_meters'].fillna('-')
        df['building_faces'] = df['building_faces'].fillna('-')
        df['perimeter_in_meters'] = df['perimeter_in_meters'].fillna('-')
        df['elec_access_percent'] = df['elec_access_percent'].fillna('-')
        df['elec_consumption_kwh_month'] = df['elec_consumption_kwh_month'].fillna('-')
        df['elec_consumption_std_kwh_month'] = df['elec_consumption_std_kwh_month'].fillna('-')

        print(f'buildings in polygoon {len(df)}')
        create_geojson(df, county_metadata)

        last_processed_idx = idx
    

In [147]:
file_path = f'geojson_subdistricts_map.json'

subdistricts_mapping = json.loads(open(file_path, "rb").read())

for idx, county_metadata in enumerate(selected_districts.itertuples()):

    if county_metadata.country not in subdistricts_mapping.keys():
        subdistricts_mapping[county_metadata.country] = {}

    district = county_metadata.dtname.replace("'", '').replace(" ", '_')
    subdistrict = county_metadata.sdtname.replace('(', '').replace(')', '').replace(' ', '_')
    subdistrict_sufix = '' if county_metadata.Index[1] == 0 else f'_{county_metadata.Index[1] + 1}'

    filename = f'{county_metadata.country}_{district}_{subdistrict}{subdistrict_sufix}.json'

    if type(subdistricts_mapping[county_metadata.country][district]) == str:
        subdistricts_mapping[county_metadata.country][district] = {f'{subdistrict}{subdistrict_sufix}': filename}

    else:
        subdistricts_mapping[county_metadata.country][district][f'{subdistrict}{subdistrict_sufix}'] = filename
        

subdistricts_mapping
file_path = f'geojson_subdistricts_map.json'
with open(file_path, "w") as outfile: 
    json.dump(subdistricts_mapping, outfile)


In [70]:
for idx, county_metadata in enumerate(shapefile.itertuples()):
    print('Maharashtra_'+county_metadata.district.replace("'", '').replace(" ", '_')+'.json')

Maharashtra_Garhchiroli.json
Maharashtra_Gondiya.json
Maharashtra_Latur.json
Maharashtra_Pune.json
Maharashtra_Sindhudurg.json
Maharashtra_Thane.json
Maharashtra_Wardha.json
Maharashtra_Washim.json
Maharashtra_Yavatmal.json
Maharashtra_Kolhapur.json
Maharashtra_Nagpur.json
Maharashtra_Parbhani.json
Maharashtra_Ahmadnagar.json
Maharashtra_Akola.json
Maharashtra_Aurangabad.json
Maharashtra_Bid.json
Maharashtra_Buldana.json
Maharashtra_Chandrapur.json
Maharashtra_Dhule.json
Maharashtra_Nanded.json
Maharashtra_Nandurbar.json
Maharashtra_Hingoli.json
Maharashtra_Nashik.json
Maharashtra_Osmanabad.json
Maharashtra_Raigarh.json
Maharashtra_Ratnagiri.json
Maharashtra_Solapur.json
Maharashtra_Sangli.json
Maharashtra_Satara.json
Maharashtra_Amravati.json
Maharashtra_Bhandara.json
Maharashtra_Mumbai_Suburban.json
Maharashtra_Jalgaon.json
Maharashtra_Jalna.json
Maharashtra_Mumbai.json


In [155]:
cos_client.upload_file(
        Filename='geojsons_subdistricts/Maharashtra_Solapur_Mangalvedhe.json',
        Bucket=config["PRECREATED_GEOJSON_BUCKET"],
        Key='Maharashtra_Solapur_Mangalvedhe.json',
        ExtraArgs={
                'ContentDisposition': 'attachment',
                }
        )
# print(f'File {file_path} successfully uploaded to the COS {config["PRECREATED_GEOJSON_BUCKET"]} bucket')