## OSM_labeled_data_DB2_Update
### Ingesting fine-grained labels for buildings based on OSM building, node and area tags
### Please see below the JSON definitions of the particular building types that are ingested, and change them if needed

### Initial configuration
#### To start working with this notebook, you need to provide the necessary credentials and settings
#### Below is a configuration template. Prepare it outside of this notebook, fill in your own values, and paste everything between the triple quotes into the input field of the next cell
    """
    {
    "COS_ENDPOINT_URL": "s3.private.eu-de.cloud-object-storage.appdomain.cloud",
    "COS_AUTH_ENDPOINT_URL": "https://iam.cloud.ibm.com/oidc/token",
    "COS_APIKEY": "xxx",
    "DB2_CONNECTION_STRING": "jdbc:db2://65beb513-5d3d-4101-9001-f42e9dc954b3.brt9d04f0cmqeb8u7740.databases.appdomain.cloud:30371/BLUDB:sslConnection=true;useJDBC4ColumnNameAndLabelSemantics=false;db2.jcc.charsetDecoderEncoder=3;",
    "DB2_USERNAME": "xxx",
    "DB2_PASSWORD": "xxx",
    "COUNTRY_TABLE": "FEATURES_DB_MAHARASHTRA",
    "COUNTRY_NAME": "Maharashtra",
    "OSM_DATA": "xxx"
    }
    """


In [1]:
# Read notebook configuration
import getpass
import json

# Prompt for the JSON configuration prepared from the template above; getpass is
# used so the pasted text (it contains credentials) is not echoed into the output.
config_str = getpass.getpass('Enter your prepared config: ')
# Parse the pasted text into a dict; json.loads raises if it is not valid JSON.
config = json.loads(config_str)

In [None]:
#Install necessary packages
#! pip install overpass
#! pip install requests
#! pip install shapely
#! pip install rioxarray
#! pip install JayDeBeApi

In [3]:
# Import necessary libraries
import pandas as pd
import overpass
import geojson
import requests
import getpass
from shapely.geometry import Point, Polygon, mapping, shape
import shapely
from shapely import wkt
import json
# import geopandas as gpd
import jaydebeapi as jdbc
import jpype
import ibm_boto3
import gc
import io
import os
from botocore.client import Config
from tqdm import tqdm
from skimage import measure as M
#import cv2

  from pandas.core import (


In [None]:
# Area name used in the Overpass boundary lookup and as the prefix of the
# generated GeoJSON file names.
searched_area = config["COUNTRY_NAME"]
# Name of the DB2 table (addressed under schema USER1) that the SQL statements
# in this notebook read and update.
sql_tablename = config["COUNTRY_TABLE"]

In [4]:
# Mirror every object of the configured COS bucket into the working directory
# (the JDBC driver db2jcc4.jar and utils.py are among them), then import the
# shared helpers from the freshly downloaded utils.py.
cos_client = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=config["COS_APIKEY"],
    ibm_auth_endpoint=config["COS_AUTH_ENDPOINT_URL"],
    config=Config(signature_version='oauth'),
    endpoint_url=config["COS_ENDPOINT_URL"],
)

response = cos_client.list_objects_v2(Bucket=config["OSM_DATA"])

try:
    for bucket_entry in response['Contents']:
        object_key = bucket_entry['Key']
        object_body = cos_client.get_object(Bucket=config["OSM_DATA"], Key=object_key)['Body']
        print("Copying to localStorage :  " + object_key)
        # The object is read fully into memory and written out in one call.
        with open(object_key, 'wb') as local_copy:
            local_copy.write(object_body.read())

    # Must run after the download loop: utils.py only exists locally once copied.
    from utils import *
    print('External utils succesfully imported')
except Exception as e:
    # Any failure (listing, download, import) is reported and the cell continues.
    print('Error occured: ', e)

Copying to localStorage :  Kenya_Polygon.json
Copying to localStorage :  Maharashtra_OSM_nodes.geojson
Copying to localStorage :  Maharashtra_OSM_relations.geojson
Copying to localStorage :  Maharashtra_OSM_ways.geojson
Copying to localStorage :  Maharashtra_OSM_ways.json
Copying to localStorage :  OSM_nodes.geojson
Copying to localStorage :  OSM_relations.geojson
Copying to localStorage :  OSM_ways.geojson
Copying to localStorage :  db2jcc4.jar
Copying to localStorage :  edited_Maharashtra_OSM_nodes.geojson
Copying to localStorage :  edited_Maharashtra_OSM_relations.geojson
Copying to localStorage :  edited_Maharashtra_OSM_ways.geojson
Copying to localStorage :  edited_OSM_nodes.geojson
Copying to localStorage :  edited_OSM_relations.geojson
Copying to localStorage :  edited_OSM_ways.geojson
Copying to localStorage :  utils.py
External utils succesfully imported


In [5]:
#Get country Relation_ID & add prefix 36/0/0/0.. to match 10 digits length
#//kenya qgis 	    = 3600192798
#//kenya osm 	    =     192798
# Query to find the area ID for Country / # Edit "name"="XX_Country_XX" / Country name must be the local official name e.g. name=Principat d'Andorra; Slovensko; Česko; 
# For County/District edit "admin_level"="4" OR "admin_level"=""

# Overpass API endpoint used for every query in this notebook
overpass_url = "http://overpass-api.de/api/interpreter"

# Dimensions (in meters) of the rectangle that is drawn around each OSM node
# when nodes are converted to polygons further below
width_meters = 10
height_meters = 10

# Get country ID
def get_polygon_id():
    """Look up the Overpass area ID of the administrative boundary named `searched_area`.

    Queries Overpass for relations tagged boundary=administrative whose name
    equals `searched_area` and converts the ID of the first match into the
    corresponding area ID.

    Returns:
        int | None: the area ID (relation ID + 3600000000), or None when the
        request fails or no relation matches.
    """
    area_query = f"""
        [out:json]; 
        relation["boundary"="administrative"]["name"="{searched_area}"];
        out ids;
        """

    response = requests.get(overpass_url, params={'data': area_query})

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

    elements = response.json().get('elements') or []
    if not elements:
        print("No area ID found for country polygon.")
        return None

    # Only the first match is used; several relations may share the same name.
    raw_area_id = elements[0]['id']
    print(f"Raw country polygon area ID: {raw_area_id}")

    # Overpass derives a relation's area ID by adding 3600000000 to its ID.
    # Doing this arithmetically (instead of padding a string to 8 digits and
    # prefixing "36") stays correct for relation IDs longer than 8 digits.
    area_id = 3600000000 + int(raw_area_id)

    print(f"10-digit country polygon area ID: {area_id}")
    return area_id


# Resolve the area ID once and stop immediately when the lookup failed:
# otherwise the text "None" would be interpolated into every Overpass query below.
_resolved_area_id = get_polygon_id()
if _resolved_area_id is None:
    raise RuntimeError(
        f"Could not resolve an Overpass area ID for '{searched_area}'; "
        "check COUNTRY_NAME in the configuration."
    )
country_polygon_id = str(_resolved_area_id)

Raw country polygon area ID: 1950884
10-digit country polygon area ID: 3601950884


In [7]:
#GEOJSON OUTPUT for OSM type WAY & upload to COS

# Define the Overpass API endpoint
overpass_url = "http://overpass-api.de/api/interpreter"

# Define the Overpass QL query to get OSM labels.
# The query restricts the search to the area whose ID is country_polygon_id,
# collects every WAY carrying one of the listed tag/value pairs, adds the
# referenced nodes via the recurse-down statement "(._;>;);" and asks for the
# geometry to be included in the output ("out geom").
# The tag/value pairs listed here are the ones the `replacements` table later
# rewrites into "osm_type" values.
overpass_query = f"""
[out:json] [timeout:125];
 area({country_polygon_id}) -> .area_0;
(
    way["amenity"="school"](area.area_0);
    way["amenity"="college"](area.area_0);
    way["amenity"="university"](area.area_0);
    way["amenity"="place_of_worship"](area.area_0);
    way["amenity"="hospital"](area.area_0);
    way["amenity"="fuel"](area.area_0);
    way["amenity"="bank"](area.area_0);
    way["amenity"="office"](area.area_0);
    way["power"="plant"](area.area_0);
    way["building"="commercial"](area.area_0);
    way["building"="industrial"](area.area_0);
    way["shop"="supermarket"](area.area_0);
    way["shop"="mall"](area.area_0);
    way["office"="diplomatic"](area.area_0);
    way["office"="government"](area.area_0);
    way["landuse"="military"](area.area_0);
    way["landuse"="quarry"](area.area_0);
    way["landuse"="industrial"](area.area_0);
    way["landuse"="greenhouse_horticulture"](area.area_0);
    way["aeroway"="aerodrome"](area.area_0);
    way["aeroway"="apron"](area.area_0);
    way["tourism"="attraction"](area.area_0);
    way["information"="office"](area.area_0);
    way["historic"="castle"](area.area_0);
    way["historic"="fort"](area.area_0);
    way["historic"="monastery"](area.area_0);
    way["historic"="monument"](area.area_0);
);
(._;>;);
out geom;
"""

# Make the API call
response = requests.get(overpass_url, params={'data': overpass_query})

# Stop on a failed request. Continuing would reach the upload below with
# `file_name_1` undefined (fresh kernel) or pointing at a stale file (re-run).
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
    raise RuntimeError(f"Overpass request for OSM ways failed with HTTP {response.status_code}")

data = response.json()

# Overpass can answer HTTP 200 and still report a runtime problem (for example
# a query timeout) in a "remark" field; surface it so truncated data is noticed.
if data.get('remark'):
    print(f"Overpass remark: {data['remark']}")

# Create a GeoJSON structure
geojson_data = {
    "type": "FeatureCollection",
    "features": []
}

# Populate the GeoJSON structure with data for WAY.
# Every way is written as a single-ring Polygon built from its node coordinates
# in the order returned by Overpass; its OSM tags become the feature properties.
for element in data['elements']:
    if element['type'] == 'way':
        feature = {
            "type": "Feature",
            "geometry": {
                "type": "Polygon",
                "coordinates": [[
                    [node['lon'], node['lat']] for node in element['geometry']
                ]]
            },
            "properties": element.get('tags', {})
        }
        geojson_data["features"].append(feature)

file_name_1 = f'{searched_area}_OSM_ways.geojson'

# Save the data to a GeoJSON file
with open(file_name_1, "w") as f:
    json.dump(geojson_data, f)

print(f"OK: {response.status_code} Geojson {file_name_1} successfully created")

#print(json.dumps(data, indent=2)) #PRINT RAW DATA FOR TROUBLESHOOTING

# Upload the freshly written file to the COS bucket
with open(file_name_1, 'rb') as geojson_file:
    cos_client.upload_fileobj(geojson_file, config["OSM_DATA"], file_name_1)

print(f"GeoJSON file '{file_name_1}' successfully uploaded to COS bucket.")

OK: 200 Geojson Maharashtra_OSM_ways.geojson successfully created
GeoJSON file 'Maharashtra_OSM_ways.geojson' successfully uploaded to COS bucket.


In [8]:
#GEOJSON OUTPUT for OSM type RELATIONS upload to COS

# Define the Overpass API endpoint
overpass_url = "http://overpass-api.de/api/interpreter"

# Define the Overpass QL query to get OSM labels.
# Same tag/value filters as the WAY query, applied to RELATION objects inside
# the area identified by country_polygon_id. "(._;>;);" adds the relation
# members and "out geom" includes their geometry in the output.
# The two aeroway filters previously used the key "aerorelation" (a leftover of
# replacing "way" with "relation"), which is not an OSM key and never matched.
# NOTE(review): the WAY query also selects landuse=greenhouse_horticulture;
# it is not selected here - confirm whether that is intentional.
overpass_query = f"""
[out:json] [timeout:125];
 area({country_polygon_id}) -> .area_0;
(
    relation["amenity"="school"](area.area_0);
    relation["amenity"="college"](area.area_0);
    relation["amenity"="university"](area.area_0);
    relation["amenity"="place_of_worship"](area.area_0);
    relation["amenity"="hospital"](area.area_0);
    relation["amenity"="fuel"](area.area_0);
    relation["amenity"="bank"](area.area_0);
    relation["amenity"="office"](area.area_0);
    relation["power"="plant"](area.area_0);
    relation["building"="commercial"](area.area_0);
    relation["building"="industrial"](area.area_0);
    relation["shop"="supermarket"](area.area_0);
    relation["shop"="mall"](area.area_0);
    relation["office"="diplomatic"](area.area_0);
    relation["office"="government"](area.area_0);
    relation["landuse"="military"](area.area_0);
    relation["landuse"="quarry"](area.area_0);
    relation["landuse"="industrial"](area.area_0);
    relation["aeroway"="aerodrome"](area.area_0);
    relation["aeroway"="apron"](area.area_0);
    relation["tourism"="attraction"](area.area_0);
    relation["information"="office"](area.area_0);
    relation["historic"="castle"](area.area_0);
    relation["historic"="fort"](area.area_0);
    relation["historic"="monastery"](area.area_0);
    relation["historic"="monument"](area.area_0);
);
(._;>;);
out geom;
"""

# Make the API call
response = requests.get(overpass_url, params={'data': overpass_query})

# Stop on a failed request. Continuing would reach the upload below with
# `file_name_2` undefined (fresh kernel) or pointing at a stale file (re-run).
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
    raise RuntimeError(f"Overpass request for OSM relations failed with HTTP {response.status_code}")

data = response.json()

# Overpass can answer HTTP 200 and still report a runtime problem (for example
# a query timeout) in a "remark" field; surface it so truncated data is noticed.
if data.get('remark'):
    print(f"Overpass remark: {data['remark']}")


# Helper function to extract and validate coordinates
def extract_coordinates(element):
    """Return the (lon, lat) tuples of `element['geometry']`, or [] when a key is missing."""
    try:
        return [(node['lon'], node['lat']) for node in element['geometry']]
    except KeyError:
        return []


# Create a GeoJSON structure
geojson_data = {
    "type": "FeatureCollection",
    "features": []
}

# Populate the GeoJSON structure with data.
# Each member way of a relation that has at least 4 coordinates and forms a
# valid polygon becomes its own feature; all of them carry the relation's tags.
for element in data['elements']:
    if element['type'] != 'relation':
        continue

    polygons = []
    for member in element.get('members', []):
        if member['type'] == 'way' and 'geometry' in member:
            coordinates = extract_coordinates(member)
            if len(coordinates) >= 4:
                try:
                    polygon = Polygon(coordinates)
                    if polygon.is_valid:
                        polygons.append(polygon)
                except ValueError:
                    print(f"Invalid polygon with coordinates: {coordinates}")

    # .get keeps an untagged relation from aborting the whole export
    # (same handling as for ways and nodes).
    relation_tags = element.get('tags', {})
    for polygon in polygons:
        feature = {
            "type": "Feature",
            "geometry": mapping(polygon),
            "properties": relation_tags
        }
        geojson_data["features"].append(feature)

file_name_2 = f'{searched_area}_OSM_relations.geojson'

# Save the data to a GeoJSON file
with open(file_name_2, "w") as f:
    json.dump(geojson_data, f)

print(f"OK: {response.status_code} Geojson {file_name_2} successfully created")

#print(json.dumps(data, indent=2)) #PRINT RAW DATA FOR TROUBLESHOOTING

# Upload the freshly written file to the COS bucket
with open(file_name_2, 'rb') as geojson_file:
    cos_client.upload_fileobj(geojson_file, config["OSM_DATA"], file_name_2)

print(f"GeoJSON file '{file_name_2}' successfully uploaded to COS bucket.")

OK: 200 Geojson Maharashtra_OSM_relations.geojson successfully created
GeoJSON file 'Maharashtra_OSM_relations.geojson' successfully uploaded to COS bucket.


In [9]:
#PROCESSING GEOJSON OUTPUT for OSM NODE & convert to RECTANGLE POLYGON (width_meters x height_meters, 10 m x 10 m as configured above) & upload to COS

# Define the Overpass API endpoint
overpass_url = "http://overpass-api.de/api/interpreter"


# Define the Overpass QL query to get OSM labels.
# Selects NODE objects carrying one of the listed tag/value pairs inside the
# area identified by country_polygon_id and outputs them with geometry.
# NOTE(review): the WAY query also selects landuse=greenhouse_horticulture;
# it is not selected here - confirm whether that is intentional.
overpass_query = f"""
[out:json] [timeout:125];
 area({country_polygon_id}) -> .area_0;
(
    node["amenity"="school"](area.area_0);
    node["amenity"="college"](area.area_0);
    node["amenity"="university"](area.area_0);
    node["amenity"="place_of_worship"](area.area_0);
    node["amenity"="hospital"](area.area_0);
    node["amenity"="fuel"](area.area_0);
    node["amenity"="bank"](area.area_0);
    node["amenity"="office"](area.area_0);
    node["power"="plant"](area.area_0);
    node["building"="commercial"](area.area_0);
    node["building"="industrial"](area.area_0);
    node["shop"="supermarket"](area.area_0);
    node["shop"="mall"](area.area_0);
    node["office"="diplomatic"](area.area_0);
    node["office"="government"](area.area_0);
    node["landuse"="military"](area.area_0);
    node["landuse"="quarry"](area.area_0);
    node["landuse"="industrial"](area.area_0);
    node["aeroway"="aerodrome"](area.area_0);
    node["aeroway"="apron"](area.area_0);
    node["tourism"="attraction"](area.area_0);
    node["information"="office"](area.area_0);
    node["historic"="castle"](area.area_0);
    node["historic"="fort"](area.area_0);
    node["historic"="monastery"](area.area_0);
    node["historic"="monument"](area.area_0);
);
(._;>;);
out geom;
"""

# Make the API call
response = requests.get(overpass_url, params={'data': overpass_query})

# Stop on a failed request. Continuing would reach the upload below with
# `file_name_3` undefined (fresh kernel) or pointing at a stale file (re-run).
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
    raise RuntimeError(f"Overpass request for OSM nodes failed with HTTP {response.status_code}")

data = response.json()

# Overpass can answer HTTP 200 and still report a runtime problem (for example
# a query timeout) in a "remark" field; surface it so truncated data is noticed.
if data.get('remark'):
    print(f"Overpass remark: {data['remark']}")

# Convert meters to degrees (approx.)
meters_to_degrees = 1 / 111320

# Half extents of the rectangle drawn around every node. They do not depend on
# the node, so they are computed once instead of once per element.
# NOTE(review): the same factor is used for longitude and latitude, so the
# rectangle gets narrower east-west with increasing distance from the equator.
d_lon = (width_meters / 2) * meters_to_degrees
d_lat = (height_meters / 2) * meters_to_degrees

# Create a GeoJSON structure
geojson_data = {
    "type": "FeatureCollection",
    "features": []
}

# Populate the GeoJSON structure with data: one rectangle polygon per node,
# centred on the node and carrying the node's OSM tags as properties.
for element in data['elements']:
    if element['type'] == 'node':
        lon = element['lon']
        lat = element['lat']

        rectangle = Polygon([
            (lon - d_lon, lat - d_lat),
            (lon + d_lon, lat - d_lat),
            (lon + d_lon, lat + d_lat),
            (lon - d_lon, lat + d_lat),
            (lon - d_lon, lat - d_lat)  # Closing the rectangle
        ])

        feature = {
            "type": "Feature",
            "geometry": mapping(rectangle),
            "properties": element.get('tags', {})
        }
        geojson_data["features"].append(feature)

file_name_3 = f'{searched_area}_OSM_nodes.geojson'

# Save the data to a GeoJSON file
with open(file_name_3, "w") as f:
    json.dump(geojson_data, f)

print(f"OK: {response.status_code} Geojson {file_name_3} successfully created")

#print(json.dumps(data, indent=2)) #PRINT RAW DATA FOR TROUBLESHOOTING

# Upload the freshly written file to the COS bucket
with open(file_name_3, 'rb') as geojson_file:
    cos_client.upload_fileobj(geojson_file, config["OSM_DATA"], file_name_3)

print(f"GeoJSON file '{file_name_3}' successfully uploaded to COS bucket.")

OK: 200 Geojson Maharashtra_OSM_nodes.geojson successfully created
GeoJSON file 'Maharashtra_OSM_nodes.geojson' successfully uploaded to COS bucket.


In [10]:
# Escaping apostrophes and replacing key name for non blank attributes
# Add related category for replacement

# Plain-text substitutions applied, in this order, to the serialized GeoJSON
# files (see apply_replacements): each OSM `"key": "value"` pair is rewritten
# into an `"osm_type": "<label>"` pair.
replacements = {
# Double every single quote so the text can later be embedded in SQL string literals
    "'": "''",
    '"amenity": "school"': '"osm_type": "school"',
    '"amenity": "college"': '"osm_type": "college"',
    '"amenity": "university"': '"osm_type": "university"',
    '"amenity": "place_of_worship"': '"osm_type": "Place of Worship"',
    '"amenity": "hospital"': '"osm_type": "hospital"',
    '"amenity": "fuel"': '"osm_type": "fuel"',
    '"amenity": "bank"': '"osm_type": "bank"',
    '"amenity": "office"': '"osm_type": "office"',
    '"landuse": "quarry"': '"osm_type": "quarry"',
    '"landuse": "industrial"': '"osm_type": "industrial"',
    '"landuse": "greenhouse_horticulture"': '"osm_type": "Greenhouse"',
    '"building": "commercial"': '"osm_type": "commercial"',
    '"building": "industrial"': '"osm_type": "industrial"',
    '"shop": "mall"': '"osm_type": "mall"',
    '"shop": "supermarket"': '"osm_type": "supermarket"',
    '"office": "government"': '"osm_type": "government"',
    '"office": "diplomatic"': '"osm_type": "diplomatic"',
    '"information": "office"': '"osm_type": "office"',
    '"tourism": "attraction"': '"osm_type": "attraction"',
    # Only the key is rewritten here, so any "historic" value is kept as the label
    '"historic": "': '"osm_type": "',
    '"power": "plant"': '"osm_type": "power plant"',
    # Military land use is labelled with the fixed marker "INACTIVE"
    '"landuse": "military"': '"osm_type": "INACTIVE"',
    '"aeroway": "apron"': '"osm_type": "Airport facility"',
    '"aeroway": "aerodrome"': '"osm_type": "Airport facility"'

}

# Function to apply replacements
def apply_replacements(text, replacements):
    """Return `text` after applying every substitution of `replacements` in order.

    Args:
        text: the string to rewrite.
        replacements: mapping of substring -> replacement; entries are applied
            one after another in the mapping's iteration order, each on the
            result of the previous one.

    Returns:
        The rewritten string.
    """
    rewritten = text
    for needle in replacements:
        rewritten = rewritten.replace(needle, replacements[needle])
    return rewritten

# The three exported GeoJSON files, each paired with the message printed once
# its rewritten content has been written back.
edit_targets = [
    (file_name_1, "OSM Ways eplacement successfull."),
    (file_name_2, "OSM Relations replacement successfull."),
    (file_name_3, "OSM Nodes replacement successfull."),
]

# Rewrite every file in place: read it, apply the substitutions, write it back.
for target_path, done_message in edit_targets:
    with open(target_path, 'r', encoding='utf-8') as source_file:
        content = source_file.read()

    modified_content = apply_replacements(content, replacements)

    with open(target_path, 'w', encoding='utf-8') as rewritten_file:
        rewritten_file.write(modified_content)

        print(done_message)

# Upload modified results for troubleshooting (stored under an "edited_" prefix)
for target_path, _ in edit_targets:
    with open(target_path, 'rb') as edited_file:
        cos_client.upload_fileobj(edited_file, config["OSM_DATA"], "edited_" + target_path)
    print(f"GeoJSON file edited_{target_path} successfully uploaded to COS bucket.")

OSM Ways eplacement successfull.
OSM Relations replacement successfull.
OSM Nodes replacement successfull.
GeoJSON file edited_Maharashtra_OSM_ways.geojson successfully uploaded to COS bucket.
GeoJSON file edited_Maharashtra_OSM_relations.geojson successfully uploaded to COS bucket.
GeoJSON file edited_Maharashtra_OSM_nodes.geojson successfully uploaded to COS bucket.


In [11]:
# Local file name, also used as the object key, of the progress report
json_filename = 'OSM_labeling_state.json'

def log_state_to_bucket(processing_state: dict):
    """Write `processing_state` to a local JSON file and upload it to COS.

    The file named by `json_filename` is overwritten on every call and then
    uploaded to the 'notebook-job-status' bucket under the same name.

    Args:
        processing_state: JSON-serializable progress information.
    """
    with open(json_filename, "w") as state_file:
        json.dump(processing_state, state_file)

    upload_arguments = {
        'Filename': json_filename,
        'Bucket': 'notebook-job-status',
        'Key': json_filename,
    }
    cos_client.upload_file(**upload_arguments)

In [12]:
def connect_to_db():
    """Open a JDBC connection to DB2 and return a cursor on it.

    Uses the local 'db2jcc4.jar' driver together with the connection string and
    credentials from `config`.

    Returns:
        A cursor created from the new connection. The connection object itself
        is not returned or stored by this function.
    """

    jar = 'db2jcc4.jar'
    os.environ['CLASSPATH'] = jar

    args='-Djava.class.path=%s' % jar
    jvm_path = jpype.getDefaultJVMPath()
    # A failure to start the JVM here is only reported; execution continues.
    try:
        jpype.startJVM(jvm_path, args)
    except Exception as e:
        print('startJVM exception: ', e)
        
    # If a JVM is running but the current thread is not attached to it, attach
    # the thread and point its context class loader at the system class loader.
    if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
        jpype.java.lang.Thread.currentThread().setContextClassLoader(jpype.java.lang.ClassLoader.getSystemClassLoader())
        
    # create JDBC connection
    conn = jdbc.connect(
                'com.ibm.db2.jcc.DB2Driver',
                config['DB2_CONNECTION_STRING'],
                [config["DB2_USERNAME"], config["DB2_PASSWORD"]],
                'db2jcc4.jar')
    
    curs = conn.cursor()

    return curs

# Notebook-wide cursor used by the fetch/update cells below
curs = connect_to_db()

startJVM exception:  startJVM() got an unexpected keyword argument 'convert_strings'


In [15]:
def fetch_non_null_osmtype_buildings(cursor):
    '''
        Fetch the coordinates of all rows of the selected SQL table whose OSM_TYPE
        is set and is not one of 'apartments', 'residential', 'house', 'hut'.

        If the query fails, a fresh cursor is obtained via connect_to_db() and the
        query is retried once; a failure of the retry is raised to the caller.

        Args:
            cursor: DB-API cursor used for the first attempt.

        Returns:
            pandas.DataFrame with the columns 'latitude' and 'longitude'.
    '''

    columns = ['latitude', 'longitude']

    # Select exactly the columns the DataFrame is built with; "SELECT *" returns
    # every table column and does not fit the two column names above.
    sql = f"""
            SELECT {', '.join(columns)} FROM "USER1"."{sql_tablename}" WHERE OSM_TYPE NOT IN ('apartments', 'residential', 'house', 'hut') AND OSM_TYPE IS NOT NULL
        """

    try:
        cursor.execute(sql)
        data = cursor.fetchall()
    except Exception as e:
        print(f"Fetch items error occured: {e}")
        print("Reconnecting to the database try again...")

        # connect_to_db() already returns a cursor, not a connection.
        cursor = connect_to_db()
        cursor.execute(sql)
        data = cursor.fetchall()

    # Built outside of a `finally` block: a `return` inside `finally` would
    # swallow the retry's exception or fail with a NameError on `data`.
    return pd.DataFrame(data=data, columns=columns)

In [None]:
# Coordinates of the rows whose OSM_TYPE is about to be reset by the next cells
non_null_osm_df = fetch_non_null_osmtype_buildings(curs)
# Last expression of the cell: displays the fetched frame
non_null_osm_df

In [None]:
def set_null_OSM_TYPE_DB2_row(lat, lon, cursor):
    """Set OSM_TYPE to NULL for the rows matching the given coordinates.

    Args:
        lat: latitude the row's LATITUDE column must equal.
        lon: longitude the row's LONGITUDE column must equal.
        cursor: DB-API cursor used to execute the UPDATE.

    Errors raised while executing are printed together with the statement and
    are not re-raised.
    """
    try:
        statement = f"""
        UPDATE "USER1"."{sql_tablename}"
            SET
                "OSM_TYPE" = NULL
            WHERE 
                ("LATITUDE" = {lat}) AND 
                ("LONGITUDE" = {lon})
        """
        cursor.execute(statement)
    except Exception as error:
        print(error, statement)

In [None]:
# Reset OSM_TYPE for every previously labelled row.
# The notebook-wide cursor is named `curs`; the name `cursor` is not defined at
# cell level, so passing it raised a NameError for every row and nothing was reset.
for row in non_null_osm_df.itertuples():

    try:
        set_null_OSM_TYPE_DB2_row(row.latitude, row.longitude, curs)
    except Exception as e:
        print(f'Exception occured: {e}')

In [14]:
# Function to update db2
# Modify SQL query according to OSM object type

def upd_split_db2(lat, lon, osm_type, cursor, osm_object='WAY'):
    """Label the table rows at the given coordinates as OSM-derived non-residential.

    Args:
        lat: latitude the row's LATITUDE column must equal.
        lon: longitude the row's LONGITUDE column must equal.
        osm_type: label written to OSM_TYPE.
        cursor: DB-API cursor used to execute the UPDATE.
        osm_object: value written to OSM_OTHER_TAGS (kind of OSM object the label
            was derived from). Defaults to 'WAY'.

    Errors raised while executing are printed together with the statement and
    are not re-raised.
    """
    # The label originates from OSM tag values, so it is bound as a parameter
    # instead of being spliced into the statement. The table name cannot be
    # bound; lat/lon are numbers and stay formatted inline as before, so the
    # equality match is unchanged.
    sql = f"""
        UPDATE "USER1"."{sql_tablename}"
            SET
                "OSM_TYPE" = ?,
                "CLASSIFICATION_SOURCE" = 'OSM Derived',
                "CLASSIFICATION_TYPE" = 'non-res',
                "OSM_OTHER_TAGS" = ?
            WHERE 
                ("LATITUDE" = {lat}) AND 
                ("LONGITUDE" = {lon})
        """
    # The GeoJSON text had every apostrophe doubled for embedding in a SQL
    # literal; a bound value needs no escaping, so undo the doubling to store
    # the same text as before.
    params = [str(osm_type).replace("''", "'"), osm_object]
    try:
        cursor.execute(sql, params)
    except Exception as e:
        print(e, sql, params)



In [15]:
def fetch_builings_in_bbox(cursor, lon_min, lon_max, lat_min, lat_max):
    '''
        Fetch the coordinates of all rows of the selected SQL table that lie inside
        the given rectangle (bounds inclusive).

        If the query fails, a fresh cursor is obtained via connect_to_db() and the
        query is retried once; a failure of the retry is raised to the caller.
        The fresh cursor is local to this call.

        Args:
            cursor: DB-API cursor used for the first attempt.
            lon_min, lon_max: longitude bounds of the rectangle.
            lat_min, lat_max: latitude bounds of the rectangle.

        Returns:
            pandas.DataFrame with the columns 'latitude' and 'longitude'.
    '''

    columns = ['latitude', 'longitude']

    # sql statement for selecting entries by defined rectangle boundaries
    sql = f"""
        SELECT {', '.join(columns)} FROM USER1.{sql_tablename}
        WHERE 
            (LATITUDE >= {lat_min}) AND 
            (LATITUDE <= {lat_max}) AND 
            (LONGITUDE >= {lon_min}) AND 
            (LONGITUDE <= {lon_max})
        """

    try:
        cursor.execute(sql)
        data = cursor.fetchall()
    except Exception as e:
        print(f"Fetch items error occured: {e}")
        print("Reconnecting to the database try again...")

        # connect_to_db() already returns a cursor, not a connection.
        cursor = connect_to_db()
        cursor.execute(sql)
        data = cursor.fetchall()

    # Built outside of a `finally` block: a `return` inside `finally` would
    # swallow the retry's exception or fail with a NameError on `data`.
    return pd.DataFrame(data=data, columns=columns)

In [None]:
# Processing OSM Way information

# Load the (rewritten) WAY GeoJSON file from local storage.
# Note: this rebinds the name `geojson`, which the import cell bound to the
# `geojson` module.
with open(file_name_1) as s_f:
    geojson = json.load(s_f)

# Not used by the code visible in this notebook
dfs_list = []

# Intersections DB CENTROID vs OSM WAY FOOTPRINT

def process_buildings(feature):
    """Label every table row whose point lies inside the feature's polygon.

    Rows are pre-selected from the database by the polygon's bounding box and
    then tested with an exact point-in-polygon check.

    Args:
        feature: GeoJSON feature with a polygon geometry; the label is taken
            from its 'osm_type' property (empty string when absent) and
            title-cased.

    Returns:
        int: number of rows for which an update was issued.
    """
    outer_ring = feature['geometry']['coordinates'][0]
    polygon = shapely.Polygon(outer_ring)
    min_lon, min_lat, max_lon, max_lat = polygon.bounds

    candidates = fetch_builings_in_bbox(curs, min_lon, max_lon, min_lat, max_lat)
    candidates['osm_type'] = feature['properties'].get('osm_type', '').title()

    affected_buildings = 0
    # update db2 without assigning values to dataframe
    progress = tqdm(candidates.itertuples(), total=len(candidates), desc='ingestion_data')
    for building in progress:
        centroid = shapely.Point(building.longitude, building.latitude)
        if not polygon.contains(centroid):
            continue
        upd_split_db2(building.latitude, building.longitude, building.osm_type, curs)
        affected_buildings += 1

    return affected_buildings

# Process every WAY feature, keeping a running total of updated rows and
# publishing the progress to the bucket after each feature.
processing_state = {}
total_features = len(geojson['features'])
total_affected_buildings = 0

for position, feature in enumerate(geojson['features'], start=1):

    try:
        total_affected_buildings += process_buildings(feature)

        percent_done = round(100*position/total_features, 3)
        processing_state.update({
            'processed_counts': f'Processed: {position} of {total_features} | {percent_done}%',
            'process': 'OSM WAY processing',
            'total_affected_buildings': total_affected_buildings,
        })

        log_state_to_bucket(processing_state)
    except Exception as e:
        # A failing feature is reported and skipped; the loop continues.
        print(e)


In [None]:
#Function to update db2
# Modify SQL query according to OSM object type

def upd_split_db2(lat, lon, osm_type, cursor, osm_object='RELATION'):
    """Label the table rows at the given coordinates as OSM-derived non-residential.

    Args:
        lat: latitude the row's LATITUDE column must equal.
        lon: longitude the row's LONGITUDE column must equal.
        osm_type: label written to OSM_TYPE.
        cursor: DB-API cursor used to execute the UPDATE.
        osm_object: value written to OSM_OTHER_TAGS (kind of OSM object the label
            was derived from). Defaults to 'RELATION'.

    Errors raised while executing are printed together with the statement and
    are not re-raised.
    """
    # The label originates from OSM tag values, so it is bound as a parameter
    # instead of being spliced into the statement. The table name cannot be
    # bound; lat/lon are numbers and stay formatted inline as before, so the
    # equality match is unchanged.
    sql = f"""
        UPDATE "USER1"."{sql_tablename}"
            SET
                "OSM_TYPE" = ?,
                "CLASSIFICATION_SOURCE" = 'OSM Derived',
                "CLASSIFICATION_TYPE" = 'non-res',
                "OSM_OTHER_TAGS" = ?
            WHERE 
                ("LATITUDE" = {lat}) AND 
                ("LONGITUDE" = {lon})
        """
    # The GeoJSON text had every apostrophe doubled for embedding in a SQL
    # literal; a bound value needs no escaping, so undo the doubling to store
    # the same text as before.
    params = [str(osm_type).replace("''", "'"), osm_object]
    try:
        cursor.execute(sql, params)
    except Exception as e:
        print(e, sql, params)

In [None]:
# Processing OSM data from relations

# Load the (rewritten) RELATION GeoJSON file from local storage.
# Note: this rebinds the name `geojson`, which the import cell bound to the
# `geojson` module.
with open(file_name_2) as s_f:
    geojson = json.load(s_f)

# Not used by the code visible in this notebook
dfs_list = []

def process_buildings(feature):
    """Issue a label update for each table row located inside the feature's polygon.

    The database is queried for rows within the polygon's bounding box; each
    returned row is then checked against the polygon itself.

    Args:
        feature: GeoJSON feature with a polygon geometry; its 'osm_type'
            property (empty string when absent), title-cased, is the label.

    Returns:
        int: number of rows for which an update was issued.
    """
    ring_coordinates = feature['geometry']['coordinates'][0]
    polygon = shapely.Polygon(ring_coordinates)
    bounds = polygon.bounds
    min_lon, min_lat = bounds[0], bounds[1]
    max_lon, max_lat = bounds[2], bounds[3]

    bbox_rows = fetch_builings_in_bbox(curs, min_lon, max_lon, min_lat, max_lat)

    label = feature['properties'].get('osm_type', '').title()
    bbox_rows['osm_type'] = label

    affected_buildings = 0
    # update db2 without assigning values to dataframe
    for entry in tqdm(bbox_rows.itertuples(), total=len(bbox_rows), desc='ingestion_data'):
        inside = polygon.contains(shapely.Point(entry.longitude, entry.latitude))
        if inside:
            upd_split_db2(entry.latitude, entry.longitude, entry.osm_type, curs)
            affected_buildings = affected_buildings + 1

    return affected_buildings

# Process every RELATION feature exactly once. A leftover bare loop used to run
# process_buildings over all features before this one, so every database query
# and update was executed twice and the first pass had no error handling.
processing_state = {}
total_features = len(geojson['features'])

# Continue the running total of an earlier processing cell when there is one;
# start from 0 otherwise, so this cell also works when it is run on its own.
total_affected_buildings = globals().get('total_affected_buildings', 0)

for fidx, feature in enumerate(geojson['features']):

    try:
        affected_buildings = process_buildings(feature)

        total_affected_buildings += affected_buildings

        processing_state['processed_counts'] = f'Processed: {fidx+1} of {total_features} | {round(100*(fidx + 1)/total_features, 3)}%'
        processing_state['process'] = 'OSM RELATIONS processing'
        processing_state['total_affected_buildings'] = total_affected_buildings

        log_state_to_bucket(processing_state)
    except Exception as e:
        # A failing feature is reported and skipped; the loop continues.
        print(e)


In [None]:
#Function to update db2
# Modify SQL query according to OSM object type

def upd_split_db2(lat, lon, osm_type, cursor, osm_object='NODE'):
    """Label the table rows at the given coordinates as OSM-derived non-residential.

    Args:
        lat: latitude the row's LATITUDE column must equal.
        lon: longitude the row's LONGITUDE column must equal.
        osm_type: label written to OSM_TYPE.
        cursor: DB-API cursor used to execute the UPDATE.
        osm_object: value written to OSM_OTHER_TAGS (kind of OSM object the label
            was derived from). Defaults to 'NODE'.

    Errors raised while executing are printed together with the statement and
    are not re-raised.
    """
    # The label originates from OSM tag values, so it is bound as a parameter
    # instead of being spliced into the statement. The table name cannot be
    # bound; lat/lon are numbers and stay formatted inline as before, so the
    # equality match is unchanged.
    sql = f"""
        UPDATE "USER1"."{sql_tablename}"
            SET
                "OSM_TYPE" = ?,
                "CLASSIFICATION_SOURCE" = 'OSM Derived',
                "CLASSIFICATION_TYPE" = 'non-res',
                "OSM_OTHER_TAGS" = ?
            WHERE 
                ("LATITUDE" = {lat}) AND 
                ("LONGITUDE" = {lon})
        """
    # The GeoJSON text had every apostrophe doubled for embedding in a SQL
    # literal; a bound value needs no escaping, so undo the doubling to store
    # the same text as before.
    params = [str(osm_type).replace("''", "'"), osm_object]
    try:
        cursor.execute(sql, params)
    except Exception as e:
        print(e, sql, params)

In [None]:
# Processing Node information

# Load the (rewritten) NODE GeoJSON file from local storage.
# Note: this rebinds the name `geojson`, which the import cell bound to the
# `geojson` module.
with open(file_name_3) as s_f:
    geojson = json.load(s_f)

# Not used by the code visible in this notebook
dfs_list = []

# Intersections CENTROID vs OSM NODES
def process_buildings(feature):
    """Update the label of all table rows that fall inside the feature's polygon.

    Candidate rows come from a bounding-box query against the database and are
    filtered with a point-in-polygon test before the update is issued.

    Args:
        feature: GeoJSON feature with a polygon geometry; the title-cased
            'osm_type' property (empty string when absent) is used as label.

    Returns:
        int: number of rows for which an update was issued.
    """
    polygon = shapely.Polygon(feature['geometry']['coordinates'][0])
    west, south, east, north = polygon.bounds

    nearby = fetch_builings_in_bbox(curs, west, east, south, north)

    nearby['osm_type'] = feature['properties'].get('osm_type', '').title()

    affected_buildings = 0
    rows = nearby.itertuples()
    # update db2 without assigning values to dataframe
    for record in tqdm(rows, total=len(nearby), desc='ingestion_data'):
        location = shapely.Point(record.longitude, record.latitude)
        if polygon.contains(location):
            upd_split_db2(record.latitude, record.longitude, record.osm_type, curs)
            affected_buildings += 1

    return affected_buildings

# Process every NODE feature, publishing the progress to the bucket after each one.
processing_state = {}
total_features = len(geojson['features'])

# Continue the running total of an earlier processing cell when there is one;
# start from 0 otherwise, so this cell also works when it is run on its own.
total_affected_buildings = globals().get('total_affected_buildings', 0)

for fidx, feature in enumerate(geojson['features']):

    try:
        affected_buildings = process_buildings(feature)

        total_affected_buildings += affected_buildings

        processing_state['processed_counts'] = f'Processed: {fidx+1} of {total_features} | {round(100*(fidx + 1)/total_features, 3)}%'
        # This cell handles nodes; the label previously still read
        # 'OSM RELATIONS processing' (copied from the relation cell).
        processing_state['process'] = 'OSM NODES processing'
        processing_state['total_affected_buildings'] = total_affected_buildings

        log_state_to_bucket(processing_state)
    except Exception as e:
        # A failing feature is reported and skipped; the loop continues.
        print(e)

    
