## 8_rural_urban_json_segregation
### Creates polygons from the urbanization layer provided as a raster. These polygons are stored and used by 9_urban_rural_segregation to categorize buildings into urbanization categories based on their location
### This is an auxiliary notebook allowing efficient categorization of buildings. It needs to be executed twice together with 9_urban_rural_segregation: first to categorize buildings on an "overview" level, i.e., urban-suburban-rural categories, and a second time to categorize buildings into finer-grained categories using the "detailed" value of the SEGREGATION_STYLE option
### The intermediate result of the overview level categorization should be moved to the front-end as an overlay

### The result can be manually appended to in case the SMOD grid is truncated, by altering the penultimate cell in the notebook - in the case of Kenya, the inner city area of Mombasa was missing from the SMOD definitions - Uncomment the cell at your own peril!

### Initial configuration
#### To start working with this particular notebook, you need to provide the necessary credentials and settings
#### Below is a template of the configuration. Prepare it outside of this notebook, then copy & paste the whole content between the triple quotes into the next cell's input field
    """
    {
    "COS_ENDPOINT_URL": "s3.private.eu-de.cloud-object-storage.appdomain.cloud",
    "COS_AUTH_ENDPOINT_URL": "https://iam.cloud.ibm.com/oidc/token",
    "COS_APIKEY": "xxx",
    "COUNTRY_NAME": "Kenya",
    "UTILS_BUCKET": "xxx",
    "SMOD_BUCKET": "xxx",
    "SEGREGATION_STYLE": "overview"
    }
    """


In [10]:
# Read notebook configuration
import getpass
import json

# The configuration JSON contains the COS API key, so it is requested through
# getpass (the pasted text is not echoed back into the notebook output).
config_str = getpass.getpass('Enter your prepared config: ')
# Parsed settings dictionary read by the following cells (COS_APIKEY,
# COS_ENDPOINT_URL, UTILS_BUCKET, SMOD_BUCKET, SEGREGATION_STYLE).
config = json.loads(config_str)

In [11]:
# Import necessary libraries
import pandas as pd
import geopandas as gpd
import jaydebeapi as jdbc
import jpype
import os
import shapely
import rasterio as rio
from rasterio.plot import show
from rasterio.mask import mask
import matplotlib.pyplot as plt
import  rioxarray
from skimage import measure as M
import requests
from botocore.client import Config
import ibm_boto3
import io

In [12]:
# country = config["COUNTRY_NAME"]
# intermediate_SMOD_tif_file = country + "_SMOD.tif"
# output_SMOD_polygon_geojson =  country + "_segregated_cleaned.json"

# `segregation` maps every urbanization category to the SMOD raster codes that
# belong to it. `segregation_priorities` lists the categories that are written
# to the output GeoJSON, in that order; every entry MUST be a key of
# `segregation`, because it is used to look up the extracted polygons.
segregation = {}
segregation_priorities = []

if config["SEGREGATION_STYLE"] == "overview":
    segregation = {
        'URBAN': [22, 23, 30],
        'SUBURBAN': [21],
        'RURAL': [12, 13],
    }
    # RURAL is extracted but not exported on the overview level.
    segregation_priorities = ['URBAN', 'SUBURBAN']

elif config["SEGREGATION_STYLE"] == "detailed":
    segregation = {
        'URBAN_CENTER': [30],
        'DENSE_URBAN': [23],
        'SEMI_DENSE_URBAN': [22],
        'SUBURBAN_PERI_URBAN': [21],
        'RURAL_CLUSTER': [13],
        'LOW_DENSITY_RURAL': [12],
    }
    # The names have to match the keys above exactly ('SUBURBAN' / 'RURAL'
    # do not exist on the detailed level and would raise a KeyError later).
    segregation_priorities = [
        'URBAN_CENTER',
        'DENSE_URBAN',
        'SEMI_DENSE_URBAN',
        'SUBURBAN_PERI_URBAN',
        'RURAL_CLUSTER',
        'LOW_DENSITY_RURAL',
    ]

else:
    # Fail early instead of silently producing empty GeoJSON files.
    raise ValueError(
        'Unsupported SEGREGATION_STYLE '
        f'{config["SEGREGATION_STYLE"]!r}; expected "overview" or "detailed"'
    )

In [13]:
# init S3 client in order to work with last tiff file version
cos_client = ibm_boto3.client(service_name='s3',
                              ibm_api_key_id=config["COS_APIKEY"],
                              config=Config(signature_version='oauth'),
                              endpoint_url=config["COS_ENDPOINT_URL"])


# import external utils library
response = cos_client.list_objects_v2(Bucket=config["UTILS_BUCKET"])

# Object keys that have to be copied from the utils bucket to the local
# working directory before the next cells can run.
utils_to_download = ['india_state.geojson']
downloaded_utils = []

try:
    # 'Contents' is absent from the response when the bucket is empty.
    for obj in response.get('Contents', []):
        name = obj['Key']
        if name not in utils_to_download:
            continue

        streaming_body_1 = cos_client.get_object(Bucket=config["UTILS_BUCKET"], Key=name)['Body']
        print("Copying to localStorage :  " + name)
        # Write the payload in one go; the data is binary-safe this way.
        with open(name, 'wb') as file:
            file.write(streaming_body_1.read())
        downloaded_utils.append(name)
except Exception as e:
    print('Error occured: ', e)
else:
    # Only report success when every requested file was really downloaded.
    missing_utils = [name for name in utils_to_download if name not in downloaded_utils]
    if missing_utils:
        print('External utils not found in the bucket listing: ', missing_utils)
    else:
        print('External utils succesfully imported')

Copying to localStorage :  india_state.geojson
External utils succesfully imported


In [15]:
india_states_df = gpd.read_file('india_state.geojson')


def _state_geometry(state_name):
    """Return the geometry of the first row whose NAME_1 equals *state_name*."""
    return india_states_df[india_states_df.NAME_1 == state_name].geometry.iloc[0]


def _last_part(state_name):
    """Return the last member polygon of a state stored as a MultiPolygon.

    NOTE(review): the last part is treated as the main polygon of the state;
    presumably that holds for this particular file - confirm when the source
    GeoJSON changes.
    """
    return _state_geometry(state_name).geoms[-1]


# Regions to process: the key is the region name (it is also used as the
# output file prefix), the value is the list of polygons covering the region.
regions_polygons = {
    'Madhya Pradesh': [
        _state_geometry('Madhya Pradesh'),
    ],
    'South-India': [
        _last_part('Tamil Nadu'),
        _last_part('Kerala'),
    ],
    'East-India': [
        _state_geometry('Jharkhand'),
        _state_geometry('Nagaland'),
        _state_geometry('Mizoram'),
        _last_part('Assam'),
    ],
}

regions_polygons

{'Madhya Pradesh': [<POLYGON ((78.365 26.869, 78.367 26.863, 78.37 26.858, 78.375 26.847, 78.381...>],
 'South-India': [<POLYGON ((80.076 13.527, 80.076 13.526, 80.079 13.529, 80.087 13.527, 80.08...>,
  <POLYGON ((74.996 12.788, 75 12.783, 75.004 12.786, 75.005 12.785, 75.004 12...>],
 'East-India': [<POLYGON ((87.6 25.315, 87.607 25.311, 87.614 25.316, 87.623 25.311, 87.626 ...>,
  <POLYGON ((95.214 26.937, 95.217 26.934, 95.226 26.934, 95.229 26.931, 95.23...>,
  <POLYGON ((92.801 24.419, 92.804 24.419, 92.807 24.42, 92.809 24.419, 92.809...>,
  <POLYGON ((95.952 27.942, 95.952 27.939, 95.952 27.937, 95.958 27.937, 95.95...>]}

In [20]:
# Reproject tiff to EPSG:4326 CRS using rioxarray
def reproject_tif_CRS(filename: str):
    """Reproject the raster stored in *filename* to EPSG:4326, in place.

    The reprojected raster is written back to the same path with DEFLATE
    compression, i.e. the original file is overwritten.

    Args:
        filename: Path of the GeoTIFF to reproject.
    """
    # Close the source dataset before its file is overwritten; an open handle
    # on the target path is leaked otherwise and blocks the write on some
    # platforms.
    with rioxarray.open_rasterio(filename) as rds:
        rds_4326 = rds.rio.reproject("EPSG:4326")
    rds_4326.rio.to_raster(filename, compress="DEFLATE")

In [21]:
# Source settlement-model (SMOD) raster. The path is relative, so the file is
# resolved against the current working directory.
# NOTE(review): nothing in this notebook downloads the file - presumably it
# has to be placed there manually; confirm.
in_tiff_file = 'GHS_SMOD_E2025_GLOBE_R2023A_54009_1000_V2_0.tif'

In [22]:
# Reproject the source raster to EPSG:4326. reproject_tif_CRS writes the
# result back to the same path, so the original file is overwritten.
reproject_tif_CRS(in_tiff_file)

In [23]:
def get_region_smod_tiff(in_tiff_file, out_tiff_file, polygon):
    """Clip *in_tiff_file* to *polygon* and store the result as *out_tiff_file*.

    Args:
        in_tiff_file: Path of the raster to clip.
        out_tiff_file: Path of the GeoTIFF that is written.
        polygon: Geometry used as the mask; the raster is cropped to it.
    """
    with rio.open(in_tiff_file) as src:
        # Keep only the pixels covered by the geometry, cropped to its extent.
        clipped, clipped_transform = mask(src, [polygon], crop=True)
        out_meta = src.meta

    # Describe the clipped array: new size, new georeferencing, nodata marker.
    _, clipped_height, clipped_width = clipped.shape
    out_meta.update(
        height=clipped_height,
        width=clipped_width,
        transform=clipped_transform,
        nodata=-1.0,
    )

    # Persist the clipped raster.
    with rio.open(out_tiff_file, "w", **out_meta) as dest:
        dest.write(clipped)

In [25]:
# Clip the source raster once per region polygon and remember every file
# that was written; files are named "<Region_Name>_<polygon index>.tif".
tiff_filenames = []
for region_name, region_polygons in regions_polygons.items():
    file_prefix = region_name.replace(' ', '_')

    for pidx, polygon in enumerate(region_polygons):
        out_tiff_file = f"{file_prefix}_{pidx}.tif"
        get_region_smod_tiff(in_tiff_file, out_tiff_file, polygon)
        tiff_filenames += [out_tiff_file]

In [26]:
from collections import Counter


def _polygon_parts(geom):
    """Return the Polygon members of *geom* (itself, or the parts of a multi-geometry)."""
    if geom.geom_type == "Polygon":
        return [geom]
    return [part for part in getattr(geom, "geoms", []) if part.geom_type == "Polygon"]


def generate_segregated_geojson(in_tiff_file, out_json_file):
    """Vectorize the urbanization classes of a raster into a GeoJSON file.

    For every category of the module-level ``segregation`` mapping the first
    band of *in_tiff_file* is reduced to that category, its contours are
    traced and converted to polygons in the raster's coordinate system.
    For the categories listed in ``segregation_priorities`` the intersecting
    polygons are merged and written to *out_json_file* as a FeatureCollection
    whose features carry the category in the ``seg_type`` property.

    Args:
        in_tiff_file: Path of the raster to vectorize.
        out_json_file: Path of the GeoJSON file that is written.

    Raises:
        KeyError: If ``segregation_priorities`` names a category that is not
            a key of ``segregation``.
        Exception: Any error raised while reading or vectorizing the raster
            is printed and re-raised, so that no partial file is written.
    """
    segregated_polygons = {}

    try:
        with rio.open(in_tiff_file) as src:
            # Read the band once; every category works on its own copy.
            band = src.read(1)
            transform = src.transform

        for k, v in segregation.items():
            top, bottom = max(v), min(v)

            data = band.copy()
            # Blank out every class outside of the category's value range ...
            data[data > top] = 0
            data[(data < bottom) & (data > 0)] = 0
            # ... and collapse the category's classes into a single value so
            # that one contour level separates "inside" from "outside".
            for layer in v:
                data[data == layer] = top

            contours = M.find_contours(data, top / 2)
            print(f'{len(contours)} contours {top} find for {k}')

            polygons = []
            for contour in contours:
                # A ring needs at least three distinct vertices; shorter
                # (open, border-touching) contours cannot form a polygon.
                if len(contour) < 3:
                    continue

                # Convert all (row, col) pixel coordinates of the contour to
                # (lon, lat) in one call.
                lons, lats = rio.transform.xy(transform, contour[:, 0], contour[:, 1])
                ring = [[float(lon), float(lat)] for lon, lat in zip(lons, lats)]
                ring.append(ring[0])  # close open contours explicitly

                polygons.append(shapely.Polygon(ring))

            segregated_polygons[k] = polygons

    except Exception as e:
        print(f"Exception occured: {e}")
        # Continuing would only fail later or write an incomplete file.
        raise

    # Initialize an empty GeoJSON object
    geojson = {
        "type": "FeatureCollection",
        "features": [],
    }

    for area in segregation_priorities:
        if area not in segregated_polygons:
            raise KeyError(
                f"'{area}' is listed in segregation_priorities but is not a key of segregation"
            )

        poly_temp = segregated_polygons[area]
        polygons = []

        for idx, p1 in enumerate(poly_temp):
            # Fold p1 into the first later polygon it intersects; that polygon
            # is visited again further on, so chains of overlapping polygons
            # end up in a single geometry.
            for later_idx in range(idx + 1, len(poly_temp)):
                p2 = poly_temp[later_idx]
                if p1.intersects(p2):
                    poly_temp[later_idx] = p2.union(p1)
                    break
            else:
                # No later polygon intersects p1: it is final.
                polygons.append(p1)

        for merged in polygons:
            # A union of polygons that merely touch is a MultiPolygon; emit
            # each part as its own feature so that every feature stays a
            # valid GeoJSON Polygon.
            for part in _polygon_parts(merged):
                geojson['features'].append({
                    "type": "Feature",
                    "properties": {'seg_type': area},
                    "geometry": json.loads(shapely.to_geojson(part)),
                })

    # Write the cleaned geojson to json file
    with open(out_json_file, "w") as outfile:
        json.dump(geojson, outfile)

    
# print(segregated_polygons)
# 654 contours 30 find for URBAN
# 1436 contours 21 find for SUBURBAN
# 1558 contours 13 find for RURAL

In [None]:
# Vectorize every clipped raster; each GeoJSON is written next to its raster
# under the same name with the '.tif' suffix replaced by '.json'.
for tif_name in tiff_filenames:
    print(f'Processing {tif_name}')
    json_filename = tif_name.replace('.tif', '.json')
    
    generate_segregated_geojson(tif_name, json_filename)    


In [52]:
# GeoJSON files currently available in the working directory
jsons = [i for i in os.listdir(os.getcwd()) if i.endswith('.json')]

# The upload is optional: it only happens when a bucket name is configured.
smod_bucket = config.get("SMOD_BUCKET")

for region_name, region_polygons in regions_polygons.items():

    region_name = region_name.replace(' ', '_')
    out_json_file = f'{region_name}.json'

    # Select exactly the per-polygon files of this region
    # ("<region>_<index>.json"). Matching on the exact names keeps a merged
    # "<region>.json" from an earlier run - and files of regions with a
    # similar name - out of the merge, so re-running does not duplicate
    # features.
    expected_jsons = [f'{region_name}_{pidx}.json' for pidx in range(len(region_polygons))]
    region_jsons = [name for name in expected_jsons if name in jsons]
    print(region_name, region_jsons)

    if len(region_jsons) > 1:
        # Several polygons: concatenate the features of all parts.
        all_features = []
        for region_json in region_jsons:
            with open(region_json) as infile:
                geojson = json.load(infile)
            all_features += geojson.get('features', [])

        out_geojson = {
            'type': 'FeatureCollection',
            'features': all_features}

        with open(out_json_file, "w") as outfile:
            json.dump(out_geojson, outfile)

    elif len(region_jsons) == 1:
        # Single polygon: the part already is the region file.
        os.replace(region_jsons[0], out_json_file)

    elif out_json_file not in jsons:
        # Neither parts nor a previously produced region file exist.
        print(f'No GeoJSON files found for {region_name}, skipping')
        continue

    #  optionaly upload file to the bucket
    if isinstance(smod_bucket, str):

        try:
            cos_client.upload_file(
                Filename=out_json_file,
                Bucket=smod_bucket,
                Key=out_json_file,
                ExtraArgs={'ContentDisposition': 'attachment'}
            )

            print(f'File {out_json_file} successfully uploaded to the COS {smod_bucket} bucket')
        except Exception as e:
            print(f'\033[91mFailed upload file to the bucket {smod_bucket}. Error: {e}')


Madhya_Pradesh ['Madhya_Pradesh_0.json']
File Madhya_Pradesh.json successfully uploaded to the COS ghsl-smod-data bucket
South-India ['South-India_1.json', 'South-India_0.json']
File South-India.json successfully uploaded to the COS ghsl-smod-data bucket
East-India ['East-India_0.json', 'East-India_1.json', 'East-India_2.json', 'East-India_3.json']
File East-India.json successfully uploaded to the COS ghsl-smod-data bucket


In [None]:
# Manual addition to the resulting geoJSON file, please be careful when updating this cell
'''
custom_feature = {
      "type": "Feature",
      "properties": {'seg_type': 'URBAN'},
      "geometry": {
        "coordinates": [
          [
            [
              39.603631842920805,
              -4.026583935088453
            ],
            [
              39.5913662604473,
              -4.027005352554326
            ],
            [
              39.58887170676036,
              -4.0292547890088315
            ],
            [
              39.588110158959665,
              -4.03214921791276
            ],
            [
              39.58588994793769,
              -4.038017650884001
            ],
            [
              39.58631953096608,
              -4.041527588926044
            ],
            [
              39.58822883987477,
              -4.043516409041231
            ],
            [
              39.59172483890467,
              -4.0436580405755365
            ],
            [
              39.61029224036358,
              -4.043374726121641
            ],
            [
              39.61028862359569,
              -4.044718641995232
            ],
            [
              39.610235789335064,
              -4.045939911856763
            ],
            [
              39.61258120580976,
              -4.050727149338229
            ],
            [
              39.617409849085305,
              -4.050374545129131
            ],
            [
              39.62280268383958,
              -4.047841652865998
            ],
            [
              39.62721739554115,
              -4.0457507568957425
            ],
            [
              39.63035680368935,
              -4.043925955762731
            ],
            [
              39.638121954408206,
              -4.043210910949725
            ],
            [
              39.6415917927653,
              -4.043670687517547
            ],
            [
              39.642846688423816,
              -4.045760454190884
            ],
            [
              39.64479868469016,
              -4.0487034115498375
            ],
            [
              39.64714318098942,
              -4.051832226824047
            ],
            [
              39.64721269439204,
              -4.059776989316703
            ],
            [
              39.64796366538113,
              -4.0613174771694105
            ],
            [
              39.65062385692548,
              -4.061266238106413
            ],
            [
              39.65441095541257,
              -4.064486953660577
            ],
            [
              39.65558079698883,
              -4.067638529957861
            ],
            [
              39.66066726525628,
              -4.072332713969132
            ],
            [
              39.6630720450585,
              -4.075140296216546
            ],
            [
              39.665214881078924,
              -4.078281897301935
            ],
            [
              39.667789538246694,
              -4.079374768497843
            ],
            [
              39.67096125289615,
              -4.07932513947587
            ],
            [
              39.673328103038415,
              -4.078778925304391
            ],
            [
              39.673128069010204,
              -4.07708704905113
            ],
            [
              39.67505357247717,
              -4.073888227235059
            ],
            [
              39.67804072269328,
              -4.073537232409635
            ],
            [
              39.679460211069255,
              -4.073186047020016
            ],
            [
              39.682206305794494,
              -4.071438281976157
            ],
            [
              39.69022221126801,
              -4.063841504922365
            ],
            [
              39.693334452026164,
              -4.063792773616697
            ],
            [
              39.69754976958305,
              -4.062084405774968
            ],
            [
              39.70329793083974,
              -4.05815923820289
            ],
            [
              39.711254468785114,
              -4.046311945017152
            ],
            [
              39.700961774218996,
              -4.04651319080692
            ],
            [
              39.68108273350589,
              -4.046245450442768
            ],
            [
              39.680510591462195,
              -4.0456623676102055
            ],
            [
              39.680519831232886,
              -4.025960338859662
            ],
            [
              39.67055734911321,
              -4.01604282596513
            ],
            [
              39.668674456202695,
              -4.011767090118681
            ],
            [
              39.66578880260934,
              -4.013625662595359
            ],
            [
              39.660763459861045,
              -4.013367898189216
            ],
            [
              39.66075614004066,
              -4.030628110581148
            ],
            [
              39.64935177477139,
              -4.030378162413783
            ],
            [
              39.64810178217124,
              -4.030128100214995
            ],
            [
              39.645049398943655,
              -4.029527485216349
            ],
            [
              39.64349607671602,
              -4.027822163867768
            ],
            [
              39.64225395500429,
              -4.025168477191414
            ],
            [
              39.641744988844636,
              -4.022434605211188
            ],
            [
              39.64163087679535,
              -4.016720874564953
            ],
            [
              39.64066238991785,
              -4.014923190023197
            ],
            [
              39.63846630239186,
              -4.012344229299828
            ],
            [
              39.63822696297842,
              -4.005687261752001
            ],
            [
              39.632038027940155,
              -4.00605336924896
            ],
            [
              39.632045067881194,
              -4.016002797180306
            ],
            [
              39.621461761611926,
              -4.026533546410576
            ],
            [
              39.603631842920805,
              -4.026583935088453
            ]
          ]
        ],
        "type": "Polygon"
      }
    }

#Append custom feature to features list
geojson['features'].append(custom_feature)
'''