## Initial configuration
### To start working with this notebook, you need to provide the necessary credentials and settings.
### Below is a template of the configuration. Prepare it outside of this notebook, fill in your own values, and copy & paste everything between the triple quotes into the input field of the next cell.
    """
    {
    "ML_MODELS_BUCKET": "ml-saved-models",
    "ML_MODELS_BUCKET_CRN": "xxx",
    "CLOUDANT_API_KEY": "xxx",
    "CLOUDANT_URL": "xxx",
    "CLOUDANT_UI_DB_API_KEY": "xxx",
    "CLOUDANT_UI_DB_URL": "xxx",
    "UTILS_BUCKET": "notebook-utils-bucket",
    "BUCKET_TIFF": "kenya-images",
    "HEIGHTS_TIFF_FILENAME": "WSF3Dv3_Kenya.tif",
    "DB_NAME": "features_db",
    "UI_DB_NAME": "buildings_db",
    "COS_ENDPOINT_URL": "xxx",
    "COS_APIKEY": "xxx",
    "TYPE_SOURCE_FILTER": "area",
    "AREA_TRESHOLD": 0
    }
    """


In [None]:
import getpass
import json

# Read the JSON configuration through a hidden prompt so that the credentials
# are never stored in the notebook source or echoed in its output.
config_str = getpass.getpass('Enter your prepared config: ')
config = json.loads(config_str)

# Fail fast: report every key this notebook reads that is absent, instead of
# hitting a bare KeyError several cells later (after slow downloads started).
required_config_keys = (
    "CLOUDANT_API_KEY",
    "CLOUDANT_URL",
    "COS_APIKEY",
    "COS_ENDPOINT_URL",
    "ML_MODELS_BUCKET",
    "ML_MODELS_BUCKET_CRN",
    "DB_NAME",
)
missing_config_keys = [key for key in required_config_keys if key not in config]
if missing_config_keys:
    raise KeyError(f"Config is missing required keys: {missing_config_keys}")

In [None]:
#!pip install ibmcloudant

In [2]:
import io
import shutil
import os
from tensorflow import keras
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import sklearn.metrics as SKM
import ibm_boto3
from botocore.client import Config
import base64
from tqdm import tqdm
import time 

from ibmcloudant.cloudant_v1 import CloudantV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import cv2

import ibmcloudant

In [3]:
# init cloudant db client
# Authenticates with the API key from the pasted config and points the client
# at the service URL from the same config.
authenticator = IAMAuthenticator(config["CLOUDANT_API_KEY"])
client = CloudantV1(authenticator=authenticator)
client.set_service_url(config["CLOUDANT_URL"])

# init COS client
# `data_conn` is the S3-style resource handle used later to download the saved
# models. The service instance id is read from the "ML_MODELS_BUCKET_CRN" key.
# NOTE(review): the IAM auth endpoint below looks like the legacy bluemix.net
# host - confirm it is still the endpoint recommended for this account.
data_conn = ibm_boto3.resource(service_name='s3',
    ibm_api_key_id=config["COS_APIKEY"],
    ibm_service_instance_id=config["ML_MODELS_BUCKET_CRN"],
    ibm_auth_endpoint='https://iam.bluemix.net/oidc/token',
    config=Config(signature_version='oauth'),
    endpoint_url=config["COS_ENDPOINT_URL"])


### Download & prepare testing data

In [4]:
print('Init data path for dataset')
base_path = os.getcwd()
data_path = os.path.join(base_path, 'Sentinel2_set/')

# Paths of the train, test and validation folders (only the path strings are
# built here; nothing is created on disk by this cell).
train_path, test_path, validation_path = (
    os.path.join(data_path, split_dir)
    for split_dir in ('train/', 'test/', 'validation/')
)

# Paths of the residential / nonresidential subfolder of every split.
train_res_path, train_nonres_path = (
    os.path.join(train_path, class_dir)
    for class_dir in ('residential', 'nonresidential')
)
test_res_path, test_nonres_path = (
    os.path.join(test_path, class_dir)
    for class_dir in ('residential', 'nonresidential')
)
validation_res_path, validation_nonres_path = (
    os.path.join(validation_path, class_dir)
    for class_dir in ('residential', 'nonresidential')
)



Init data path for dataset


In [5]:
res_path = os.path.join(data_path, 'residential')
nonres_path = os.path.join(data_path, 'nonresidential')

# Every class folder of every split. Each one is wiped and recreated below so
# that images left over from a previous run never mix with fresh downloads.
dataset_dirs = [
    train_res_path, test_res_path, validation_res_path,
    train_nonres_path, test_nonres_path, validation_nonres_path,
]

print('Delete local res and nonres directories, if exists (in ordet to have updated data)')
for dataset_dir in dataset_dirs:
    try:
        shutil.rmtree(dataset_dir)
    except FileNotFoundError:
        # Expected on a first run: there is nothing to delete yet.
        pass
    except OSError as error:
        # Stay best-effort (do not abort the notebook), but make it visible
        # that stale files may remain in this folder.
        print(f"Could not delete {dataset_dir}: {error}")
    os.makedirs(dataset_dir, exist_ok=True)



Delete local res and nonres directories, if exists (in ordet to have updated data)


In [6]:
# fetch OSM testing data
response = client.post_find(
    db=config["DB_NAME"],
    selector={
        "properties.type_source": "osm",  # only OSM entries
        "properties.image_ML_type": "test",  # only documents of the test split
        "_attachments": {"$exists": True},  # only documents that have attachments
        "properties.area_in_meters": {"$gt": 20}
    },
    fields=["_id", "_attachments", "properties.image_ML_type", "properties.image_ML_class"],
).get_result()

print(f"Images revealed in the database: {len(response['docs'])}")

# (split, class) -> (target folder, file-name suffix).
# An explicit table means a document with an unexpected split/class is reported
# instead of silently reusing the folder chosen for the previous document.
save_targets = {
    ('train', 'residential'): (train_res_path, "_train_res_.png"),
    ('train', 'nonresidential'): (train_nonres_path, "_train_nonres_.png"),
    ('test', 'residential'): (test_res_path, "_test_res_.png"),
    ('test', 'nonresidential'): (test_nonres_path, "_test_nonres_.png"),
    ('validation', 'residential'): (validation_res_path, "_validation_res_.png"),
    ('validation', 'nonresidential'): (validation_nonres_path, "_validation_nonres_.png"),
}

# Download every attachment and store it as an image in the matching folder.
for document in tqdm(response["docs"], desc='Images processed'):
    try:
        image_ML_type = document['properties']['image_ML_type']  # train, test or validation
        image_ML_class = document['properties']['image_ML_class']  # residential or nonresidential

        # Resolve the destination before downloading anything for this document.
        target = save_targets.get((image_ML_type, image_ML_class))
        if target is None:
            raise ValueError(
                f"unsupported image_ML_type/image_ML_class: {image_ML_type!r}/{image_ML_class!r}"
            )
        save_path, name_appendix = target

        # The find result only carries attachment stubs; fetch the full document
        # with inline (base64) attachment data. A separate name is used so the
        # find result in `response` is not overwritten while it is iterated.
        full_document = client.get_document(
            db=config["DB_NAME"],
            doc_id=document["_id"],
            attachments=True,
        ).get_result()

        # The document id has the form "<lon>:<lat>".
        lon, lat = full_document['_id'].split(':')[0], full_document['_id'].split(':')[1]
        file_name = f"osm_id_{full_document['properties']['osm_id']}_lon_{lon}_lat_{lat}"

        for attachment_name, attachment_info in full_document['_attachments'].items():
            attachment_data = base64.b64decode(attachment_info['data'])
            image = cv2.imdecode(np.frombuffer(attachment_data, np.uint8), cv2.IMREAD_COLOR)
            if image is None:
                raise ValueError(f"attachment {attachment_name!r} could not be decoded as an image")

            # Store cv2 image locally; imwrite signals failure through its
            # return value, so check it instead of losing the image silently.
            target_file = os.path.join(save_path, file_name + name_appendix)
            if not cv2.imwrite(target_file, image):
                raise IOError(f"could not write {target_file}")
            time.sleep(0.005)
    except Exception as e:
        # Best effort: report the failing document and continue with the next one.
        print(f"Document id {document['_id']} Exception occured: {e}")



Images revealed in the database: 6155


Images processed: 100%|██████████| 6155/6155 [01:22<00:00, 74.72it/s]


In [7]:
# Data generator over the test split. The batch size equals the number of files
# in both class folders, so a single batch holds the whole test set.
test_image_total = len(os.listdir(test_res_path)) + len(os.listdir(test_nonres_path))

train_datagen = keras.preprocessing.image.ImageDataGenerator()
test_generator = train_datagen.flow_from_directory(
    test_path,
    target_size=(124, 124),
    batch_size=test_image_total,
    class_mode='binary',
)

Found 6155 images belonging to 2 classes.


In [10]:
# Report how many test images were stored for each class.
res_image_total = len(os.listdir(test_res_path))
nonres_image_total = len(os.listdir(test_nonres_path))
print(f'Res images amount: {res_image_total}, Nonres images amount: {nonres_image_total}')

Res images amount: 5280, Nonres images amount: 875


In [11]:
# download models
models_dir = os.path.join(base_path, 'models')
os.makedirs(models_dir, exist_ok=True)

model_names = ['Baseline_CFG009_ResNet50_dt09_05_2023_01_06_23.h5', 'CustomArchitecture_CFG010_DenseNet121_dt09_06_2023_01_05_44.h5']
model_paths = [os.path.join(models_dir, model_name) for model_name in model_names]

for model_name, model_path in zip(model_names, model_paths):
    # Stream the object to disk instead of loading the whole model into memory
    # (and copying it into a BytesIO) before writing it out.
    body = data_conn.Object(config["ML_MODELS_BUCKET"], model_name).get()['Body']

    # Write to a temporary name first so an interrupted download never leaves
    # a truncated file under the final model name.
    partial_path = model_path + '.part'
    with open(partial_path, 'wb') as outfile:
        shutil.copyfileobj(body, outfile)
    os.replace(partial_path, model_path)

os.listdir(models_dir)

['CustomArchitecture_CFG010_DenseNet121_dt09_06_2023_01_05_44.h5',
 'Baseline_CFG009_ResNet50_dt09_05_2023_01_06_23.h5']

In [42]:

def _confusion_cell_text(value, count):
    '''Format one confusion-matrix cell as "<percent> %" plus "<value> of <count>".'''
    # Guard against an empty class so the annotation never divides by zero.
    share = round(100 * value / count, 2) if count else 0.0
    return f"{share} %\n {value} of {count}"


def evaluate_model(idx, model_name, ax=None):
    '''
    Score a saved model on the test part of the dataset and draw the result.

    The model is loaded from `models_dir`, one batch is pulled from
    `test_generator`, and an annotated confusion-matrix heatmap is drawn
    together with accuracy, F1, precision and recall.

    Parameters
    ----------
    idx : int
        Position of the subplot to draw on when `ax` is not given.
    model_name : str
        File name of the model inside `models_dir`.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted, the notebook-level `axes` created by
        `plt.subplots` is used (`axes[idx]`, or `axes` itself when it is a
        single Axes rather than an array).

    Returns
    -------
    None
        The result of the function is the plotted confusion matrix.
    '''

    print(f"Evaluating model {model_name}")
    model = keras.models.load_model(os.path.join(models_dir, model_name))
    print()

    # One batch from the generator; built-in next() works on any iterator.
    images, true_labels = next(test_generator)
    true_labels = np.asarray(true_labels).astype(int)

    # First output column thresholded at 0.5 gives the predicted class index.
    scores = np.asarray(model.predict(images))
    predictions = (scores.reshape(len(scores), -1)[:, 0] > 0.5).astype(int)

    # labels=[0, 1] keeps the matrix 2x2 even if one class is absent from the
    # labels or predictions; otherwise the reshape(2, 2) below would fail.
    confusion_matrix = SKM.confusion_matrix(true_labels, predictions, labels=[0, 1])

    metrics = {
        'Accuracy': SKM.accuracy_score(true_labels, predictions),
        'F1_Score': SKM.f1_score(true_labels, predictions),
        'Precision': SKM.precision_score(true_labels, predictions),
        'Recall': SKM.recall_score(true_labels, predictions),
        }

    # Per-class totals are the row sums of the matrix, i.e. they describe the
    # labels that were actually evaluated rather than a directory listing.
    class_totals = confusion_matrix.sum(axis=1)
    counts = np.repeat(class_totals, 2)  # [row0, row0, row1, row1]

    # Names follow the row-major cell order; this assumes class index 0 is
    # nonresidential and 1 is residential - TODO confirm via class_indices.
    group_names = ['Correctly predicted Nonres','Incorrectly predicted Res','Incorrectly predicted Nonres','Correctly predicted Res']
    group_percentages_and_counts = [
        _confusion_cell_text(value, count)
        for value, count in zip(confusion_matrix.flatten(), counts)
    ]
    labels = [f"{v1}\n{v3}" for v1, v3 in zip(group_names, group_percentages_and_counts)]
    labels = np.asarray(labels).reshape(2, 2)

    x_axis_labels = ['Predicted nonres', 'Predicted res']
    y_axis_labels = ['Actual nonres', 'Actual res']
    print(confusion_matrix)

    if ax is None:
        # Backward-compatible default: draw on the notebook-level subplot grid.
        ax = axes[idx] if isinstance(axes, np.ndarray) else axes

    heatmap = sns.heatmap(
        SKM.confusion_matrix(true_labels, predictions, labels=[0, 1], normalize='true'),
        ax=ax,
        xticklabels=x_axis_labels,
        yticklabels=y_axis_labels,
        annot=labels,
        fmt='')

    heatmap.set(
        title='Confusion matrix',
        xlabel='Predicted label',
        ylabel='Actual label'
    )

    # Model name and metrics are written as text lines underneath the heatmap.
    heatmap.text(0, 2.45, f"Model: {model_name}", fontsize = 11)
    for row, (metric, value) in enumerate(metrics.items()):
        heatmap.text(0, 0.11*row + 2.6, f"{metric}: {round(value, 4)}", fontsize = 11)

In [None]:
# run evaluation over selected models
# squeeze=False makes plt.subplots always return a 2-D array of Axes; with the
# default it returns a bare Axes for a single model and axes[idx] would fail.
fig, axes = plt.subplots(1, len(model_names), figsize=(25, 7), squeeze=False)
axes = axes.ravel()  # one row of subplots -> flat array indexed by model position
for idx, model_name in enumerate(model_names):
    evaluate_model(idx, model_name)