## Imports and Setup

In [1]:
import os
import sys
import joblib
import subprocess

import pandas as pd
import numpy as np
import geopandas as gpd
from tqdm.notebook import tqdm

# Put the project's helper modules (../utils, resolved against the current
# working directory) at the front of the import path.
sys.path.insert(0, "../utils/")
import pred_utils
import config
import geoutils
import eval_utils
import fusion_utils

import logging
# INFO level so the helpers' log messages (loaded config, loaded model file)
# show up in the cell outputs.
logging.basicConfig(level=logging.INFO)
# Turns off pandas' chained-assignment (SettingWithCopy) warning for the whole
# notebook; assignments onto filtered frames below will therefore be silent.
pd.set_option('mode.chained_assignment', None)

# autoreload mode 2: re-import all modules before each cell execution, so
# edits to the helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2



## Generate Predictions

In [2]:
# Building footprints to classify; only the identifier and geometry are kept.
filename = '../data/vectors/bldgs_ortho_DOM.gpkg'
# Drop incomplete rows first and reset the index afterwards. Resetting before
# dropna() (as done previously) is a no-op on a freshly read file and leaves
# gaps in the index whenever a row is dropped.
bldgs = gpd.read_file(filename)[['UID', 'geometry']].dropna().reset_index(drop=True)
print(bldgs.shape)
bldgs.head(3)

(50410, 2)


Unnamed: 0,UID,geometry
0,0,MULTIPOLYGON Z (((675912.759 1682212.635 99.94...
1,1,MULTIPOLYGON Z (((675731.846 1682256.863 68.50...
2,2,MULTIPOLYGON Z (((675723.312 1682258.156 67.15...


In [3]:
# Input raster and experiment config for the roof-material classifier
# (the logged config reports attribute 'roof_material', model 'resnet50').
in_file = '../data/rasters/ortho/ortho_DOM.tif'
exp_config = '../configs/cnn/cnn-roof_material-resnet50-RGB_DOM.yaml'
c = config.load_config(exp_config, prefix='.')
# Classify every footprint. The returned frame carries ROOF_MATERIAL,
# ROOF_MATERIAL_PROB and one <CLASS>_PROB column per class (see output below).
# The recorded run took ~3h54m for 50,410 footprints.
# NOTE: `bldgs` is overwritten with the augmented frame, so this cell is not
# idempotent; re-run the loading cell first to start from clean footprints.
bldgs = pred_utils.predict_image(bldgs, in_file, exp_config, prefix='.')
bldgs.head(3)

INFO:root:Config: {'attribute': 'roof_material', 'data': 'RGB_DOM', 'mode': 'RGB', 'batch_size': 32, 'n_workers': 4, 'n_epochs': 60, 'model': 'resnet50', 'resampler': 'RandomOverSampler', 'pretrained': True, 'scheduler': 'ReduceLROnPlateau', 'optimizer': 'Adam', 'label_smoothing': 0.1, 'lr': 1e-05, 'img_size': 224, 'patience': 7, 'dropout': 0.5, 'momentum': None, 'gamma': None, 'step_size': None, 'config_name': 'cnn-roof_material-resnet50-RGB_DOM', 'rasters_dir': '../data/rasters/', 'vectors_dir': '../data/vectors/', 'tile_dir': '../data/tiles/', 'csv_dir': '../data/csv/', 'out_dir': '../outputs/', 'exp_dir': '../exp/', 'log_dir': '../logs/'}
INFO:root:Model file ../exp/cnn-roof_material-resnet50-RGB_DOM\cnn-roof_material-resnet50-RGB_DOM.pth successfully loaded.
100%|███████████████| 50410/50410 [3:53:43<00:00,  3.59it/s]                                                           


Unnamed: 0,UID,geometry,ROOF_MATERIAL,ROOF_MATERIAL_PROB,INCOMPLETE_PROB,HEALTHY_METAL_PROB,IRREGULAR_METAL_PROB,CONCRETE_CEMENT_PROB,BLUE_TARP_PROB
0,0,MULTIPOLYGON Z (((675912.759 1682212.635 99.94...,BLUE_TARP,0.357583,0.048491,0.147591,0.272941,0.173394,0.357583
1,1,MULTIPOLYGON Z (((675731.846 1682256.863 68.50...,IRREGULAR_METAL,0.769993,0.028491,0.16016,0.769993,0.018792,0.022564
2,2,MULTIPOLYGON Z (((675723.312 1682258.156 67.15...,IRREGULAR_METAL,0.953958,0.010803,0.012412,0.953958,0.005902,0.016925


In [4]:
# Experiment config for the roof-type classifier (the logged config reports
# attribute 'roof_type', model 'inceptionv3'). `in_file` is reused from the
# roof-material cell above.
exp_config = '../configs/cnn/cnn-roof_type-inceptionv3-RGB_DOM_LCA.yaml'
# `c` is reloaded here; the save cell below reads c['out_dir'] from it.
c = config.load_config(exp_config, prefix='.')
# Appends ROOF_TYPE, ROOF_TYPE_PROB and the per-class *_PROB columns to the
# frame that already holds the roof-material predictions (see output below).
# The recorded run took ~4h45m for 50,410 footprints.
bldgs = pred_utils.predict_image(bldgs, in_file, exp_config, prefix='.')
bldgs.head(3)

INFO:root:Config: {'attribute': 'roof_type', 'data': 'RGB_DOM_LCA', 'mode': 'RGB', 'batch_size': 32, 'n_workers': 4, 'n_epochs': 60, 'model': 'inceptionv3', 'pretrained': True, 'resampler': 'RandomOverSampler', 'scheduler': 'ReduceLROnPlateau', 'optimizer': 'Adam', 'label_smoothing': 0.1, 'lr': 1e-05, 'img_size': 299, 'momentum': None, 'gamma': None, 'step_size': None, 'patience': 7, 'dropout': None, 'config_name': 'cnn-roof_type-inceptionv3-RGB_DOM_LCA', 'rasters_dir': '../data/rasters/', 'vectors_dir': '../data/vectors/', 'tile_dir': '../data/tiles/', 'csv_dir': '../data/csv/', 'out_dir': '../outputs/', 'exp_dir': '../exp/', 'log_dir': '../logs/'}
INFO:root:Model file ../exp/cnn-roof_type-inceptionv3-RGB_DOM_LCA\cnn-roof_type-inceptionv3-RGB_DOM_LCA.pth successfully loaded.
100%|███████████████| 50410/50410 [4:45:17<00:00,  2.94it/s]                                                           


Unnamed: 0,UID,geometry,ROOF_MATERIAL,ROOF_MATERIAL_PROB,INCOMPLETE_PROB,HEALTHY_METAL_PROB,IRREGULAR_METAL_PROB,CONCRETE_CEMENT_PROB,BLUE_TARP_PROB,ROOF_TYPE,ROOF_TYPE_PROB,NO_ROOF_PROB,GABLE_PROB,HIP_PROB,FLAT_PROB
0,0,MULTIPOLYGON Z (((675912.759 1682212.635 99.94...,BLUE_TARP,0.357583,0.048491,0.147591,0.272941,0.173394,0.357583,GABLE,0.737536,0.074953,0.737536,0.087074,0.100438
1,1,MULTIPOLYGON Z (((675731.846 1682256.863 68.50...,IRREGULAR_METAL,0.769993,0.028491,0.16016,0.769993,0.018792,0.022564,GABLE,0.937156,0.017032,0.937156,0.028361,0.017451
2,2,MULTIPOLYGON Z (((675723.312 1682258.156 67.15...,IRREGULAR_METAL,0.953958,0.010803,0.012412,0.953958,0.005902,0.016925,GABLE,0.873958,0.024965,0.873958,0.056632,0.044446


In [5]:
# Base name of the output GeoPackage.
name = 'ortho_DOM'
# Output directory from the most recently loaded config `c`.
out_path = c['out_dir']
# exist_ok=True creates the directory when it is missing and does nothing
# otherwise, replacing the separate isdir() check.
os.makedirs(out_path, exist_ok=True)
out_file = os.path.join(out_path, f'{name}.gpkg')
# Persist footprints together with both sets of predictions.
bldgs.to_file(out_file, driver='GPKG')

## Model Evaluation

In [None]:
# Annotated ground-truth footprints used as the evaluation reference.
ytrue_file = '../data/vectors/building_footprints_annotated_COLIHAUT.gpkg'
ytrue = (
    gpd.read_file(ytrue_file)[['UID', 'roof_type', 'roof_material', 'geometry']]
    .dropna()
    # Fold the PYRAMID and HALF_HIP annotations into the HIP class.
    .assign(
        roof_type=lambda labels: labels['roof_type'].replace(
            {'PYRAMID': 'HIP', 'HALF_HIP': 'HIP'}
        )
    )
)
print(f"Data dimensions: {ytrue.shape}")
# Class balance of both annotated attributes.
print(ytrue['roof_type'].value_counts())
print(ytrue['roof_material'].value_counts())
ytrue.head(3)

In [None]:
# Load the predictions that will be scored against `ytrue`.
# NOTE(review): `name` is whatever an earlier cell assigned last ('ortho_DOM'
# in the prediction section, 'ortho_lidar_DOM' in the fusion section) —
# confirm which file is intended before running.
# NOTE(review): this reads from '../output/' while the loaded configs use
# out_dir '../outputs/', and it expects lower-case 'roof_type' /
# 'roof_material' columns whereas the files written by this notebook use
# upper-case column names. Presumably this file was produced elsewhere; verify.
ypred_file = f'../output/{name}-drone-20230616.gpkg'
ypred = gpd.read_file(ypred_file)[['UID', 'roof_type', 'roof_material', 'geometry']].dropna()
print(f"Data dimensions: {ypred.shape}")
# Distribution of the predicted classes for both attributes.
print(ypred.roof_type.value_counts())
print(ypred.roof_material.value_counts())
ypred.head(3)

In [None]:
# Attribute to evaluate and its class list.
attribute = 'roof_type'
# NOTE(review): the fusion section obtains classes through
# geoutils.get_classes_dict(...); confirm that `classes_dict` is also exposed.
classes = geoutils.classes_dict[attribute]

# `ytrue` and `ypred` come from different files and are filtered with dropna()
# independently, so their rows are not guaranteed to line up by position.
# Pair the labels explicitly on the shared building identifier instead.
eval_pairs = ytrue[['UID', attribute]].merge(
    ypred[['UID', attribute]],
    on='UID',
    suffixes=('_true', '_pred'),
    validate='one_to_one',  # fail loudly on duplicated UIDs
)
assert len(eval_pairs) > 0, "ytrue and ypred share no UID; nothing to evaluate"

cm = eval_utils.get_confusion_matrix(
    eval_pairs[f'{attribute}_true'], eval_pairs[f'{attribute}_pred'], classes
)
eval_utils.evaluate(eval_pairs[f'{attribute}_true'], eval_pairs[f'{attribute}_pred'])

## Data Fusion

In [19]:
# Reload clean footprints (identifier + geometry only) for the fusion model.
filename = '../data/vectors/bldgs_ortho_DOM.gpkg'
# Drop incomplete rows first, then reset the index. The fusion results are
# later concatenated column-wise with a freshly built probability frame, which
# only lines up if this index is a gap-free 0..n-1 range.
bldgs = gpd.read_file(filename)[['UID', 'geometry']].dropna().reset_index(drop=True)
print(bldgs.shape)
bldgs.head(3)

(50410, 2)


Unnamed: 0,UID,geometry
0,0,MULTIPOLYGON Z (((675912.759 1682212.635 99.94...
1,1,MULTIPOLYGON Z (((675731.846 1682256.863 68.50...
2,2,MULTIPOLYGON Z (((675723.312 1682258.156 67.15...


In [8]:
# Fusion experiment config. Entries whose key contains 'config1' or 'config2'
# get a '.' prepended to their value (the printed result shows the paths
# starting with '../configs/').
c = config.load_config("../configs/fusion/fusion_LR_embeds.yaml", prefix='.')
c = {
    key: ('.' + value) if any(tag in key for tag in ('config1', 'config2')) else value
    for key, value in c.items()
}
print(c)

# First base model; its attribute determines the class list that is passed
# to both models.
c1 = config.load_config(c['config1'], prefix='.')
classes = geoutils.get_classes_dict(c1['attribute'])
model1 = pred_utils.load_model(c1, classes=classes)
print(c1)

# Second base model, loaded with the same class list.
c2 = config.load_config(c['config2'], prefix='.')
model2 = pred_utils.load_model(c2, classes=classes)
print(c2)

{'config1': '../configs/cnn/cnn-roof_type-efficientnetb0-RGB_DOM.yaml', 'config2': '../configs/cnn/cnn-roof_type-inceptionv3-LIDAR_DOM.yaml', 'mode': 'fusion_embeds', 'model': 'LogisticRegression', 'model_params': {'solver': ['lbfgs', 'liblinear'], 'penalty': ['l1', 'l2'], 'C': [0.001, 0.01, 0.1, 1.0]}, 'selector': None, 'selector_params': None, 'scalers': ['MinMaxScaler', 'StandardScaler', 'RobustScaler'], 'cv': 'GridSearchCV', 'cv_params': {'refit': 'f1_score', 'verbose': 2, 'n_jobs': -1, 'cv': 5}, 'config_name': 'fusion_LR_embeds', 'rasters_dir': '../data/rasters/', 'vectors_dir': '../data/vectors/', 'tile_dir': '../data/tiles/', 'csv_dir': '../data/csv/', 'out_dir': '../outputs/', 'exp_dir': '../exp/', 'log_dir': '../logs/'}


INFO:root:Model file ../exp/cnn-roof_type-efficientnetb0-RGB_DOM\cnn-roof_type-efficientnetb0-RGB_DOM.pth successfully loaded.


{'attribute': 'roof_type', 'data': 'RGB_DOM', 'mode': 'RGB', 'batch_size': 32, 'n_workers': 4, 'n_epochs': 60, 'model': 'efficientnetb0', 'pretrained': True, 'resampler': 'RandomOverSampler', 'scheduler': 'ReduceLROnPlateau', 'optimizer': 'Adam', 'label_smoothing': 0.1, 'lr': 1e-05, 'img_size': 224, 'momentum': None, 'gamma': None, 'step_size': None, 'patience': 7, 'dropout': None, 'config_name': 'cnn-roof_type-efficientnetb0-RGB_DOM', 'rasters_dir': '../data/rasters/', 'vectors_dir': '../data/vectors/', 'tile_dir': '../data/tiles/', 'csv_dir': '../data/csv/', 'out_dir': '../outputs/', 'exp_dir': '../exp/', 'log_dir': '../logs/'}


INFO:root:Model file ../exp/cnn-roof_type-inceptionv3-LIDAR_DOM\cnn-roof_type-inceptionv3-LIDAR_DOM.pth successfully loaded.


{'attribute': 'roof_type', 'data': 'LIDAR_DOM', 'mode': 'LIDAR', 'batch_size': 32, 'n_workers': 4, 'n_epochs': 60, 'model': 'inceptionv3', 'pretrained': True, 'resampler': 'RandomOverSampler', 'scheduler': 'ReduceLROnPlateau', 'optimizer': 'Adam', 'label_smoothing': 0.1, 'lr': 1e-05, 'img_size': 299, 'momentum': None, 'gamma': None, 'step_size': None, 'patience': 7, 'dropout': None, 'config_name': 'cnn-roof_type-inceptionv3-LIDAR_DOM', 'rasters_dir': '../data/rasters/', 'vectors_dir': '../data/vectors/', 'tile_dir': '../data/tiles/', 'csv_dir': '../data/csv/', 'out_dir': '../outputs/', 'exp_dir': '../exp/', 'log_dir': '../logs/'}


In [9]:
# Raster inputs handed to the two base models: source1 goes with c1/model1
# (config 'mode': 'RGB'), source2 with c2/model2 (config 'mode': 'LIDAR').
source1 = '../data/rasters/ortho/ortho_DOM.tif'
source2 = '../data/rasters/lidar/ndsm_DOM.tif'
# Run both base models over every footprint; the result `data` feeds the
# fusion classifier below. The recorded run took ~6h36m for 50,410 footprints.
data = fusion_utils.predict(bldgs, c1, c2, model1, model2, source1=source1, source2=source2)

100%|███████████████| 50410/50410 [6:36:20<00:00,  2.12it/s]                                                           


In [12]:
# Cache the expensive base-model outputs on disk. Create the output directory
# first so this cell also works when the prediction section (which otherwise
# creates it) has not been run in this session.
os.makedirs(c['out_dir'], exist_ok=True)
data.to_csv(os.path.join(c['out_dir'], 'ortho_DOM.csv'))

In [14]:
# Feature columns that the fusion model reads from `data`.
features = fusion_utils.get_features(c, data)
# Pickled fusion model location:
#   <exp_dir>/<config_name>/<mode>/<model>/<config_name>.pkl
model_file = os.path.join(
    *[c[key] for key in ('exp_dir', 'config_name', 'mode', 'model')],
    f"{c['config_name']}.pkl",
)
model_file

'../exp/fusion_LR_embeds\\fusion_embeds\\LogisticRegression\\fusion_LR_embeds.pkl'

In [23]:
# Load the trained fusion model. joblib files are pickles, so only load model
# files produced by this project.
model = joblib.load(model_file)
# Hard labels for every footprint.
preds = model.predict(data[features])
# One "<CLASS>_PROB" column per class, named from `classes` by position.
# NOTE(review): this assumes the column order of predict_proba matches the
# order of `classes` — confirm against model.classes_.
probs_col = [f"{classes[class_idx]}_PROB" for class_idx in range(len(classes))]
probs = pd.DataFrame(model.predict_proba(data[features]), columns=probs_col)
probs.head(3)

Unnamed: 0,NO_ROOF_PROB,GABLE_PROB,HIP_PROB,FLAT_PROB
0,0.085731,0.02149,0.002926,0.889853
1,0.000974,0.981742,0.001839,0.015445
2,0.001433,0.979017,0.002901,0.016649


In [24]:
# Base name of the fused-prediction GeoPackage.
name = 'ortho_lidar_DOM'

# Predictions are attached to the footprints by position, so the row counts
# must agree exactly.
assert len(bldgs) == len(preds) == len(probs), "footprints and predictions differ in length"

bldgs[c1["attribute"]] = preds
# .to_numpy() assigns by position; assigning the Series directly would align
# on index labels and yield NaN/misplaced values if `bldgs` has index gaps.
bldgs[f"{c1['attribute']}_PROB"] = probs.max(axis=1).to_numpy()
# Give the probability frame the footprints' index before the column-wise
# concat, which otherwise aligns rows by index label rather than position.
results = gpd.GeoDataFrame(pd.concat([bldgs, probs.set_index(bldgs.index)], axis=1))
# Upper-case every column except the geometry column.
results.columns = [
    col.upper() if col != "geometry" else col for col in results.columns
]
# Make sure the output directory exists, then write the results.
os.makedirs(c['out_dir'], exist_ok=True)
out_file = os.path.join(c['out_dir'], f'{name}.gpkg')
results.to_file(out_file, driver='GPKG')