In [22]:
# Dependencies: uncomment and run once if they are missing from the kernel's environment.
# %pip install mlflow seaborn shap optuna

# ✅ System & Utility Imports
import os
import shutil
import json
import datetime
import time
import random
import itertools
import argparse
import pickle
import joblib
import fnmatch
import io
import base64

# ✅ Data Handling & Processing
import pandas as pd
import numpy as np
import csv

# ✅ Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# ✅ Machine Learning & Model Evaluation
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.tree import plot_tree
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, 
    roc_auc_score, average_precision_score, confusion_matrix, classification_report
)
import sklearn.preprocessing as SKP
import sklearn.metrics as SKM

# ✅ Deep Learning (TensorFlow / Keras)
import tensorflow as tf
import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Concatenate
from tensorflow.keras.optimizers import Adam, RMSprop, SGD, Adagrad
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, Callback
from tensorflow.keras.metrics import Precision, Recall, AUC, CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.activations import selu
from tensorflow.keras.initializers import lecun_normal
from tensorflow.keras.layers import Input, Dense, AlphaDropout

# ✅ Feature Importance (SHAP)
import shap

# ✅ Geographic Data (if used for spatial analysis)
import geopandas as gpd
from shapely.geometry import Point

# ✅ Logging & Experiment Tracking
import mlflow
import mlflow.tensorflow

# ✅ Optimization & Hyperparameter Tuning
import optuna

# ✅ Math & Statistics
from scipy import stats
import scipy as SCP

# ✅ Progress Bars & Performance Monitoring
from tqdm import tqdm
import psutil
import gc

import optuna
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam, RMSprop, SGD, Adagrad
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import Precision, Recall, AUC, CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score
import numpy as np


In [25]:
# Input: parquet file holding the attributes computed in the previous steps.
parquet_file = "Churu_height_NEW_GHSL_SMOD_elevations_BDensity_radius.parquet"

# Load the whole file into memory as a pandas DataFrame.
df_real_data = pd.read_parquet(path=parquet_file)

In [26]:
# List the columns available in the loaded frame.
df_real_data.columns

Index(['perimeter_in_meters', 'building_faces', 'bf_source', 'confidence',
       'geometry', 'longitude', 'latitude', 'id', 'area_in_meters',
       'height_mean', 'height_median', 'height_max', 'height', 'floors',
       'gfa_in_meters', 'urban_split', 'ghsl_smod', 'elevation',
       'building_density_50', 'building_density_100', 'building_density_250',
       'building_density_500', 'building_perimeter_in_meters_new',
       'perimeter_to_area_ratio', 'normalized_perimeter_to_area_ratio',
       'centroid', 'radius_m', 'num_vertices', 'centroid_x', 'centroid_y',
       'nearest_road_type_1', 'distance_to_1', 'nearest_road_type_2',
       'distance_to_2', 'nearest_road_type_3', 'distance_to_3',
       'nearest_road_type_4', 'distance_to_4', 'road_density_for_4_fixed',
       'road_density_for_5_fixed', 'SQN', 'faces'],
      dtype='object')

In [27]:
# Divisors used to scale raw attributes into the range the model works with.
# Only some of them are referenced by the cells visible in this notebook
# section; all names are kept because other cells may rely on them.

# -- building geometry --
normalize_area = 20_000
normalize_perimeter = 500
normalize_perim_to_area = 7
normalize_radius = 100

# -- surroundings --
normalize_density_100 = 200
normalize_smod = 6

# -- road network --
normalize_road_count = 30
normalize_road_density = 5_000
normalize_road_distance = 10_100

# -- other --
normalize_int_t = 3_300
normalize_int_distance = 180

# Disabled alternatives, kept for reference:
#   normalize_height = 20
#   avg_range_k5_capped = 1100
#   normalize_nearest_city = 400
#   normalize_height_mean = 13
#   normalize_density_100 = 110
#   normalize_density_500 = 1900


In [28]:
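# Derivation of the 'SQN' and 'faces' columns.
# Both columns are already present in the input parquet file, so the code
# below is disabled and kept for reference only.
# df_real_data["SQN"] = (4 * np.sqrt(df_real_data["area_in_meters"]) / df_real_data["building_perimeter_in_meters_new"])
# df_real_data.columns
# Number of faces = number of vertices - 1, capped at 20:
# df_real_data["faces"] = df_real_data['num_vertices'] - 1
# df_real_data.loc[df_real_data["faces"] > 20, "faces"] = 20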



In [29]:
# Scale the model input features and build the frame that is fed to the model.
#
# NOTE: the scaling overwrites the columns of `df_real_data` in place, so this
# cell is not idempotent -- re-running it without reloading the parquet file
# divides the values a second time.

# Divisor applied to each scaled feature column ('SQN' and 'faces' are used as-is).
feature_divisors = {
    'area_in_meters': normalize_area,
    'distance_to_1': 5000,
    'distance_to_2': 4000,
    'distance_to_3': 3000,
    'distance_to_4': 2000,
    'road_density_for_4_fixed': 60_000,
    'road_density_for_5_fixed': 75_000,
    'building_density_100': normalize_density_100,
}

# Columns handed to the model, listed in the column order of the input file.
# Selecting them explicitly (instead of dropping everything else) keeps any
# additional column in the input from silently reaching the model.
model_feature_columns = [
    'area_in_meters', 'building_density_100',
    'distance_to_1', 'distance_to_2', 'distance_to_3', 'distance_to_4',
    'road_density_for_4_fixed', 'road_density_for_5_fixed',
    'SQN', 'faces',
]

# Fail early, before anything is modified, if a required feature is absent.
missing_columns = [name for name in model_feature_columns if name not in df_real_data.columns]
if missing_columns:
    raise KeyError(f"Input data is missing required feature columns: {missing_columns}")

for column_name, divisor in feature_divisors.items():
    df_real_data[column_name] = df_real_data[column_name] / divisor

print("Real data shape:", df_real_data.shape)

# Model input: only the feature columns, as a new frame.
df_real_dropped = df_real_data[model_feature_columns]


Real data shape: (1481134, 42)


In [30]:
# List the feature columns that remain in the model input frame.
df_real_dropped.columns

Index(['area_in_meters', 'building_density_100', 'distance_to_1',
       'distance_to_2', 'distance_to_3', 'distance_to_4',
       'road_density_for_4_fixed', 'road_density_for_5_fixed', 'SQN', 'faces'],
      dtype='object')

In [None]:
# Load the trained classifier and predict a class label for every row.

# !!!! Set this to the location of your model file.
# The path is assembled with os.path.join because a hard-coded backslash
# separator only resolves on Windows.
model_path = os.path.join("model", "neural_network_3cat (22).keras")
model = keras.models.load_model(model_path)

# One row of class scores per input row.
predictions = model.predict(df_real_dropped)

# Class names, indexed by position in the model output.
class_labels = ['Non-Residential', 'Residential', 'Industrial']

# Guard against a model whose output width does not match the label list;
# otherwise the lookup below would either fail or assign wrong names.
if predictions.shape[-1] != len(class_labels):
    raise ValueError(
        f"Model returned {predictions.shape[-1]} outputs per row, "
        f"but {len(class_labels)} class labels are defined"
    )

# Index of the highest-scoring class for each row.
predicted_classes = np.argmax(predictions, axis=1)

# Convert the predicted class indices to their names (vectorised lookup,
# returned as a plain list of str).
predicted_labels = np.asarray(class_labels)[predicted_classes].tolist()



  saveable.load_own_variables(weights_store.get(inner_path))


[1m46286/46286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 700us/step


In [32]:
#final file
output_file = "Churu_building_footprint_classification.parquet"
df_real_data["prediction"] = predicted_labels
df_real_data.to_parquet(output_file)

In [33]:
# Display the frame, now including the 'prediction' column.
df_real_data

Unnamed: 0,perimeter_in_meters,building_faces,bf_source,confidence,geometry,longitude,latitude,id,area_in_meters,height_mean,...,distance_to_2,nearest_road_type_3,distance_to_3,nearest_road_type_4,distance_to_4,road_density_for_4_fixed,road_density_for_5_fixed,SQN,faces,prediction
0,7.243843,4,google,0.7724,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...,74.232350,27.726409,74.23234975129046:27.72640873664523,0.000125,0.000000,...,1.000000,tertiary,0.024701,unclassified,0.014643,0.040648,0.067323,0.873945,4,Residential
1,9.011801,4,google,0.7108,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...,74.844915,28.873013,74.84491545220574:28.873013019801967,0.000126,1.000000,...,0.086930,tertiary,0.221931,residential,0.011467,0.148409,0.206338,0.703755,4,Residential
2,6.354292,4,google,0.7123,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...,74.226555,27.630664,74.22655544425939:27.630663527650903,0.000126,1.722222,...,1.000000,tertiary,1.000000,track,0.588414,0.000000,0.000000,0.998249,4,Residential
3,6.957619,4,google,0.7604,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...,75.014020,28.334861,75.0140202674914:28.334861398658834,0.000126,0.000000,...,0.516274,tertiary,0.524725,unclassified,0.077349,0.051521,0.000000,0.914346,4,Residential
4,8.217702,4,google,0.7845,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...,74.570094,27.782683,74.57009354210085:27.782682537523247,0.000127,2.500000,...,1.000000,tertiary,0.026344,unclassified,0.059044,0.070790,0.000000,0.774335,4,Residential
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1481129,288.063775,4,google,0.9623,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...,74.471835,28.419541,74.47183471063026:28.41954127037166,0.255824,10.031507,...,1.000000,tertiary,0.211301,unclassified,0.032904,0.035079,0.063497,0.993246,4,Industrial
1481130,309.362995,6,google,0.8996,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x07\x00...,74.960137,28.195540,74.96013678708111:28.195540360926653,0.275961,6.671840,...,1.000000,tertiary,0.659801,unclassified,0.913392,0.000000,0.000000,0.960573,6,Non-Residential
1481131,444.405058,21,google,0.9111,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x16\x00...,75.380226,28.630323,75.38022567723831:28.630323167273463,0.276669,8.208935,...,1.000000,tertiary,0.107802,residential,0.173645,0.000000,0.000000,0.669540,20,Non-Residential
1481132,382.805856,16,google,0.9185,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x11\x00...,75.344910,28.390703,75.34491042205822:28.390703116146657,0.300631,7.102821,...,1.000000,tertiary,1.000000,unclassified,0.072797,0.062153,0.000000,0.810239,16,Industrial


In [35]:
import geopandas as gpd
from shapely import wkb  # not used in this cell any more; kept in case other cells rely on the name

# Re-read the file written by pandas: its geometry column holds WKB bytes,
# not geometry objects.
df = pd.read_parquet(output_file)

# Decode every WKB value in one vectorised call rather than a per-row
# Python-level apply; the result keeps the index of the source column.
df["geometry"] = gpd.GeoSeries.from_wkb(df["geometry"])

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:3857")

# Save with full GeoParquet metadata (overwrites the file read above)
gdf.to_parquet(output_file, engine="pyarrow")
gdf.head()



Unnamed: 0,perimeter_in_meters,building_faces,bf_source,confidence,geometry,longitude,latitude,id,area_in_meters,height_mean,...,distance_to_2,nearest_road_type_3,distance_to_3,nearest_road_type_4,distance_to_4,road_density_for_4_fixed,road_density_for_5_fixed,SQN,faces,prediction
0,7.243843,4,google,0.7724,"POLYGON ((8263508.924 3214523.343, 8263508.858...",74.23235,27.726409,74.23234975129046:27.72640873664523,0.000125,0.0,...,1.0,tertiary,0.024701,unclassified,0.014643,0.040648,0.067323,0.873945,4,Residential
1,9.011801,4,google,0.7108,"POLYGON ((8331698.419 3359491.171, 8331698.076...",74.844915,28.873013,74.84491545220574:28.873013019801967,0.000126,1.0,...,0.08693,tertiary,0.221931,residential,0.011467,0.148409,0.206338,0.703755,4,Residential
2,6.354292,4,google,0.7123,"POLYGON ((8262863.432 3202487.532, 8262863.149...",74.226555,27.630664,74.22655544425939:27.630663527650903,0.000126,1.722222,...,1.0,tertiary,1.0,track,0.588414,0.0,0.0,0.998249,4,Residential
3,6.957619,4,google,0.7604,"POLYGON ((8350523.934 3291257.556, 8350523.917...",75.01402,28.334861,75.0140202674914:28.334861398658834,0.000126,0.0,...,0.516274,tertiary,0.524725,unclassified,0.077349,0.051521,0.0,0.914346,4,Residential
4,8.217702,4,google,0.7845,"POLYGON ((8301105.491 3221600.715, 8301105.038...",74.570094,27.782683,74.57009354210085:27.782682537523247,0.000127,2.5,...,1.0,tertiary,0.026344,unclassified,0.059044,0.07079,0.0,0.774335,4,Residential
