In [1]:
import os

# Project root that the relative paths used in this notebook ('data/...',
# 'mgwr_test/...') are resolved against. The default is the original local
# folder; set the JAIPUR_MGWR_DIR environment variable to run the notebook
# from another machine or location without editing this cell.
PROJECT_DIR = os.environ.get(
    "JAIPUR_MGWR_DIR",
    "G:/OneDrive/casa0010dissertation/00_06 jaipur code_mgwr",
)
os.chdir(PROJECT_DIR)

# Kept as the last expression so the working directory now in effect is
# displayed (a bare expression in the middle of a cell shows nothing).
os.getcwd()


In [2]:
import numpy as np
import geopandas as gpd
import json
import pandas as pd


# 1 Rescale the grid variables as I did in GWR

In [3]:
# Load the grid layer from GeoJSON into a GeoDataFrame.
grid_geojson_path = 'data/cleaned/grid_with_wards_mgwr_03.geojson'
grid_with_wards = gpd.read_file(grid_geojson_path)

In [4]:
# Inspect the available columns before the coefficient fields are renamed.
grid_with_wards.columns

Index(['grid_id', 'row', 'col', 'illum_vol_density', 'light_intensity',
       'poi_kde', 'Id', 'Ward_No', 'POP', 'intercept', 'illum_vol_estimate',
       'light_intensity_estimate', 'poi_kde_estimate', 'poi_kde_scaled',
       'geometry'],
      dtype='object')

In [5]:
# Read the stored scaling parameters. Later cells look up
# params['x_params'][<column>]['mu' / 'std'] and params['y_params']['mu' / 'std'].
with open("mgwr_test/ward_scalers.json", encoding="utf-8") as scaler_file:
    params = json.loads(scaler_file.read())

In [6]:
# --- 1. Rename columns for clarity and consistency ---
#
# The intercept and the three '*_estimate' columns are used as regression
# coefficients further down, so they get an explicit 'beta_' prefix.
rename_mapping = {
    'intercept': 'beta_intercept',
    'illum_vol_estimate': 'beta_illum_vol',
    'light_intensity_estimate': 'beta_light_intensity',
    'poi_kde_estimate': 'beta_poi_kde'
}

# rename() returns a new GeoDataFrame, so `grid_with_wards` stays untouched and
# this cell can be re-run safely. errors="raise" makes a missing source column
# fail right here; the default silently skips it, and the problem would only
# surface later as a KeyError on one of the 'beta_*' names.
grid_pred_data = grid_with_wards.rename(columns=rename_mapping, errors="raise")
print("--- Coefficient columns have been renamed for clarity ---")


--- Coefficient columns have been renamed for clarity ---


In [7]:
# --- 2. Scaling ---
#
# Each grid-level predictor is log1p-transformed and then z-scored with the
# ward-level mean/std stored in `params`. The result is written to a new
# '<column>_std' column; the raw columns are left unchanged.
#
# NOTE(review): in the recorded describe() output for this notebook the median
# of every '*_std' column equals its minimum (about -23 for
# illum_vol_density_std), i.e. at least half of the cells sit far outside the
# ward distribution. Predictions built on these values look like heavy
# extrapolation - confirm that grid and ward inputs share the same units.

x_cols = ['illum_vol_density', 'light_intensity', 'poi_kde_scaled']

print("\n--- Standardizing grid-level variables using ward-level parameters... ---")

# Fail early with a readable message instead of a bare KeyError mid-loop.
missing_params = [col for col in x_cols if col not in params['x_params']]
if missing_params:
    raise KeyError(f"No ward-level scaling parameters for: {missing_params}")
missing_columns = [col for col in x_cols if col not in grid_pred_data.columns]
if missing_columns:
    raise KeyError(f"grid_pred_data is missing predictor columns: {missing_columns}")

for col in x_cols:
    # Ward-level parameters for this variable
    ward_mu = params['x_params'][col]['mu']
    ward_std = params['x_params'][col]['std']

    # A zero or non-finite std would silently turn the whole column into inf/NaN.
    if not np.isfinite(ward_std) or ward_std == 0:
        raise ValueError(f"Invalid ward-level std for '{col}': {ward_std!r}")

    # log1p returns -inf at -1 and NaN below it, emitting only a RuntimeWarning.
    if (grid_pred_data[col] <= -1).any():
        raise ValueError(f"Column '{col}' contains values <= -1; log1p is undefined there")

    # Log1p transform
    log_x_grid = np.log1p(grid_pred_data[col])

    # Z-score using the ward-level parameters
    z_score_col_name = col + '_std'
    grid_pred_data[z_score_col_name] = (log_x_grid - ward_mu) / ward_std

print("--- Standardization complete. ---")


--- Standardizing grid-level variables using ward-level parameters... ---
--- Standardization complete. ---


In [8]:
 # Confirm that the three '*_std' columns were appended.
 # NOTE(review): this line starts with a stray space. The recorded output shows
 # the cell ran in the notebook, but the same line raises IndentationError in
 # a plain .py script - drop the leading space when tidying up.
 grid_pred_data.columns

Index(['grid_id', 'row', 'col', 'illum_vol_density', 'light_intensity',
       'poi_kde', 'Id', 'Ward_No', 'POP', 'beta_intercept', 'beta_illum_vol',
       'beta_light_intensity', 'beta_poi_kde', 'poi_kde_scaled', 'geometry',
       'illum_vol_density_std', 'light_intensity_std', 'poi_kde_scaled_std'],
      dtype='object')

In [17]:
 # Summary statistics for the intercept and the three standardized predictors.
 # NOTE(review): the execution counter of this cell (17) is out of sequence with
 # its neighbours; re-run the notebook top to bottom before trusting the output.
 # NOTE(review): in the recorded output the 25% and 50% quantiles of every
 # '*_std' column equal the column minimum and the means are strongly negative,
 # so most cells lie far from the ward-level centre - verify the input scales.
 # NOTE(review): the line also starts with a stray space (IndentationError
 # outside a notebook).
 grid_pred_data[['beta_intercept','illum_vol_density_std', 'light_intensity_std', 'poi_kde_scaled_std']].describe()


Unnamed: 0,beta_intercept,illum_vol_density_std,light_intensity_std,poi_kde_scaled_std
count,38906.0,38906.0,38906.0,38906.0
mean,0.680088,-17.817689,-6.939641,-1.576639
std,1.928375,6.912239,5.082476,1.223289
min,-3.712855,-22.977989,-10.716118,-2.402737
25%,0.0,-22.977989,-10.716118,-2.402737
50%,0.0,-22.977989,-10.716118,-2.402737
75%,0.0,-8.977886,-0.482834,-0.735324
max,6.094717,-6.569271,2.704089,1.999526


# 2 Get grid prediction data

In [10]:
# --- 3. Linear predictor in standardized (z-score) space ---
#
# Every row carries its own coefficients, so the prediction is computed
# element-wise:
#   Predicted_Z_Y = beta_0 + beta_illum*z_illum + beta_light*z_light + beta_poi*z_poi
# The result is still in z-score units; this cell does not undo any scaling.

# a. Column names: standardized predictors and the coefficient paired with each
z_illum_col, z_light_col, z_poi_col = (
    'illum_vol_density_std',
    'light_intensity_std',
    'poi_kde_scaled_std',
)
beta_intercept_col, beta_illum_col, beta_light_col, beta_poi_col = (
    'beta_intercept',
    'beta_illum_vol',
    'beta_light_intensity',
    'beta_poi_kde',
)

# b. Extract each column as a plain NumPy array
z_illum_vals, z_light_vals, z_poi_vals = (
    grid_pred_data[name].values
    for name in (z_illum_col, z_light_col, z_poi_col)
)
beta_intercept_vals, beta_illum_vals, beta_light_vals, beta_poi_vals = (
    grid_pred_data[name].values
    for name in (beta_intercept_col, beta_illum_col, beta_light_col, beta_poi_col)
)

# c. Add the terms one at a time, left to right as in the formula above
Predicted_Z_Y = beta_intercept_vals + beta_illum_vals * z_illum_vals
Predicted_Z_Y = Predicted_Z_Y + beta_light_vals * z_light_vals
Predicted_Z_Y = Predicted_Z_Y + beta_poi_vals * z_poi_vals

grid_pred_data['Predicted_Z_Y'] = Predicted_Z_Y


In [11]:
# Summarise the predictions while they are still in z-score units.
# NOTE(review): the recorded output shows a mean of about 199 and a minimum of
# about 56, far from what a z-score would normally look like - check the
# standardized inputs before using these values.
z_pred_summary = grid_pred_data['Predicted_Z_Y'].describe()
print("\n--- Descriptive Statistics for Z-score predictions ")
print(z_pred_summary)


--- Descriptive Statistics for Z-score predictions 
count    38906.000000
mean       199.142085
std         76.192424
min         56.162870
25%        111.487164
50%        212.066507
75%        266.004875
max        316.913547
Name: Predicted_Z_Y, dtype: float64


In [12]:
# d. get Predicted_log_Y
y_mu_ward = params['y_params']['mu']
y_std_ward = params['y_params']['std']
grid_pred_data['Predicted_log_Y'] = (grid_pred_data['Predicted_Z_Y'] * y_std_ward) + y_mu_ward
print(grid_pred_data['Predicted_log_Y'].describe())

count    38906.000000
mean       208.887488
std         78.063204
min         62.397654
25%        119.080345
50%        222.129248
75%        277.391984
max        329.550635
Name: Predicted_log_Y, dtype: float64


In [13]:
# Confirm that 'Predicted_Z_Y' and 'Predicted_log_Y' were added.
grid_pred_data.columns

Index(['grid_id', 'row', 'col', 'illum_vol_density', 'light_intensity',
       'poi_kde', 'Id', 'Ward_No', 'POP', 'beta_intercept', 'beta_illum_vol',
       'beta_light_intensity', 'beta_poi_kde', 'poi_kde_scaled', 'geometry',
       'illum_vol_density_std', 'light_intensity_std', 'poi_kde_scaled_std',
       'Predicted_Z_Y', 'Predicted_log_Y'],
      dtype='object')

In [14]:
# Check dtypes and non-null counts of every column before exporting.
grid_pred_data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 38906 entries, 0 to 38905
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   grid_id                38906 non-null  int32   
 1   row                    38906 non-null  int32   
 2   col                    38906 non-null  int32   
 3   illum_vol_density      38906 non-null  float64 
 4   light_intensity        38906 non-null  float64 
 5   poi_kde                38906 non-null  float64 
 6   Id                     38906 non-null  float64 
 7   Ward_No                38906 non-null  float64 
 8   POP                    38906 non-null  float64 
 9   beta_intercept         38906 non-null  float64 
 10  beta_illum_vol         38906 non-null  float64 
 11  beta_light_intensity   38906 non-null  float64 
 12  beta_poi_kde           38906 non-null  float64 
 13  poi_kde_scaled         38906 non-null  float64 
 14  geometry               38906 n

In [15]:
# Write the grid, including the standardized predictors and both prediction
# columns, to GeoJSON.
output_geojson_path = 'data/cleaned/grid_data_for_weights_mgwr03.geojson'
grid_pred_data.to_file(output_geojson_path, driver='GeoJSON')