In [1]:
!pip install numpy pandas scikit-learn pykrige matplotlib

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
import statsmodels.api as sm
from mgwr.gwr import GWR
from mgwr.sel_bw import Sel_BW
import folium
from folium.plugins import MarkerCluster
import branca
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, r2_score

In [3]:
# Load the data
data = pd.read_excel('Dataset.xlsx')

row_count = data.shape[0]
print("Number of rows:", row_count)

print("\n", data.head(5))


# Prepare the data
y = data['LungCancerRate'].values.reshape(-1, 1)
X = data[['Smoking', 'Poverty', 'Insurance', 'Income', 'PM 2.5']].values

# GWR Model Fitting
coords = np.array(list(zip(data['Longitude'], data['Latitude'])))
bw = Sel_BW(coords, y, X).search()
gwr_model = GWR(coords, y, X, bw)
gwr_results = gwr_model.fit()

print(gwr_results.summary())

Number of rows: 3143

    FIPS   Parish   Latitude  Longitude    Income  Insurance    PM 2.5  \
0  1001  Autauga  32.535142 -86.642900  0.308876   0.828652  0.619048   
1  1003  Baldwin  30.727825 -87.722745  0.343421   0.772472  0.455782   
2  1005  Barbour  31.870090 -85.391068  0.151268   0.719101  0.578231   
3  1007     Bibb  32.998376 -87.126814  0.231939   0.755618  0.605442   
4  1009   Blount  33.980871 -86.567006  0.251355   0.707865  0.591837   

    Poverty   Smoking  LungCancerRate  
0  0.161165  0.424242        0.311271  
1  0.135922  0.478788        0.288991  
2  0.403883  0.515152        0.338794  
3  0.316505  0.590909        0.412844  
4  0.198058  0.578788        0.333552  
Model type                                                         Gaussian
Number of observations:                                                3143
Number of covariates:                                                     6

Global Regression Results
-------------------------------------------

In [4]:
# Getting GWR predictions
gwr_predictions = gwr_model.predict(coords, X)
gwr_fitted = gwr_predictions.predictions.flatten()
data['gwr_r2'] = gwr_results.localR2
data['predicted_lung_cancer'] = gwr_fitted

# Calculate MAE and R² for the GWR model
mae = mean_absolute_error(y, gwr_fitted)
r_squared = r2_score(y, gwr_fitted)

print(f'Mean Absolute Error (MAE): {mae:.4f}')
print(f'R-squared (R²): {r_squared:.4f}')

Mean Absolute Error (MAE): 0.0475
R-squared (R²): 0.6585


In [5]:
# Plotting the GWR results on an interactive map
m = folium.Map(location=[31.0, -92.0], zoom_start=4)

marker_cluster = MarkerCluster().add_to(m)
for idx, row in data.iterrows():
    folium.Marker(
        location=(row['Latitude'], row['Longitude']),
        popup=(
            f"<strong>Parish:</strong> {row['Parish']}<br>"
            f"<strong>Predicted Lung Cancer Rate:</strong> {row['predicted_lung_cancer']:.2f}"
        ),
        icon=folium.Icon(color='red')
    ).add_to(marker_cluster)

# Displaying the map
m