In [None]:
from rasterio import features
from folium.plugins import Search
import rasterio, folium, fiona, warnings
from shapely.geometry import Point, Polygon, mapping
from shapely.ops import nearest_points, unary_union   # correct
from shapely.strtree import STRtree
from scipy.spatial import distance_matrix

In [None]:
# Compute the adjusted IMI value for every subdivision geometry in ABT and
# store it in ABT["IMI_adj"]. The work is skipped when that column is present.
if "IMI_adj" not in ABT.columns:

    # NOTE: the column is intentionally NOT pre-created as an empty placeholder.
    # Creating "IMI_adj" before the computation has finished would make the
    # guard above evaluate to False after a failed or interrupted run, so a
    # re-run of this cell would silently skip the computation and leave a
    # column full of None behind.

    # ======================================================
    # 1. INITIALIZE THE IMI ENGINE
    # ======================================================
    imi_engine = IMICompactness(
        buildings=building_footprint_dataset,
        waterbody=waterbody,
        crs=ABT.crs
    )

    # ======================================================
    # 2. HELPER FUNCTION TO COMPUTE IMI FOR ONE SUBDIVISION
    # ======================================================
    def compute_imi_adj_for_row(sub_geom):
        """
        Computes IMI_adjusted for one subdivision polygon.

        Args:
            sub_geom: one geometry taken from ``ABT.geometry``; it is passed
                unchanged as ``subdivision`` to
                ``imi_engine.compute_for_subdivision``.

        Returns:
            The first element of the pair returned by
            ``compute_for_subdivision`` (described by the original author as a
            float), or None when that call raises.
        """
        try:
            imi_value, _ = imi_engine.compute_for_subdivision(
                subdivision=sub_geom,
                resolution=5,     # 5m cell size = good speed/accuracy balance
                frac=1.0,         # use 100% of buildings
                adjusted=True     # request the adjusted variant of the index
            )
            return imi_value
        except Exception as e:
            # Best effort: one failing subdivision must not abort the whole
            # run, so the error is reported and the row is left empty.
            print("Error computing IMI for subdivision:", e)
            return None

    # ======================================================
    # 3. APPLY TO ALL SUBDIVISIONS WITH PROGRESS BAR
    # ======================================================
    tqdm.pandas()  # needed before progress_apply can be used below
    print("Computing IMI_adjusted for all subdivisions...")
    # The column is created only here, once every row has been processed.
    ABT["IMI_adj"] = ABT.geometry.progress_apply(compute_imi_adj_for_row)
    print("DONE.")

else:
    print("âœ… IMI Index already exist).")

In [None]:
# ----------------------------------------------------------
# 0. SELECT METRICS
# ----------------------------------------------------------
# Raw metric columns of interest. The PCA itself runs on `metrics_final`
# (defined in step 1), where ENN_MN and ED are replaced by their reciprocals.
metrics_used = ["AI", "ENN_MN", "PROX", "ED", "SHAPE_MN", "FRAC_MN"]

# All intermediate columns go on a copy; ABT only receives the final index.
df = ABT.copy()

# ----------------------------------------------------------
# 1. DIRECTIONAL ALIGNMENT
# ----------------------------------------------------------
# ENN_MN and ED enter the analysis as reciprocals. The 1e-6 offset keeps the
# division defined when a value is exactly zero.
df["ENN_inv"] = 1.0 / (df["ENN_MN"] + 1e-6)
df["ED_inv"] = 1.0 / (df["ED"] + 1e-6)

metrics_final = ["AI", "PROX", "SHAPE_MN", "FRAC_MN", "ENN_inv", "ED_inv"]

# ----------------------------------------------------------
# 2. HANDLE MISSING VALUES
# ----------------------------------------------------------
# Extract the metric matrix and discard rows in which every metric is missing.
X = df[metrics_final].copy().dropna(how="all")

# Whatever is still missing is replaced by the mean of its column.
X_imputed = X.fillna(X.mean())

# ----------------------------------------------------------
# 3. Z-SCORE NORMALIZATION
# ----------------------------------------------------------
scaler = StandardScaler().fit(X_imputed)
X_scaled = scaler.transform(X_imputed)

# Labelled view of the standardized matrix; its index identifies the rows
# that took part in the PCA.
X_norm_df = pd.DataFrame(
    X_scaled,
    columns=[f"{name}_z" for name in metrics_final],
    index=X_imputed.index,
)

# ----------------------------------------------------------
# 4. PCA (PC1 as compactness index)
# ----------------------------------------------------------
pca = PCA(n_components=1)
PC1_scores = pca.fit_transform(X_scaled).flatten()

# Only the rows that survived the dropna receive a score.
df.loc[X_norm_df.index, "COMPACTNESS_PCA"] = PC1_scores

# ----------------------------------------------------------
# 5. PCA LOADINGS
# ----------------------------------------------------------
loadings = pd.Series(pca.components_[0], index=metrics_final, name="PC1_Loading")

# Share of each metric in the summed absolute loadings.
abs_weights = loadings.abs().div(loadings.abs().sum()).rename("PCA_Weight")

print("\n===== PCA LOADINGS =====", loadings, sep="\n")
print("\n===== PCA WEIGHTS (Normalized ABS values) =====", abs_weights, sep="\n")
print("\nExplained variance (PC1):", pca.explained_variance_ratio_[0])

# ----------------------------------------------------------
# 6. STORE RESULTS
# ----------------------------------------------------------
# Index-aligned copy of the new column back onto the main table.
ABT["COMPACTNESS_PCA"] = df["COMPACTNESS_PCA"]

print("\nðŸŽ¯ PCA Compactness Index successfully created (NaN-safe)!")


In [None]:
# if 'FAR' not in ABT.columns:
#     
#     ABT["FAR"] = None
#     
#     steps = [
#         "Overlaying parcels and subdivisions...",
#         "Computing overlap ratios...",
#         "Filtering parcels with â‰¥80% overlap...",
#         "Summing total floor area per subdivision...",
#         "Merging and calculating FAR...",
#         "Cleaning up columns..."
#     ]
#     
#     tax_parcel_cama_dataset["Shape_Area"] = tax_parcel_cama_dataset.geometry.area
#     for step in tqdm(steps, desc="Processing FAR computation", ncols=100):
#         if step == steps[0]:
#             # --- 1. Overlay parcels and subdivisions to find intersections ---
#             intersections = gpd.overlay(
#                 tax_parcel_cama_dataset[["parcelid", "totalarea","Shape_Area", "geometry"]],
#                 ABT[["subd_id", "geometry"]],
#                 how="intersection",
#                 keep_geom_type=True
#             )
# 
#         elif step == steps[1]:
#             # --- 2. Compute overlap ratio for each intersected parcel ---
#             intersections["intersect_area"] = intersections.geometry.area
#             intersections["overlap_ratio"] = intersections["intersect_area"] / intersections["Shape_Area"]
# 
#         elif step == steps[2]:
#             # --- 3. Keep only parcels whose overlap ratio meets the threshold (the step label says ≥ 80%, but the filter on the next line uses 0.4 — TODO reconcile) ---
#             intersections = intersections[intersections["overlap_ratio"] >= 0.4]
# 
#         elif step == steps[3]:
#             # --- 4. Compute true subdivision-level FAR (Σ total floor area ÷ ABT area) ---
#             far_sum = intersections.groupby("subd_id")["totalarea"].sum().reset_index(name="total_floor_area")
# 
#         elif step == steps[4]:
#             # Add subdivision area and merge
#             ABT["abt_area"] = ABT.geometry.area
#             ABT = ABT.merge(far_sum, on="subd_id", how="left")
#             ABT["FAR"] = ABT["total_floor_area"] / ABT["abt_area"]
#             ABT["FAR"] = ABT["FAR"].fillna(0).round(4)
# 
#         elif step == steps[5]:
#             # --- 6. Drop temporary columns if desired ---
#             ABT = ABT.drop(columns=["abt_area"], errors="ignore")
#             
# else:
#     print("âœ… FAR Index already exist).")

In [None]:
# if not all(col in ABT.columns for col in ['APT', 'APT_station', 'AUO','AUO_Grocery']):
#     ABT = pd.merge(ABT,accessibility_dataset, on="subd_id", how="left")
# if not all(col in ABT.columns for col in ['walk_time_min_transit', 'walk_time_min_amenity']):
#     ABT = pd.merge(ABT,accessibility_dataset_osm, on="subd_id", how="left")
# if "avg_time_transit" not in ABT.columns:
#     ABT["avg_time_transit"] = np.nan
#     ABT["avg_time_transit"] = (ABT["walk_time_min_osm"] + ABT["APT"])/ 2
# else:
#     print("âœ… All accessibility columns already exist.")
#     
# #Fill missing walk_time_min_osm values using APT
# # ABT['walk_time_min_osm'] = ABT['walk_time_min_osm'].fillna(ABT['APT'])

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# -----------------------------------------
# CUSTOM VARIMAX (works for any k ≥ 1)
# -----------------------------------------
def varimax(Phi, gamma=1.0, q=20, tol=1e-6):
    """Rotate a loading matrix with an iterative, SVD-based orthogonal update.

    Parameters
    ----------
    Phi : array-like, shape (p, k)
        Matrix to rotate; it is converted with ``np.asarray`` and must be 2-D.
    gamma : float, default 1.0
        Coefficient of the correction term subtracted from the cubed loadings
        in every iteration.
    q : int, default 20
        Maximum number of iterations.
    tol : float, default 1e-6
        The loop stops early once the sum of singular values grows by a factor
        smaller than ``1 + tol`` compared with the previous iteration.

    Returns
    -------
    numpy.ndarray
        ``Phi @ R`` for the final rotation ``R``. With a single column
        (k == 1) the converted input is returned without rotation.
    """
    Phi = np.asarray(Phi)
    n_rows, n_factors = Phi.shape

    # A single factor has nothing to rotate against.
    if n_factors == 1:
        return Phi

    rotation = np.eye(n_factors)
    criterion = 0

    for _ in range(q):
        previous = criterion
        rotated = Phi @ rotation

        # Diagonal matrix holding the column sums of squares of the current
        # rotated loadings.
        column_ss = np.diag(np.diag(rotated.T @ rotated))
        correction = (gamma / n_rows) * rotated @ column_ss

        left, singular_values, right_t = np.linalg.svd(
            Phi.T @ (rotated**3 - correction)
        )
        rotation = left @ right_t
        criterion = np.sum(singular_values)

        # The first pass has no previous value to compare against.
        if previous != 0 and criterion / previous < 1 + tol:
            break

    return Phi @ rotation


# -----------------------------------------
# 0. PREP — Fix directional metrics
# -----------------------------------------
# The reciprocal columns are added to a copy, so ABT itself stays untouched.
df = ABT.copy()
df["ENN_inv"] = 1.0 / (df["ENN_MN"] + 1e-6)   # 1e-6 guards against division by zero
df["ED_inv"] = 1.0 / (df["ED"] + 1e-6)

metrics_final = ["AI", "PROX", "ENN_inv", "ED_inv", "SHAPE_MN", "FRAC_MN"]

# Rows without a single metric are dropped; remaining gaps get the column mean.
X = df[metrics_final].copy()
X = X.dropna(how="all")
X_imputed = X.fillna(X.mean())

# Standardize
scaler = StandardScaler().fit(X_imputed)
X_scaled = scaler.transform(X_imputed)


# ========================================================
# MAIN FUNCTION: RUN PCA + ROTATION FOR ANY k COMPONENTS
# ========================================================
def run_pca_rotated(X_scaled, metrics, n_components):
    """Fit a PCA, rotate its component matrix with ``varimax`` and print a report.

    Parameters
    ----------
    X_scaled : array-like
        Data handed to ``PCA.fit_transform``.
    metrics : sequence of str
        Labels used as the row index of the rotated-loadings table and of the
        per-component weight listings.
    n_components : int
        Number of principal components to extract and rotate.

    Returns
    -------
    dict
        ``"eigenvalues"``      -- ``explained_variance_`` of the fitted PCA,
        ``"rotated_loadings"`` -- DataFrame of the rotated matrix
                                  (rows = metrics, columns = RC1..RCk),
        ``"scores"``           -- output of ``PCA.fit_transform``; these are
                                  the unrotated scores.
    """
    banner = "=" * 70
    print("\n" + banner)
    print(f"       ðŸ”· PCA WITH {n_components} ROTATED COMPONENTS")
    print(banner)

    # -------------------------
    # Run PCA
    # -------------------------
    model = PCA(n_components=n_components)
    component_scores = model.fit_transform(X_scaled)
    unrotated = model.components_.T   # one column per component
    eigenvalues = model.explained_variance_

    print("\n=== EIGENVALUES ===")
    for rank, value in enumerate(eigenvalues, start=1):
        print(f"PC{rank}: {value:.4f}")

    # -------------------------
    # Rotate loadings
    # -------------------------
    rotated = varimax(unrotated)

    print("\n=== ROTATED LOADINGS ===")
    component_names = [f"RC{rank}" for rank in range(1, n_components + 1)]
    rotated_df = pd.DataFrame(rotated, index=metrics, columns=component_names)
    print(rotated_df)

    # -------------------------
    # Compute normalized weights per component
    # -------------------------
    print("\n=== NORMALIZED WEIGHTS PER COMPONENT ===")
    for rank in range(1, n_components + 1):
        # Absolute loadings of this component, rescaled to sum to one.
        magnitudes = np.abs(rotated[:, rank - 1])
        shares = magnitudes / magnitudes.sum()
        print(f"\n--- Component RC{rank} Weights ---")
        print(pd.Series(shares, index=metrics))

    return {
        "eigenvalues": eigenvalues,
        "rotated_loadings": rotated_df,
        "scores": component_scores,
    }


# ========================================================
# RUN FOR 2, 3, AND 4 COMPONENTS
# ========================================================
# One rotated solution per component count, all on the same standardized
# matrix and metric labels.
results_2 = run_pca_rotated(X_scaled=X_scaled, metrics=metrics_final, n_components=2)
results_3 = run_pca_rotated(X_scaled=X_scaled, metrics=metrics_final, n_components=3)
results_4 = run_pca_rotated(X_scaled=X_scaled, metrics=metrics_final, n_components=4)

print("\nðŸŽ¯ Completed all PCA rotated solutions (2, 3, 4 components).")