In [2]:
# %% ---------------------------------------------------------------------------
# 1. Imports & configuration
# ------------------------------------------------------------------------------
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import folium
from folium.plugins import HeatMap  # still handy if you later want a heatmap
import h3pandas  # adds the .h3 accessor to pandas / GeoPandas
import joblib
import geopandas as gpd
from shapely.geometry import Point

# Database connection parameters – UPDATE IF NECESSARY
# Each entry is one component of the PostgreSQL connection URL built below.
DB_CONFIG = dict(
    host="localhost",
    port=5432,
    database="osm",
    user="postgres",
    password="postpass",
)

# Connection URL assembled from DB_CONFIG, in the form
#   postgresql://<user>:<password>@<host>:<port>/<database>
CONN_STRING = "".join(
    [
        f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@",
        f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}",
    ]
)

# Features used during training **must** match here
# Every feature column is named "<base>_count"; only the base names are
# spelled out below, in the exact order the columns are fed to the model.
_FEATURE_BASE_NAMES = (
    "bench", "cafe", "pharmacy", "waste_disposal",
    "atm", "post_office", "bank", "restaurant",
    "waste_basket", "fuel", "shelter", "toilets",
    "fast_food", "place_of_worship", "bicycle_parking", "parking",
    "bar", "dentist", "drinking_water", "clinic",
    "car_wash", "payment_terminal", "recycling", "library",
    "school", "community_centre", "vending_machine", "pub",
    "bureau_de_change", "doctors", "convenience", "clothes",
    "supermarket", "hairdresser", "yes", "car_repair",
    "beauty", "hardware", "alcohol", "car_parts",
    "butcher", "chemist", "bakery", "mobile_phone",
    "electronics", "doityourself", "furniture", "florist",
    "kiosk", "pawnbroker", "pet", "shoes",
    "confectionery", "optician", "cosmetics", "jewelry",
    "general", "travel_agency", "variety_store", "greengrocer",
    "atb", "novus", "eko_market", "fora",
)
FEATURE_COLUMNS = [f"{base}_count" for base in _FEATURE_BASE_NAMES]

# File the trained model is loaded from (see load_model)
MODEL_PATH = "aurora_location_model.pkl"

# %% ---------------------------------------------------------------------------
# 2. Data & model loading
# ------------------------------------------------------------------------------

def load_lviv_data(conn_string: str) -> pd.DataFrame:
    """Load **all** Lviv hexagons from the database (no boundary filtering).

    Parameters
    ----------
    conn_string : str
        Database URL handed to ``sqlalchemy.create_engine``.

    Returns
    -------
    pd.DataFrame
        Every row and column of the ``osm_loc_alike_lviv`` table.
    """
    print("Connecting to the database …")
    engine = create_engine(conn_string)
    try:
        df = pd.read_sql("SELECT * FROM osm_loc_alike_lviv", engine)
    finally:
        # The engine exists only for this one query; dispose of it so its
        # pooled connections are closed even if the query raises.
        engine.dispose()
    print(f"Loaded {len(df):,} hexagons for Lviv.")
    return df


def load_model(path: str):
    """Read the trained Aurora location model from *path* and return it.

    The file is deserialized with ``joblib.load``; a progress message is
    printed before and after loading.
    """
    # NOTE: joblib files are pickle-based – only load model files you trust.
    print(f"Loading model from '{path}' …")
    estimator = joblib.load(path)
    print("Model loaded OK.")
    return estimator


# %% ---------------------------------------------------------------------------
# 3. Prediction helpers
# ------------------------------------------------------------------------------

def predict_probabilities(
    model,
    df: pd.DataFrame,
    feature_columns: "list[str] | None" = None,
) -> pd.DataFrame:
    """Return a copy of *df* with an added `aurora_probability` column.

    Parameters
    ----------
    model
        Object exposing ``predict_proba(X)`` that returns a 2-D array; the
        values in column index 1 are stored as the probability.
    df : pd.DataFrame
        Input rows. It is not modified; a copy is returned.
    feature_columns : list[str] | None
        Names and order of the columns passed to the model. Defaults to the
        module-level ``FEATURE_COLUMNS`` when omitted.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with the extra `aurora_probability` column.
    """
    if feature_columns is None:
        feature_columns = FEATURE_COLUMNS

    result = df.copy()

    # reindex() selects the features in the given order and creates any
    # column missing from *df* as NaN; fillna(0) then turns both missing
    # columns and missing values into 0.
    X = result.reindex(columns=list(feature_columns)).fillna(0)

    print("Predicting probabilities …")
    result["aurora_probability"] = model.predict_proba(X)[:, 1]
    print("Prediction finished.")
    return result


# %% ---------------------------------------------------------------------------
# 4. Geometry helpers
# ------------------------------------------------------------------------------

def add_hexagon_geometry(df: pd.DataFrame) -> gpd.GeoDataFrame:
    """Build polygon geometries from the H3 indices in the `cell` column.

    The frame is indexed by `cell`, passed through the ``.h3`` accessor that
    h3pandas adds, and returned with `cell` restored as an ordinary column.
    """
    # The accessor is called on a frame indexed by the H3 cell id
    indexed = df.set_index("cell")
    hexagons = indexed.h3.h3_to_geo_boundary()
    # Move `cell` out of the index so later steps can use it as a column
    return hexagons.reset_index()


def extract_shop_points(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Return centroids of hexagons where an Aurora shop already exists.

    Rows with ``has_aurora == 1`` are selected and the centroid of each
    hexagon is exposed as `lat` / `lon`. The result always has the columns
    `cell`, `lat`, `lon` and `aurora_probability` (no geometry column), and
    is empty, with a warning printed, when no hexagon contains a shop.
    """
    columns = ["cell", "lat", "lon", "aurora_probability"]

    shops = gdf[gdf["has_aurora"] == 1].copy()
    if shops.empty:
        print("⚠️  No existing Aurora shops found in this dataset!")
        # Same columns as the non-empty result so callers see one schema
        return gpd.GeoDataFrame(columns=columns)

    # Centroids are taken directly in the layer's own coordinates;
    # y is used as latitude and x as longitude.
    centroids = shops["geometry"].centroid
    shops["lat"] = centroids.y
    shops["lon"] = centroids.x
    return shops[columns]


# %% ---------------------------------------------------------------------------
# 5. Visualisation
# ------------------------------------------------------------------------------

def build_lviv_map(gdf: gpd.GeoDataFrame, shop_points: gpd.GeoDataFrame) -> folium.Map:
    """Create an interactive folium map with two layers:
    1. Choropleth of predicted probabilities
    2. Circle markers for existing Aurora shops

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Hexagons with at least `cell`, `aurora_probability` and a geometry
        column.
    shop_points : gpd.GeoDataFrame
        One row per existing shop with `cell`, `lat`, `lon` and
        `aurora_probability`; may be empty.

    Returns
    -------
    folium.Map
        The assembled map (not yet saved).
    """

    print("Building interactive map …")

    lviv_map = folium.Map(
        location=[49.8397, 24.0297],  # Lviv city centre
        zoom_start=12,
        tiles="cartodbpositron",
    )

    # Only the columns the layer actually uses are embedded in the GeoJSON;
    # carrying every feature column along would only bloat the saved HTML.
    hex_layer = gdf[["cell", "aurora_probability", gdf.geometry.name]]

    # -------------- Probability choropleth layer --------------
    choropleth = folium.Choropleth(
        geo_data=hex_layer,
        name="Predicted probability",
        data=hex_layer,
        columns=["cell", "aurora_probability"],
        key_on="feature.properties.cell",
        fill_color="YlOrRd",
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name="Aurora shop probability",
        highlight=True,
    ).add_to(lviv_map)

    # Add tooltips to show probability per hexagon
    folium.GeoJsonTooltip(
        fields=["cell", "aurora_probability"],
        aliases=["Cell:", "Probability:"],
        localize=True,
    ).add_to(choropleth.geojson)

    # -------------- Existing shop markers layer --------------
    shops_layer = folium.FeatureGroup(name="Existing Aurora shops")
    for _, row in shop_points.iterrows():
        folium.CircleMarker(
            location=[row["lat"], row["lon"]],
            radius=6,
            color="blue",
            fill=True,
            fill_color="blue",
            fill_opacity=0.9,
            tooltip=(
                folium.Tooltip(
                    # Tooltip text is rendered as HTML, where "\n" collapses
                    # to a space – use <br> for real line breaks.
                    f"Existing Aurora shop<br>Cell: {row['cell']}<br>"
                    f"Predicted prob.: {row['aurora_probability']:.2%}"
                )
            ),
        ).add_to(shops_layer)
    shops_layer.add_to(lviv_map)

    # -------------- Controls --------------
    folium.LayerControl(collapsed=False).add_to(lviv_map)

    return lviv_map


# %% ---------------------------------------------------------------------------
# 6. Main routine
# ------------------------------------------------------------------------------

def main(output_html: str = "lviv_aurora_prediction_map.html"):
    """Run the full pipeline: load, predict, map, and report.

    Loads the model from ``MODEL_PATH`` and the hexagons via ``CONN_STRING``,
    predicts a probability per hexagon, saves the interactive map to
    *output_html*, and prints the ten highest-probability hexagons that do
    not already contain a shop.
    """
    # Load resources
    model = load_model(MODEL_PATH)
    df_lviv = load_lviv_data(CONN_STRING)

    # Predict & add geometries
    df_pred = predict_probabilities(model, df_lviv)
    gdf = add_hexagon_geometry(df_pred)

    # Extract existing shop points
    shop_points = extract_shop_points(gdf)

    # Build and save map
    lviv_map = build_lviv_map(gdf, shop_points)
    lviv_map.save(output_html)

    # Show top‑10 candidates in the console
    print("\nTop 10 recommended locations (predicted, no existing shop):")
    print(
        df_pred[df_pred["has_aurora"] == 0]
        .nlargest(10, "aurora_probability")[["cell", "aurora_probability"]]
        .reset_index(drop=True)
    )

    print(f"\n✅  Map saved to '{output_html}'. Open the file in a browser to explore!")


# Run only when executed as a script / notebook cell, so importing this
# module does not trigger database access and model loading.
if __name__ == "__main__":
    main()


Loading model from 'aurora_location_model.pkl' …
Model loaded OK.
Connecting to the database …
Loaded 207 hexagons for Lviv.
Predicting probabilities …
Prediction finished.
Building interactive map …

Top 10 recommended locations (predicted, no existing shop):
              cell  aurora_probability
0  881e7689abfffff            0.995856
1  881e768839fffff            0.994050
2  881e768915fffff            0.988771
3  881e7689d1fffff            0.982486
4  881e768957fffff            0.954386
5  881e7689c9fffff            0.947204
6  881e768811fffff            0.947133
7  881e7689edfffff            0.937634
8  881e7689c1fffff            0.931176
9  881e768911fffff            0.928864

✅  Map saved to 'lviv_aurora_prediction_map.html'. Open the file in a browser to explore!
