diff --git a/src/globi/tools/visualization/data_sources.py b/src/globi/tools/visualization/data_sources.py index 11e0171..9858528 100644 --- a/src/globi/tools/visualization/data_sources.py +++ b/src/globi/tools/visualization/data_sources.py @@ -252,8 +252,8 @@ def list_available_runs(self) -> list[str]: self._run_dirs = {str(d.relative_to(self.config.base_dir)): d for d in run_dirs} return list(self._run_dirs.keys()) - def load_run_data(self, run_id: str) -> pd.DataFrame: - """Load parquet data for a run.""" + def get_run_parquet_path(self, run_id: str) -> Path: + """Resolved EnergyAndPeak / Results parquet path for a run (for cache keys).""" if run_id not in self._run_dirs: self.list_available_runs() @@ -266,7 +266,11 @@ def load_run_data(self, run_id: str) -> pd.DataFrame: if pq_file is None: msg = f"No .pq file in {run_dir}" raise FileNotFoundError(msg) + return pq_file + def load_run_data(self, run_id: str) -> pd.DataFrame: + """Load parquet data for a run.""" + pq_file = self.get_run_parquet_path(run_id) return load_output_table(pq_file) def load_building_locations(self) -> pd.DataFrame | None: diff --git a/src/globi/tools/visualization/plotting.py b/src/globi/tools/visualization/plotting.py index caa9e0f..7b1a445 100644 --- a/src/globi/tools/visualization/plotting.py +++ b/src/globi/tools/visualization/plotting.py @@ -1690,7 +1690,7 @@ def create_comparison_kde_d3_html( svg.append("text").attr("class", "axis-label").attr("text-anchor", "middle") .attr("x", margin.left + chartWidth / 2).attr("y", height - 6) - .text("energy use intensity (kWh/m2)"); + .text(xAxisTitle); svg.append("text").attr("class", "axis-label").attr("text-anchor", "middle") .attr("transform", "rotate(-90)") .attr("x", -(margin.top + chartHeight / 2)).attr("y", 16) @@ -2565,6 +2565,7 @@ def create_building_map_deck_from_cache( return None map_vis = _maybe_scale_eui_column_for_display(map_df, value_col, eui_unit) features = [] + for i, feat in enumerate(geometry): f = {"polygon": feat["polygon"], "height": feat["height"]} if value_col and value_col in map_vis.columns: diff --git a/src/globi/tools/visualization/utils.py b/src/globi/tools/visualization/utils.py index c6fb837..b79a592 100644 --- a/src/globi/tools/visualization/utils.py +++ b/src/globi/tools/visualization/utils.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any, cast +import numpy as np import pandas as pd # TODO: update this after the building col PR merged @@ -858,19 +859,11 @@ def transform_rotated_rectangle_to_latlon( return result -def build_map_df_from_output( # noqa: C901 +def _build_map_df_legacy_table_only( # noqa: C901 df: pd.DataFrame, cart_crs: str = "EPSG:3857", ) -> pd.DataFrame | None: - """Build map-ready dataframe directly from output parquet. - - Extracts lat/lon from rotated_rectangle. Output Energy is kWh/m² and Peak - is kW/m², so eui and peak_per_sqm are used directly; total_energy and - total_peak are eui*area and peak_per_sqm*area. Returns df with building_id, - lat, lon, rotated_rectangle, height, eui, peak_per_sqm, total_energy, - total_peak, end-use eui cols. Uses vectorized geopandas for geometry when - 100+ rows. - """ + """map_df rows only — parse/transform footprints once per row; used for small n.""" import logging df_reset = df.reset_index() @@ -895,16 +888,28 @@ def build_map_df_from_output( # noqa: C901 areas_arr = _conditioned_area_per_row(df, df_reset) if areas_arr is None: return None + + meter_to_cols: dict[str, list[Any]] = {} + for c in energy_cols: + if isinstance(c, tuple) and len(c) > 2: + meter_to_cols.setdefault(str(c[2]), []).append(c) + + meter_sum_arrays = { + f"eui_{m.lower().replace(' ', '_')}": df[cols] + .sum(axis=1) + .to_numpy(dtype=np.float64) + for m, cols in meter_to_cols.items() + } + # output Energy is kWh/m², Peak is kW/m² - use directly as eui and peak_per_sqm - eui_arr = df[energy_cols].sum(axis=1).values - peak_per_sqm_arr = df[peak_cols].max(axis=1).values + eui_arr = df[energy_cols].sum(axis=1).to_numpy(dtype=np.float64) + peak_per_sqm_arr = df[peak_cols].max(axis=1).to_numpy(dtype=np.float64) h_col = _find_col(df_reset, "height") nf_col = _find_col(df_reset, "num_floors") f2f_col = _find_col(df_reset, "f2f_height") log = logging.getLogger(__name__) - # vectorized path for 100+ rows: batch parse WKT and transform centroids use_vectorized = len(df_reset) >= 100 lon_lat_by_idx: dict[int, tuple[float, float]] = {} wkt_by_idx: dict[int, str] = {} @@ -947,11 +952,12 @@ def build_map_df_from_output( # noqa: C901 log.debug("skip row %s: %s", idx, exc) rows: list[dict] = [] + areas_np = np.asarray(areas_arr, dtype=np.float64) for idx, (lat, lon) in lon_lat_by_idx.items(): wkt = wkt_by_idx.get(idx, "") try: - fval = float(areas_arr[idx]) - area = None if fval != fval or fval <= 0 else fval + fval = float(areas_np[idx]) + area = None if not np.isfinite(fval) or fval <= 0 else fval except (TypeError, ValueError, IndexError): area = None if area is None: @@ -1001,14 +1007,8 @@ def build_map_df_from_output( # noqa: C901 "total_energy": total_energy, "total_peak": total_peak, } - row_vals = df.iloc[idx] - for meter in { - str(c[2]) for c in energy_cols if isinstance(c, tuple) and len(c) > 2 - }: - cols_m = [c for c in energy_cols if c[2] == meter] - if cols_m: - meter_eui = float(row_vals[cols_m].sum()) # already kWh/m² - row_dict[f"eui_{meter.lower().replace(' ', '_')}"] = meter_eui + for k, arr in meter_sum_arrays.items(): + row_dict[k] = float(arr[idx]) rows.append(row_dict) if not rows: @@ -1019,6 +1019,194 @@ def build_map_df_from_output( # noqa: C901 return out +def build_map_df_and_geometry_from_output( # noqa: C901 + df: pd.DataFrame, + cart_crs: str = "EPSG:3857", + *, + default_height_m: float = 10.0, +) -> tuple[pd.DataFrame, list[dict]] | None: + """One footprint parse + to_crs pass; returns map_df (kWh/m² eui) and pydeck geometry.""" + df_reset = df.reset_index() + n = len(df_reset) + bid_col = _find_col(df_reset, BUILDING_ID_COL) + rect_col = _find_rotated_rectangle_col(df_reset) + if bid_col is None or rect_col is None: + return None + + energy_cols = [ + c + for c in df.columns + if isinstance(c, tuple) and c[0] == "Energy" and c[1] == "End Uses" + ] + peak_cols = [ + c + for c in df.columns + if isinstance(c, tuple) and c[0] == "Peak" and c[1] == "Raw" + ] + if not energy_cols or not peak_cols: + return None + + areas_arr = _conditioned_area_per_row(df, df_reset) + if areas_arr is None: + return None + + meter_to_cols: dict[str, list[Any]] = {} + for c in energy_cols: + if isinstance(c, tuple) and len(c) > 2: + meter_to_cols.setdefault(str(c[2]), []).append(c) + + meter_sum_arrays = { + f"eui_{m.lower().replace(' ', '_')}": df[cols] + .sum(axis=1) + .to_numpy(dtype=np.float64) + for m, cols in meter_to_cols.items() + } + + eui_arr_np = df[energy_cols].sum(axis=1).to_numpy(dtype=np.float64) + peak_arr_np = df[peak_cols].max(axis=1).to_numpy(dtype=np.float64) + areas = np.asarray(areas_arr, dtype=np.float64) + + has_height = "height" in df_reset.columns + has_num_floors = _find_col(df_reset, "num_floors") is not None + if not has_height and not has_num_floors: + return None + + if n < 100: + mdf = _build_map_df_legacy_table_only(df, cart_crs=cart_crs) + if mdf is None: + return None + geom = build_map_features_from_df( + mdf, + cart_crs=cart_crs, + value_col=None, + default_height_m=default_height_m, + ) + return (mdf, geom) if geom else None + + wkt_series = cast( + pd.Series, + df_reset[rect_col].apply( + lambda v: getattr(v, "wkt", v) if v is not None else None + ), + ) + parsed = _wkt_to_geoseries_wgs(wkt_series, cart_crs=cart_crs) + if parsed is None: + mdf = _build_map_df_legacy_table_only(df, cart_crs=cart_crs) + if mdf is None: + return None + geom = build_map_features_from_df( + mdf, + cart_crs=cart_crs, + value_col=None, + default_height_m=default_height_m, + ) + return (mdf, geom) if geom else None + + _, gs_wgs, shapely_cart = parsed + polygon_ok = ~gs_wgs.is_empty & gs_wgs.geom_type.isin(["Polygon", "MultiPolygon"]) + gs_poly = gs_wgs.loc[polygon_ok] + if gs_poly.empty: + mdf = _build_map_df_legacy_table_only(df, cart_crs=cart_crs) + if mdf is None: + return None + geom = build_map_features_from_df( + mdf, + cart_crs=cart_crs, + value_col=None, + default_height_m=default_height_m, + ) + return (mdf, geom) if geom else None + + idx_to_pos = pd.Series(np.arange(n, dtype=np.int64), index=df_reset.index) + pos = idx_to_pos.loc[gs_poly.index].to_numpy(dtype=np.int64) + area_row = areas[pos] + keep = np.isfinite(area_row) & (area_row > 0) + gs_u = gs_poly[keep] + pos_u = pos[keep] + + if len(gs_u) == 0: + return None + + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + centroids = gs_u.centroid + lat_a = centroids.y.to_numpy(dtype=np.float64) + lon_a = centroids.x.to_numpy(dtype=np.float64) + + sub_reset = df_reset.take(pos_u) + heights = _compute_heights_vectorized( + sub_reset, df_reset, has_height, default_height_m + ) + # align height series to gs_u index + heights = heights.reindex(gs_u.index) + + row_records: list[dict] = [] + features: list[dict] = [] + import contextlib + + for i in range(len(gs_u)): + idx_label = gs_u.index[i] + geom_ll = gs_u.iloc[i] + poly = _geom_to_polygon_coords(geom_ll) + if poly is None: + continue + pi = int(pos_u[i]) + h = float(heights.loc[idx_label]) + features.append({"polygon": poly, "height": h}) + + area = float(areas[pi]) + eui = float(eui_arr_np[pi]) + peak_psqm = float(peak_arr_np[pi]) + bid = str(df_reset.iloc[pi][bid_col]) + cart_g = shapely_cart.loc[idx_label] + row_dict: dict = { + BUILDING_ID_COL: bid, + LAT_COL: float(lat_a[i]), + LON_COL: float(lon_a[i]), + ROTATED_RECTANGLE_COL: cart_g.wkt, + "height": h, + "conditioned_area": area, + "eui": eui, + "peak_per_sqm": peak_psqm, + "total_energy": eui * area, + "total_peak": peak_psqm * area, + } + for mk, marr in meter_sum_arrays.items(): + v = float(marr[pi]) + with contextlib.suppress(TypeError, ValueError): + row_dict[mk] = v + row_records.append(row_dict) + + if not row_records or not features: + return None + out = pd.DataFrame(row_records) + out[LAT_COL] = out[LAT_COL].astype("float64") + out[LON_COL] = out[LON_COL].astype("float64") + if len(features) != len(out): + mdf = _build_map_df_legacy_table_only(df, cart_crs=cart_crs) + if mdf is None: + return None + geom = build_map_features_from_df( + mdf, + cart_crs=cart_crs, + value_col=None, + default_height_m=default_height_m, + ) + return (mdf, geom) if geom else None + return out, features + + +def build_map_df_from_output( + df: pd.DataFrame, + cart_crs: str = "EPSG:3857", +) -> pd.DataFrame | None: + """Build map-ready dataframe directly from output parquet (kWh/m² eui).""" + pair = build_map_df_and_geometry_from_output(df, cart_crs=cart_crs) + return pair[0] if pair else None + + def _extract_basic_overheating( oh_flat: pd.DataFrame, bid_col, @@ -2012,14 +2200,88 @@ def build_priority_table_df( return df.head(int(top_n)).reset_index(drop=True) +def _merge_one_oh_metric_into_eui_df( + merged: pd.DataFrame, + run_dir: Path, + df_key: str, + heat_threshold_c: float, + aggregation: str, + value_col_name: str, +) -> tuple[pd.DataFrame, bool]: + sub = _load_one_overheating_metric( + run_dir, + df_key, + BUILDING_ID_COL, + float(heat_threshold_c), + aggregation, + ) + if sub is None or sub.empty: + return merged, False + val_col = _df_last_col_name(sub) + part = cast( + pd.DataFrame, + sub.loc[:, [BUILDING_ID_COL, val_col]].copy(), + ).rename(columns={val_col: value_col_name}) + part[BUILDING_ID_COL] = part[BUILDING_ID_COL].astype(str) + return merged.merge(part, on=BUILDING_ID_COL, how="left"), True + + +def _trim_eui_oh_merge_to_rows_with_metric(merged: pd.DataFrame) -> pd.DataFrame | None: + has_edh_col = "edh_zone_weighted" in merged.columns + has_exc_col = "exceedance_hours" in merged.columns + if has_edh_col and has_exc_col: + s_edh = cast(pd.Series, merged["edh_zone_weighted"]) + s_exc = cast(pd.Series, merged["exceedance_hours"]) + keep = s_edh.notna() | s_exc.notna() + elif has_edh_col: + keep = cast(pd.Series, merged["edh_zone_weighted"]).notna() + else: + keep = cast(pd.Series, merged["exceedance_hours"]).notna() + out = merged.loc[keep].copy() + out = out.loc[cast(pd.Series, out["eui"]).notna()].copy() + return out if not out.empty else None + + +def _merge_num_floors_from_energy_df( + merged: pd.DataFrame, + energy_df: pd.DataFrame, +) -> pd.DataFrame: + import logging as _logging + + try: + df_reset = energy_df.reset_index() + nf_col = _find_col(df_reset, "num_floors") + if nf_col is None or nf_col not in df_reset.columns: + return merged + bid_col = _find_col(df_reset, BUILDING_ID_COL) + if bid_col is None: + return merged + nf_df = ( + df_reset[[bid_col, nf_col]] + .drop_duplicates(subset=[bid_col]) # type: ignore[call-arg] + .copy() + ) + nf_df = nf_df.rename(columns={bid_col: BUILDING_ID_COL, nf_col: "num_floors"}) + nf_df[BUILDING_ID_COL] = nf_df[BUILDING_ID_COL].astype(str) + if BUILDING_ID_COL in nf_df.columns and "num_floors" in nf_df.columns: + return merged.merge(nf_df, on=BUILDING_ID_COL, how="left") + except Exception as exc: + _logging.getLogger(__name__).debug("num_floors merge skipped: %s", exc) + return merged + + def build_eui_vs_edh_df( run_dir: Path, heat_threshold_c: float, aggregation: str, ) -> pd.DataFrame | None: - """Join EUI from EnergyAndPeak with EDH; includes num_floors for box plot. + """Join EUI from EnergyAndPeak with EDH and/or basic exceedance hours. + + Uses ExceedanceDegreeHours when present, else BasicOverheating hours (same + aggregation as the dashboard). Includes num_floors when available. - Returns DataFrame with columns: building_id, eui, edh_zone_weighted, num_floors. + Columns: building_id, eui, and any of edh_zone_weighted, exceedance_hours + that loaded successfully. Rows require at least one overheating value. """ energy_path = get_pq_file_for_run(run_dir) if energy_path is None: @@ -2029,47 +2291,44 @@ def build_eui_vs_edh_df( if geo_df is None or geo_df.empty: return None - edh = _load_one_overheating_metric( - run_dir, - "ExceedanceDegreeHours", - BUILDING_ID_COL, - float(heat_threshold_c), - aggregation, + available = list_overheating_files_for_run(run_dir) + merged = cast( + pd.DataFrame, + geo_df.loc[:, [BUILDING_ID_COL, "eui"]].copy(), ) - if edh is None or edh.empty: - return None - val_col = _df_last_col_name(edh) - edh = edh.rename(columns={val_col: "edh_zone_weighted"}) - edh[BUILDING_ID_COL] = edh[BUILDING_ID_COL].astype(str) - geo_df[BUILDING_ID_COL] = geo_df[BUILDING_ID_COL].astype(str) + merged[BUILDING_ID_COL] = merged[BUILDING_ID_COL].astype(str) - merged = geo_df[[BUILDING_ID_COL, "eui"]].merge( - edh, on=BUILDING_ID_COL, how="inner" - ) + edh_loaded = False + basic_loaded = False - # try to extract num_floors from energy_df index - import logging as _logging + if "ExceedanceDegreeHours" in available: + merged, edh_loaded = _merge_one_oh_metric_into_eui_df( + merged, + run_dir, + "ExceedanceDegreeHours", + heat_threshold_c, + aggregation, + "edh_zone_weighted", + ) - try: - df_reset = energy_df.reset_index() - nf_col = _find_col(df_reset, "num_floors") - if nf_col is not None and nf_col in df_reset.columns: - bid_col = _find_col(df_reset, BUILDING_ID_COL) - if bid_col is not None: - nf_df = ( - df_reset[[bid_col, nf_col]] - .drop_duplicates(subset=[bid_col]) # type: ignore[call-arg] - .copy() - ) - nf_df = nf_df.rename( - columns={bid_col: BUILDING_ID_COL, nf_col: "num_floors"} - ) - nf_df[BUILDING_ID_COL] = nf_df[BUILDING_ID_COL].astype(str) - if BUILDING_ID_COL in nf_df.columns and "num_floors" in nf_df.columns: - merged = merged.merge(nf_df, on=BUILDING_ID_COL, how="left") - except Exception as exc: - _logging.getLogger(__name__).debug("num_floors merge skipped: %s", exc) + if "BasicOverheating" in available: + merged, basic_loaded = _merge_one_oh_metric_into_eui_df( + merged, + run_dir, + "BasicOverheating", + heat_threshold_c, + aggregation, + "exceedance_hours", + ) + + if not edh_loaded and not basic_loaded: + return None + + merged = _trim_eui_oh_merge_to_rows_with_metric(merged) + if merged is None: + return None + merged = _merge_num_floors_from_energy_df(merged, energy_df) return merged if not merged.empty else None diff --git a/src/globi/tools/visualization/views/raw_data.py b/src/globi/tools/visualization/views/raw_data.py index fd3c936..434dc79 100644 --- a/src/globi/tools/visualization/views/raw_data.py +++ b/src/globi/tools/visualization/views/raw_data.py @@ -2,11 +2,13 @@ from __future__ import annotations +from pathlib import Path + import pandas as pd import streamlit as st import streamlit.components.v1 as components -from globi.tools.visualization.data_sources import DataSource +from globi.tools.visualization.data_sources import DataSource, LocalDataSource from globi.tools.visualization.export import render_html_to_png from globi.tools.visualization.plotting import ( EnergyIntensityUnit, @@ -28,29 +30,65 @@ LAT_COL, LON_COL, MAP_POLYGON_CRS_OPTIONS, - build_map_df_from_output, - build_map_features_from_df, + build_map_df_and_geometry_from_output, + format_rotated_rectangle_crs_hint, has_geo_columns, has_rotated_rectangle_for_visualization, infer_rotated_rectangle_crs_hint, list_categorical_columns, list_numeric_columns, + load_output_table, + read_parquet_sample_for_crs_inference, suggested_polygon_crs_select_index, ) +@st.cache_data(show_spinner="Loading run…") +def _cached_load_run_parquet(path_str: str, mtime: float) -> pd.DataFrame: + return load_output_table(Path(path_str)) + + +@st.cache_data(show_spinner=False) +def _cached_extract_d3_for_run(path_str: str, mtime: float, run_label: str) -> dict: + df = _cached_load_run_parquet(path_str, mtime) + return extract_d3_data(df, region_name=run_label, scenario_name="") + + +@st.cache_data(show_spinner=False) +def _cached_crs_hint_from_pq(path_str: str, mtime: float) -> dict: + sample = read_parquet_sample_for_crs_inference(Path(path_str), max_rows=80) + if sample is None or sample.empty: + return { + "has_footprints": False, + "suggested_crs": None, + "scores": {}, + "n_geoms": 0, + "native_bounds": None, + "ambiguous": False, + "tied_crs": (), + } + return infer_rotated_rectangle_crs_hint(sample) + + @st.cache_data(show_spinner="Building map data (geometry + metrics)...") -def _build_map_cache(run_label: str, cart_crs: str, _df: pd.DataFrame): - """Build map_df and geometry. _df excluded from cache key (use run_label).""" - map_df = build_map_df_from_output(_df, cart_crs=cart_crs) - src_for_geom = map_df if map_df is not None else _df - geometry = build_map_features_from_df( - src_for_geom, cart_crs=cart_crs, value_col=None - ) - if geometry is None: +def _build_map_cache_local( + path_str: str, mtime: float, cart_crs: str, _df: pd.DataFrame +): + """Map cache keyed by parquet path, mtime, and CRS; dataframe body not hashed.""" + pair = build_map_df_and_geometry_from_output(_df, cart_crs=cart_crs) + if pair is None: + return None + map_df, geometry = pair + return map_df, geometry + + +@st.cache_data(show_spinner="Building map data (geometry + metrics)...") +def _build_map_cache_remote(run_label: str, cart_crs: str, _df: pd.DataFrame): + """Fallback when parquet path identity is unavailable (e.g. S3).""" + pair = build_map_df_and_geometry_from_output(_df, cart_crs=cart_crs) + if pair is None: return None - deck_df = map_df if map_df is not None else _df - return (deck_df, geometry) + return pair[0], pair[1] _COLORMAP_GRADIENTS = { @@ -218,9 +256,16 @@ def render_raw_data_page(data_source: DataSource) -> None: index=max(len(available_runs) - 1, 0), ) + pq_token: tuple[str, float] | None = None try: - with st.spinner(f"Loading {selected_run}..."): - df = data_source.load_run_data(selected_run) + if isinstance(data_source, LocalDataSource): + pq = data_source.get_run_parquet_path(selected_run) + pq_token = (str(pq.resolve()), pq.stat().st_mtime) + with st.spinner(f"Loading {selected_run}..."): + df = _cached_load_run_parquet(pq_token[0], pq_token[1]) + else: + with st.spinner(f"Loading {selected_run}..."): + df = data_source.load_run_data(selected_run) except Exception as e: st.error(f"Failed to load data: {e}") return @@ -229,9 +274,9 @@ def render_raw_data_page(data_source: DataSource) -> None: if is_results_format(df): eui_unit = pick_energy_intensity_unit() - _render_results_format(df, selected_run, data_source, eui_unit) + _render_results_format(df, selected_run, data_source, eui_unit, pq_token) else: - _render_generic_format(df, selected_run) + _render_generic_format(df, selected_run, pq_token) def _render_results_format( @@ -239,24 +284,31 @@ def _render_results_format( run_label: str, data_source: DataSource, eui_unit: EnergyIntensityUnit, + pq_token: tuple[str, float] | None, ) -> None: """Render Results.pq format with summary and map tabs.""" summary_tab, map_tab = st.tabs(["Summary", "Map"]) with summary_tab: - _render_results_summary(df, run_label, eui_unit) + _render_results_summary(df, run_label, eui_unit, pq_token) with map_tab: - _render_results_map(df, run_label, data_source, eui_unit) + _render_results_map(df, run_label, data_source, eui_unit, pq_token) def _render_results_summary( - df: pd.DataFrame, run_label: str, eui_unit: EnergyIntensityUnit + df: pd.DataFrame, + run_label: str, + eui_unit: EnergyIntensityUnit, + pq_token: tuple[str, float] | None, ) -> None: """Render D3 summary visualizations for Results format.""" st.markdown("### Results Summary") theme = _streamlit_theme() - d3_data = extract_d3_data(df, region_name=run_label, scenario_name="") + if pq_token is not None: + d3_data = _cached_extract_d3_for_run(pq_token[0], pq_token[1], run_label) + else: + d3_data = extract_d3_data(df, region_name=run_label, scenario_name="") eui_lbl = energy_intensity_axis_label(eui_unit) st.subheader("EUI Distribution") @@ -363,6 +415,7 @@ def _render_results_map( run_label: str, data_source: DataSource, eui_unit: EnergyIntensityUnit, + pq_token: tuple[str, float] | None, ) -> None: """Render 3D building map from rotated_rectangle and height. @@ -384,13 +437,17 @@ def _render_results_map( st.markdown("### 3D Building Map") - _rr_sample = df.iloc[: min(400, len(df))] - _rr_hint = infer_rotated_rectangle_crs_hint(_rr_sample) + if pq_token is not None: + crs_hint = _cached_crs_hint_from_pq(pq_token[0], pq_token[1]) + else: + _rr_sample = df.iloc[: min(400, len(df))] + crs_hint = infer_rotated_rectangle_crs_hint(_rr_sample) + st.caption(format_rotated_rectangle_crs_hint(crs_hint)) cart_crs = st.selectbox( "Polygon CRS (rotated_rectangle coordinates)", options=list(MAP_POLYGON_CRS_OPTIONS), - index=suggested_polygon_crs_select_index(_rr_hint), - help="EPSG:3857 (Web Mercator) is typical for geometry.py pipelines.", + index=suggested_polygon_crs_select_index(crs_hint), + help="Inferred from a sample of this parquet. Adjust if buildings are offset.", key=f"results_map_crs__{run_norm.replace('/', '_')[:120]}", ) @@ -409,7 +466,10 @@ def _render_results_map( ) value_col, cmap, metric_label = metric_option - cached = _build_map_cache(run_label, cart_crs, df) + if pq_token is not None: + cached = _build_map_cache_local(pq_token[0], pq_token[1], cart_crs, df) + else: + cached = _build_map_cache_remote(run_label, cart_crs, df) if cached is not None: map_df, geometry = cached result = create_building_map_deck_from_cache( @@ -452,16 +512,23 @@ def _render_results_map( def _render_generic_rotated_rectangle_map( - df: pd.DataFrame, run_label: str, safe_key: str + df: pd.DataFrame, + run_label: str, + safe_key: str, + pq_token: tuple[str, float] | None, ) -> None: """Footprint + extrusion map for flat or mixed parquet (no Summary tab MultiIndex required).""" - _rr_sample = df.iloc[: min(400, len(df))] - _rr_hint = infer_rotated_rectangle_crs_hint(_rr_sample) + if pq_token is not None: + crs_hint = _cached_crs_hint_from_pq(pq_token[0], pq_token[1]) + else: + _rr_sample = df.iloc[: min(400, len(df))] + crs_hint = infer_rotated_rectangle_crs_hint(_rr_sample) + st.caption(format_rotated_rectangle_crs_hint(crs_hint)) cart_crs = st.selectbox( "Polygon CRS (rotated_rectangle coordinates)", options=list(MAP_POLYGON_CRS_OPTIONS), - index=suggested_polygon_crs_select_index(_rr_hint), - help="EPSG:32619 is common for Everett-area UTM zone 19N outputs.", + index=suggested_polygon_crs_select_index(crs_hint), + help="Inferred from a sample of this parquet. Adjust if buildings are offset.", key=f"generic_rr_crs_{safe_key}", ) numeric_cols = list_numeric_columns(df) @@ -480,7 +547,10 @@ def _render_generic_rotated_rectangle_map( map_color_col = choice metric_label = str(choice) - cached = _build_map_cache(f"generic:{run_label}", cart_crs, df) + if pq_token is not None: + cached = _build_map_cache_local(pq_token[0], pq_token[1], cart_crs, df) + else: + cached = _build_map_cache_remote(f"generic:{run_label}", cart_crs, df) if cached is not None: map_df, geometry = cached result = create_building_map_deck_from_cache( @@ -517,7 +587,9 @@ def _render_generic_rotated_rectangle_map( ) -def _render_generic_format(df: pd.DataFrame, run_label: str) -> None: +def _render_generic_format( + df: pd.DataFrame, run_label: str, pq_token: tuple[str, float] | None +) -> None: """Render generic parquet format with map and D3 summaries.""" theme = _streamlit_theme() numeric_cols = list_numeric_columns( @@ -528,7 +600,7 @@ def _render_generic_format(df: pd.DataFrame, run_label: str) -> None: safe_key = run_label.replace("/", "_").replace("\\", "_") if has_rotated_rectangle_for_visualization(df): - _render_generic_rotated_rectangle_map(df, run_label, safe_key) + _render_generic_rotated_rectangle_map(df, run_label, safe_key, pq_token) elif has_geo_columns(df): if not numeric_cols: st.info("No numeric columns available for height metric.") diff --git a/src/globi/tools/visualization/views/use_cases.py b/src/globi/tools/visualization/views/use_cases.py index e91ad49..839f34b 100644 --- a/src/globi/tools/visualization/views/use_cases.py +++ b/src/globi/tools/visualization/views/use_cases.py @@ -2,8 +2,11 @@ from __future__ import annotations +import contextlib +import json import math -from typing import cast +from pathlib import Path +from typing import Any, cast import numpy as np import pandas as pd @@ -50,15 +53,17 @@ build_consecutive_exceedances_building_df, build_eui_vs_edh_df, build_heat_index_per_building_df, + build_overheating_map_df, build_overheating_threshold_fan_wide_df, build_portfolio_multi_metric_df, build_priority_table_df, + build_run_buildings_df, build_threshold_sensitivity_df, build_worst_zone_ratio_df, + get_overheating_file_for_run, get_pq_file_for_run, infer_rotated_rectangle_crs_hint, read_parquet_sample_for_crs_inference, - resolve_buildings_df_for_overheating_plots, sample_overheating_fan_payload, suggested_polygon_crs_select_index, ) @@ -318,6 +323,56 @@ def _uniquify_display_names( return out +def _normalize_signature_tree(obj: Any) -> Any: + """Round numerics and sort dict keys so widget reruns don't flip JSON fingerprints.""" + if isinstance(obj, float | np.floating): + v = float(obj) + if math.isnan(v) or math.isinf(v): + return None + return round(v, 6) + if isinstance(obj, int | np.integer) and not isinstance(obj, bool): + return round(float(obj), 6) + if isinstance(obj, dict): + return { + str(k).strip(): _normalize_signature_tree(v) + for k, v in sorted(obj.items(), key=lambda kv: str(kv[0])) + } + if isinstance(obj, list | tuple): + return [_normalize_signature_tree(v) for v in obj] + if isinstance(obj, str): + return obj.strip() + return obj + + +def _retrofit_input_signature( + selected_runs: list[str], + per_scenario_energy_costs: dict[str, dict[str, float]], + per_scenario_emissions: dict[str, dict[str, float]], + system_costs_per_sqm: dict[str, float], + display_names: dict[str, str], +) -> str: + """Stable fingerprint for retrofit form inputs (invalidates cached 'compared' UI).""" + payload = { + "runs": [str(r).strip() for r in selected_runs], + "ec": _normalize_signature_tree(per_scenario_energy_costs), + "em": _normalize_signature_tree(per_scenario_emissions), + "syscost": _normalize_signature_tree(system_costs_per_sqm), + "names": _normalize_signature_tree(display_names), + } + return json.dumps(payload, sort_keys=True, default=str) + + +def _scenario_comparison_input_signature( + selected_runs: list[str], + display_names: dict[str, str], +) -> str: + payload = { + "runs": [str(r).strip() for r in selected_runs], + "names": _normalize_signature_tree(display_names), + } + return json.dumps(payload, sort_keys=True, default=str) + + def _retrofit_params_form( selected_runs: list[str], ) -> tuple[ @@ -435,7 +490,23 @@ def _render_retrofit_use_case(data_source: DataSource) -> None: st.info("Select at least 2 scenarios to generate a comparison.") return - if not st.button("Compare Scenarios", key="retrofit_compare"): + compare_clicked = st.button("Compare Scenarios", key="retrofit_compare") + + input_sig = _retrofit_input_signature( + selected_runs, + per_scenario_energy_costs, + per_scenario_emissions, + system_costs_per_sqm, + display_names, + ) + + if compare_clicked: + st.session_state["retrofit_comparison_ready"] = True + st.session_state["retrofit_comparison_sig"] = input_sig + elif st.session_state.get("retrofit_comparison_sig") != input_sig: + st.session_state["retrofit_comparison_ready"] = False + + if not st.session_state.get("retrofit_comparison_ready"): return dfs: dict[str, pd.DataFrame] = {} @@ -642,6 +713,39 @@ def _render_retrofit_charts( ) +def _overheating_map_inputs_mtime(run_dir: Path, data_source_type: str) -> float: + """Max mtime of overheating + Energy parquet inputs for cache invalidation.""" + t = 0.0 + oh = get_overheating_file_for_run(run_dir, data_source_type) + en = get_pq_file_for_run(run_dir) + for p in (oh, en): + if p is not None and p.is_file(): + with contextlib.suppress(OSError): + t = max(t, p.stat().st_mtime) + return t + + +@st.cache_data(show_spinner=False) +def _cached_build_overheating_map_df( + run_dir_str: str, + inputs_mtime: float, + cart_crs: str, + heat_threshold_c: float, + aggregation: str, + data_source_type: str, + heat_index_metric: str, +) -> pd.DataFrame | None: + """Disk + merge + geometry for overheating map; keyed by run contents + params.""" + return build_overheating_map_df( + Path(run_dir_str), + cart_crs=cart_crs, + heat_threshold_c=heat_threshold_c, + aggregation=aggregation, + data_source_type=data_source_type, + heat_index_metric=heat_index_metric, + ) + + @st.cache_data(show_spinner="Building map geometry...") def _build_retrofit_geometry_cache(scenario: str, cart_crs: str, _map_df: pd.DataFrame): """Build geometry from map_df. _map_df excluded from cache key (use scenario).""" @@ -651,11 +755,19 @@ def _build_retrofit_geometry_cache(scenario: str, cart_crs: str, _map_df: pd.Dat @st.cache_data(show_spinner="Building map geometry...") -def _build_overheating_geometry_cache(cart_crs: str, map_df: pd.DataFrame): - """Cache WKT parse + crs transform per map_df and crs (overheating merges).""" +def _build_overheating_geometry_cache( + run_dir_str: str, + inputs_mtime: float, + cart_crs: str, + heat_threshold_c: float, + aggregation: str, + data_source_type: str, + _map_df: pd.DataFrame, +): + """Geometry from map_df; _map_df excluded from key — must match other args.""" from globi.tools.visualization.utils import build_map_features_from_df - return build_map_features_from_df(map_df, cart_crs=cart_crs, value_col=None) + return build_map_features_from_df(_map_df, cart_crs=cart_crs, value_col=None) @st.cache_data(show_spinner=False) @@ -675,6 +787,35 @@ def _cached_infer_rotated_rectangle_crs_hint(pq_path: str, mtime: float) -> dict return infer_rotated_rectangle_crs_hint(sample) +_RETROFIT_MAP_COLOR_FIELD_ORDER = ( + "eui", + "total_energy", + "energy_cost", + "emissions", + "capital_cost", + "total_cost", + "peak_per_sqm", + "total_peak", +) + + +def _retrofit_map_color_field_label( + metric_key: str, eui_unit: EnergyIntensityUnit +) -> str: + if metric_key == "eui": + return energy_intensity_axis_label(eui_unit) + labels = { + "total_energy": "Total energy (kWh)", + "energy_cost": "Energy cost ($)", + "emissions": "Emissions (kg CO2)", + "capital_cost": "System cost ($)", + "total_cost": "Total cost ($)", + "peak_per_sqm": "Peak per sqm (kW/m²)", + "total_peak": "Total peak (kW)", + } + return labels.get(metric_key, metric_key) + + def _render_retrofit_map( dfs: dict[str, pd.DataFrame], per_scenario_energy_costs: dict[str, dict[str, float]], @@ -695,28 +836,18 @@ def _render_retrofit_map( key="retrofit_map_scenario", ) with col2: - eui_lbl = energy_intensity_axis_label(eui_unit) - metric_option = st.selectbox( + color_field = st.selectbox( "Color by", - options=[ - ("eui", "greens", eui_lbl), - ("total_energy", "viridis", "Total energy (kWh)"), - ("energy_cost", "reds", "Energy cost ($)"), - ("emissions", "reds", "Emissions (kg CO2)"), - ("capital_cost", "plasma", "System cost ($)"), - ("total_cost", "reds", "Total cost ($)"), - ("peak_per_sqm", "reds", "Peak per sqm (kW/m²)"), - ("total_peak", "plasma", "Total peak (kW)"), - ], - format_func=lambda x: x[2], - key="retrofit_map_metric", + options=list(_RETROFIT_MAP_COLOR_FIELD_ORDER), + format_func=lambda k: _retrofit_map_color_field_label(k, eui_unit), + key="retrofit_map_color_field", ) - value_col, default_cmap, metric_label = metric_option + metric_label = _retrofit_map_color_field_label(color_field, eui_unit) + value_col = color_field with col3: cmap = st.selectbox( "Colormap", options=["reds", "greens", "viridis", "plasma"], - index=["reds", "greens", "viridis", "plasma"].index(default_cmap), key="retrofit_map_cmap", ) @@ -779,43 +910,44 @@ def _render_retrofit_map( _render_colormap_legend(metric_label, value_stats, cmap) -def _load_overheating_dashboard_data( - data_source: DataSource, - selected_run: str, +@st.cache_data(show_spinner=False) +def _load_overheating_dashboard_data_cached( + run_dir: Path | None, + available_files: tuple[str, ...], + thresholds: tuple[float, ...], heat_threshold: float, aggregation: str, cart_crs: str, + buildings_fallback_df: pd.DataFrame | None, ) -> dict: - """Load all overheating data at once; values may be None if unavailable.""" - run_dir = data_source.resolve_run_dir(selected_run) - available_files = data_source.list_overheating_files(selected_run) - thresholds = data_source.get_overheating_thresholds(selected_run) - + """Cached loader — all args are primitives/Paths so Streamlit can hash them.""" primary_dstype = ( "ExceedanceDegreeHours" if "ExceedanceDegreeHours" in available_files else (available_files[0] if available_files else "BasicOverheating") ) - map_df = data_source.load_overheating_map_data( - selected_run, - cart_crs=cart_crs, - heat_threshold_c=heat_threshold, - aggregation=aggregation, - data_source_type=primary_dstype, - heat_index_metric="danger_hours", - ) - + map_df = None multi_metric_df = None heat_index_df = None threshold_sensitivity_df = None fan_wide = None eui_edh_df = None buildings_df = None - building_area_df = None consecutive_df = None + if run_dir is not None: + _oh_mtime = _overheating_map_inputs_mtime(run_dir, primary_dstype) + map_df = _cached_build_overheating_map_df( + str(run_dir.resolve()), + _oh_mtime, + cart_crs, + heat_threshold, + aggregation, + primary_dstype, + "danger_hours", + ) multi_metric_df = build_portfolio_multi_metric_df( run_dir, heat_threshold, aggregation ) @@ -827,10 +959,9 @@ def _load_overheating_dashboard_data( run_dir, primary_dstype, aggregation ) eui_edh_df = build_eui_vs_edh_df(run_dir, heat_threshold, aggregation) - buildings_df = resolve_buildings_df_for_overheating_plots( - run_dir, - data_source.load_building_locations, - ) + buildings_df = build_run_buildings_df(run_dir) + if buildings_df is None: + buildings_df = buildings_fallback_df building_area_df = build_building_area_df(run_dir) if "ConsecutiveExceedances" in available_files: consecutive_df = build_consecutive_exceedances_building_df( @@ -844,8 +975,8 @@ def _load_overheating_dashboard_data( "threshold_sensitivity_df": threshold_sensitivity_df, "fan_wide": fan_wide, "run_dir": run_dir, - "available_files": available_files, - "thresholds": thresholds, + "available_files": list(available_files), + "thresholds": list(thresholds), "eui_edh_df": eui_edh_df, "buildings_df": buildings_df, "building_area_df": building_area_df, @@ -854,6 +985,29 @@ def _load_overheating_dashboard_data( } +def _load_overheating_dashboard_data( + data_source: DataSource, + selected_run: str, + heat_threshold: float, + aggregation: str, + cart_crs: str, +) -> dict: + """Load all overheating data at once; values may be None if unavailable.""" + run_dir = data_source.resolve_run_dir(selected_run) + available_files = tuple(data_source.list_overheating_files(selected_run)) + thresholds = tuple(data_source.get_overheating_thresholds(selected_run)) + buildings_fallback_df = data_source.load_building_locations() + return _load_overheating_dashboard_data_cached( + run_dir, + available_files, + thresholds, + heat_threshold, + aggregation, + cart_crs, + buildings_fallback_df, + ) + + def _render_tab_portfolio( # noqa: C901 data: dict, heat_threshold: float, @@ -1432,7 +1586,31 @@ def _render_tab_geography( # noqa: C901 if chosen is None: return - with st.spinner("Loading map data..."): + run_dir = data_source.resolve_run_dir(selected_run) + + primary = data.get("primary_dstype") + dash_map = data.get("map_df") + map_df = None + if ( + run_dir is not None + and dash_map is not None + and not getattr(dash_map, "empty", True) + and chosen == primary + and geo_agg == aggregation + ): + map_df = dash_map + elif run_dir is not None: + _om = _overheating_map_inputs_mtime(run_dir, chosen) + map_df = _cached_build_overheating_map_df( + str(run_dir.resolve()), + _om, + cart_crs, + heat_threshold, + geo_agg, + chosen, + "danger_hours", + ) + else: map_df = data_source.load_overheating_map_data( selected_run, cart_crs=cart_crs, @@ -1452,7 +1630,24 @@ def _render_tab_geography( # noqa: C901 # 3D map st.markdown("#### 3D building map") - geometry_cache = _build_overheating_geometry_cache(cart_crs, map_df) + geometry_cache = None + if run_dir is not None: + _img_m = _overheating_map_inputs_mtime(run_dir, chosen) + geometry_cache = _build_overheating_geometry_cache( + str(run_dir.resolve()), + _img_m, + cart_crs, + heat_threshold, + geo_agg, + chosen, + map_df, + ) + else: + from globi.tools.visualization.utils import build_map_features_from_df + + geometry_cache = build_map_features_from_df( + map_df, cart_crs=cart_crs, value_col=None + ) result = None if geometry_cache is not None: result = create_building_map_deck_from_cache( @@ -1556,14 +1751,43 @@ def _render_tab_geography( # noqa: C901 ) if "BasicOverheating" in available_files and chosen != "BasicOverheating": - _render_geo_hours_brush_panel( - data_source, selected_run, heat_threshold, geo_agg, cart_crs, theme - ) + hrs_map_df = None + if ( + run_dir is not None + and dash_map is not None + and not getattr(dash_map, "empty", True) + and primary == "BasicOverheating" + and geo_agg == aggregation + ): + hrs_map_df = dash_map + elif run_dir is not None: + _hrs_m = _overheating_map_inputs_mtime(run_dir, "BasicOverheating") + hrs_map_df = _cached_build_overheating_map_df( + str(run_dir.resolve()), + _hrs_m, + cart_crs, + heat_threshold, + geo_agg, + "BasicOverheating", + "danger_hours", + ) + else: + hrs_map_df = data_source.load_overheating_map_data( + selected_run, + cart_crs=cart_crs, + heat_threshold_c=heat_threshold, + aggregation=geo_agg, + data_source_type="BasicOverheating", + heat_index_metric="danger_hours", + ) + if hrs_map_df is not None and not hrs_map_df.empty: + _render_geo_hours_brush_panel( + hrs_map_df, heat_threshold, geo_agg, cart_crs, theme + ) def _render_geo_hours_brush_panel( - data_source: DataSource, - selected_run: str, + hrs_map_df: pd.DataFrame, heat_threshold: float, geo_agg: str, cart_crs: str, @@ -1571,17 +1795,6 @@ def _render_geo_hours_brush_panel( ) -> None: """Second geography brush panel: total hours above threshold, buildings shown in flat red.""" st.divider() - with st.spinner("Loading total hours above threshold map..."): - hrs_map_df = data_source.load_overheating_map_data( - selected_run, - cart_crs=cart_crs, - heat_threshold_c=heat_threshold, - aggregation=geo_agg, - data_source_type="BasicOverheating", - heat_index_metric="danger_hours", - ) - if hrs_map_df is None or hrs_map_df.empty: - return hrs_vals_raw = hrs_map_df["map_value"].dropna().astype(float) if hrs_vals_raw.empty: @@ -1929,23 +2142,26 @@ def _render_tab_correlations( # noqa: C901 and not eui_edh.empty and "eui" in eui_edh.columns and "edh_zone_weighted" in eui_edh.columns + and (eui_edh["eui"].notna() & eui_edh["edh_zone_weighted"].notna()).sum() >= 2 ) - # Join exceedance hours from multi_metric_df onto eui_edh for the second scatter + # exceedance hours: prefer columns from eui_edh (basic-only runs), else multi eui_hours = None - if ( - multi is not None - and "exceedance_hours" in multi.columns - and eui_edh is not None - and "eui" in eui_edh.columns - ): - eui_hours = eui_edh[[BUILDING_ID_COL, "eui"]].merge( - multi[[BUILDING_ID_COL, "exceedance_hours"]], - on=BUILDING_ID_COL, - how="inner", - ) - eui_hours = eui_hours.dropna(subset=["eui", "exceedance_hours"]) - if eui_hours.empty: - eui_hours = None + if eui_edh is not None and "eui" in eui_edh.columns: + if "exceedance_hours" in eui_edh.columns: + eui_hours = eui_edh[[BUILDING_ID_COL, "eui", "exceedance_hours"]].dropna( + subset=["eui", "exceedance_hours"] + ) + if eui_hours.empty: + eui_hours = None + elif multi is not None and "exceedance_hours" in multi.columns: + eui_hours = eui_edh[[BUILDING_ID_COL, "eui"]].merge( + multi[[BUILDING_ID_COL, "exceedance_hours"]], + on=BUILDING_ID_COL, + how="inner", + ) + eui_hours = eui_hours.dropna(subset=["eui", "exceedance_hours"]) + if eui_hours.empty: + eui_hours = None if has_edh or eui_hours is not None: st.divider() @@ -2033,11 +2249,12 @@ def _render_tab_correlations( # noqa: C901 ], ) - # --- Box plot by number of floors --- + # --- Box plot by number of floors (EDH only) --- if ( eui_edh is not None and "num_floors" in eui_edh.columns and "edh_zone_weighted" in eui_edh.columns + and (eui_edh["edh_zone_weighted"].notna()).sum() >= 3 ): st.divider() st.markdown("#### EDH by number of floors") @@ -2339,7 +2556,7 @@ def _render_overheating_use_case(data_source: DataSource) -> None: _render_tab_correlations(dashboard_data, heat_threshold, aggregation, theme) -def _render_scenario_comparison(data_source: DataSource) -> None: +def _render_scenario_comparison(data_source: DataSource) -> None: # noqa: C901 """Render scenario comparison with EUI, end uses, and utilities charts.""" st.markdown("### Scenario Comparison") st.markdown("Compare energy distributions across multiple scenarios.") @@ -2374,7 +2591,17 @@ def _render_scenario_comparison(data_source: DataSource) -> None: ) display_names[run_id] = (val.strip() or run_id) if val else run_id - if not st.button("Generate Comparison"): + compare_clicked = st.button("Generate Comparison", key="scenario_compare_generate") + + input_sig = _scenario_comparison_input_signature(selected_runs, display_names) + + if compare_clicked: + st.session_state["scenario_comparison_ready"] = True + st.session_state["scenario_comparison_sig"] = input_sig + elif st.session_state.get("scenario_comparison_sig") != input_sig: + st.session_state["scenario_comparison_ready"] = False + + if not st.session_state.get("scenario_comparison_ready"): return # load data for each selected scenario