# Task 3 · Interactive & Extended Vega-Lite Visualizations
This section extends Task 2 by adding **interactivity, composition, and linked views** to deepen exploration of San Francisco property data (2015–2023). Each visualization clearly states its **purpose**, **interactivity**, and **key insights**.

**What’s included**
- Explorers (brush, zoom/pan, tooltips)
- Geospatial + time (choropleth with year slider)
- Linked dashboards (bar → time series → scatter)
- Multidimensional profiling (parallel coordinates)
- Technique summary & reproducibility notes

## Setup and Data Loading

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import altair as alt
from sklearn.preprocessing import MinMaxScaler
import warnings
import geopandas as gpd
# NOTE(review): this silences every Python warning for the rest of the session,
# including deprecation notices from the plotting/data libraries — consider
# narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Configure Altair
# Lift Altair's row-count limit so the larger frames built in later cells can
# be embedded in chart specs.
alt.data_transformers.disable_max_rows()

# Report the Altair version in use.
print(f"Altair version: {alt.__version__}")
print("✓ Libraries loaded successfully!")

In [None]:
# Read the cleaned property table from disk
print("Loading data...")
df = pd.read_parquet('sf_property_data_clean.parquet')

# Stringify every datetime-typed column so the frame stays JSON-compatible
datetime_columns = [
    name for name in df.columns
    if pd.api.types.is_datetime64_any_dtype(df[name])
]
for name in datetime_columns:
    df[name] = df[name].astype(str)

# Quick summary of what was loaded
record_count = df.shape[0]
first_year = df['year'].min()
last_year = df['year'].max()
neighborhood_count = df['neighborhood'].nunique()

print(f"✓ Dataset loaded: {record_count:,} records")
print(f"  Years: {first_year} - {last_year}")
print(f"  Neighborhoods: {neighborhood_count}")

In [None]:
# Prepare aggregated datasets for performance
print("\nPreparing optimized datasets...\n")

# 1. Yearly neighborhood statistics (for temporal analysis)
#    One row per (neighborhood, year) with median, mean and record count of
#    the total assessed value.
neighborhood_yearly = (
    df.groupby(['neighborhood', 'year'])['total_assessed_value']
      .agg(median_value='median', mean_value='mean', count='count')
      .reset_index()
)

# 2. Current year (2023) statistics
#    The 2023 slice is reused below for the appreciation rate and the sample.
df_2023 = df[df['year'] == 2023]
current_stats = (
    df_2023.groupby('neighborhood')
           .agg(
               median_value=('total_assessed_value', 'median'),
               land_pct=('land_value_pct', 'mean'),
               building_age=('building_age', 'mean'),
               area=('property_area', 'median'),
           )
           .reset_index()
)

# 3. Calculate appreciation rates (2015-2023)
value_2015 = df[df['year'] == 2015].groupby('neighborhood')['total_assessed_value'].median()
value_2023 = df_2023.groupby('neighborhood')['total_assessed_value'].median()
# A zero 2015 median would produce +/-inf; treat it as missing instead so the
# value does not poison later min-max scaling of appreciation_pct.
safe_base_2015 = value_2015.replace(0, np.nan)
appreciation = ((value_2023 - value_2015) / safe_base_2015 * 100).reset_index()
appreciation.columns = ['neighborhood', 'appreciation_pct']
# Left merge keeps neighborhoods that have no 2015 data (appreciation_pct = NaN).
current_stats = current_stats.merge(appreciation, on='neighborhood', how='left')

# 4. Sample for scatter plots (15k records for performance)
#    Cap the sample size at the number of available rows: DataFrame.sample
#    raises ValueError when n exceeds the population (without replacement).
sample_size = min(15000, len(df_2023))
df_sample = df_2023.sample(n=sample_size, random_state=42)



## 3.1 Interactive Neighborhood Explorer

This interactive dashboard combines neighborhood-level property value and growth information into one view.  
Users can click a neighborhood's bar (Shift+click to add more) to highlight it and compare median property values, 8-year appreciation, and key characteristics.  
The linked detail chart beneath the bars updates automatically to reflect the selected neighborhoods.

In Task 2, this analysis was spread across multiple static charts.  
Here, the information is consolidated and made interactive, allowing side-by-side comparison of high-value and fast-growing areas within the same visual.


In [None]:
print("Creating Interactive Geographic Dashboard...\n")

# Get top 20 neighborhoods for cleaner visualization
top_neighborhoods = current_stats.nlargest(20, 'median_value')['neighborhood'].tolist()
map_data = current_stats[current_stats['neighborhood'].isin(top_neighborhoods)].copy()

# Create selection
# Click selects a neighborhood; an empty selection counts as "everything selected".
selection = alt.selection_point(fields=['neighborhood'], empty=True, on='click')

# Main geographic view (horizontal bar chart)
# Bars are ranked by median value; unselected bars fall back to light gray.
geo_chart = alt.Chart(map_data).mark_bar().encode(
    y=alt.Y('neighborhood:N', title='Neighborhood', sort='-x'),
    x=alt.X('median_value:Q', title='Median Property Value ($)', axis=alt.Axis(format='$,.0f')),
    color=alt.condition(
        selection,
        alt.Color('median_value:Q', scale=alt.Scale(scheme='viridis'), legend=None),
        alt.value('lightgray')
    ),
    tooltip=[
        alt.Tooltip('neighborhood:N', title='Neighborhood'),
        alt.Tooltip('median_value:Q', title='Median Value', format='$,.0f'),
        alt.Tooltip('appreciation_pct:Q', title='8-Yr Growth %', format='.1f')
    ]
).add_params(
    selection
).properties(
    width=600,
    height=400,
    title='Click Neighborhoods to Explore (Shift+Click for Multiple)'
)

# Linked detail view
# The selected rows are folded into (metric, value) pairs. The bars show the
# MEAN of each metric over the selection: encoding the raw value would stack
# (i.e. sum) percentages and ages across every selected neighborhood, which is
# not a meaningful quantity -- especially for the initial "all selected" state.
detail_chart = alt.Chart(map_data).transform_filter(
    selection
).transform_fold(
    ['appreciation_pct', 'land_pct', 'building_age'],
    as_=['metric', 'value']
).mark_bar().encode(
    x=alt.X('metric:N', title=None, axis=alt.Axis(labelAngle=0)),
    y=alt.Y('mean(value):Q', title='Mean of Selected Neighborhoods'),
    color=alt.Color('metric:N', legend=None),
    tooltip=[
        'metric:N',
        alt.Tooltip('mean(value):Q', title='Mean', format='.2f'),
        alt.Tooltip('count():Q', title='Neighborhoods in selection')
    ]
).properties(
    width=600,
    height=200,
    title='Selected Neighborhood Characteristics'
)

# Stack the two views vertically; config is applied once at the top level.
dashboard = (geo_chart & detail_chart).configure_axis(
    labelFontSize=11,
    titleFontSize=12
).configure_title(
    fontSize=14,
    fontWeight='bold'
)
print("How to use: Click neighborhoods to see detailed metrics. Shift+Click for multiple.\n")
dashboard

## 3.2 ZIP Choropleth with Year Slider

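This map extends the Task 2 ZIP-code choropleth by adding a **year slider** and **normalization options**.  
The slider steps through median property values from 2015–2023, while a normalization selector (min-max, log, z-score, or raw) controls how values map to color. Min-max and z-score are computed within the selected year, so they show each ZIP's position relative to the rest of the city that year rather than absolute change over time.  
Clicking a ZIP area reveals its detailed value trend in the line chart beside the map.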

Unlike the single-year map in Task 2, this version shows both **spatial** and **temporal** dynamics—making it easier to identify which regions appreciated the most over time and when key changes occurred.


In [None]:

ZIP_GEOJSON_PATH = r"San_Francisco_ZIP_Codes_20251020.geojson"
VALUE_COL = "total_assessed_value"
YEAR_COL  = "year"

# Substrings used to recognise the ZIP attribute in the GeoJSON file.
_ZIP_HINTS = ['zip', 'postal', 'zcta']


def _zip_to_str(values):
    """Return ZIP codes as plain strings.

    Numeric columns are cast through int first: a ZIP column that ever held
    NaN is stored as float, and ``astype(str)`` alone would give '94110.0',
    which never matches the string keys read from the GeoJSON.
    """
    if pd.api.types.is_numeric_dtype(values):
        return values.astype('int64').astype(str)
    return values.astype(str)


def _first_matching(names, hints, what):
    """Return the first name containing any hint (case-insensitive), or raise a clear error."""
    match = next((n for n in names if any(h in n.lower() for h in hints)), None)
    if match is None:
        raise ValueError(f"Could not find a {what} column among: {list(names)}")
    return match


# Use existing ZIP column if present; otherwise derive from lat/lon via spatial join
if 'zip_code' in df.columns:
    ZIP_COL = 'zip_code'
    df_zip = df.dropna(subset=[ZIP_COL, VALUE_COL, YEAR_COL]).copy()
elif 'zipcode' in df.columns:
    ZIP_COL = 'zipcode'
    df_zip = df.dropna(subset=[ZIP_COL, VALUE_COL, YEAR_COL]).copy()
else:
    # derive ZIP via spatial join (make sure geopandas is imported as gpd)
    # NOTE(review): substring matching picks the first column whose name
    # contains 'lat' / 'lon' / 'lng' -- confirm it resolves to the coordinate
    # columns and not to an unrelated name.
    lat_col = _first_matching(df.columns, ['lat'], 'latitude')
    lon_col = _first_matching(df.columns, ['lon', 'lng'], 'longitude')

    # Load ZIP polygons and make sure they are in WGS84 like the point data.
    zips = gpd.read_file(ZIP_GEOJSON_PATH)
    if zips.crs is None:
        zips = zips.set_crs("EPSG:4326")
    elif zips.crs.to_string().lower() != "epsg:4326":
        zips = zips.to_crs("EPSG:4326")

    pts_src = df.dropna(subset=[lat_col, lon_col, VALUE_COL, YEAR_COL]).copy()
    pts = gpd.GeoDataFrame(
        pts_src,
        geometry=gpd.points_from_xy(pts_src[lon_col], pts_src[lat_col]),
        crs="EPSG:4326"
    )

    zip_field = _first_matching(zips.columns, _ZIP_HINTS, 'ZIP')
    # Left join keeps every point; points outside all polygons get a missing
    # ZIP and are dropped just below.
    joined = gpd.sjoin(pts, zips[[zip_field, 'geometry']], how='left', predicate='within')\
               .rename(columns={zip_field: 'zip_code'})\
               .drop(columns=['geometry','index_right'])
    ZIP_COL = 'zip_code'
    df_zip = joined.dropna(subset=[ZIP_COL, VALUE_COL, YEAR_COL]).copy()

# Summarize ZIP × Year
df_zip[YEAR_COL] = df_zip[YEAR_COL].astype(int)
df_zip[ZIP_COL]  = _zip_to_str(df_zip[ZIP_COL])
zip_yearly = (
    df_zip.groupby([ZIP_COL, YEAR_COL], as_index=False)
          .agg(median_value=(VALUE_COL, 'median'), count=(VALUE_COL, 'count'))
          .rename(columns={ZIP_COL: 'zipcode', YEAR_COL: 'year'})
)

# Attach full GeoJSON feature per ZIP (for Altair geojson shape)
import json
with open(ZIP_GEOJSON_PATH, "r", encoding="utf-8") as f:
    gj = json.load(f)
zip_key = _first_matching(gj['features'][0]['properties'], _ZIP_HINTS, 'ZIP property')
zip_map = {str(feat['properties'][zip_key]): feat for feat in gj['features']}
zip_yearly['geometry'] = zip_yearly['zipcode'].map(zip_map)

# Rows whose ZIP has no polygon in the GeoJSON cannot be drawn as a geoshape;
# drop them and report how many were lost so a key mismatch is visible.
missing_geometry = zip_yearly['geometry'].isna()
if missing_geometry.any():
    print(f"⚠ Dropping {int(missing_geometry.sum())} ZIP×year rows with no matching GeoJSON feature")
    zip_yearly = zip_yearly[~missing_geometry].reset_index(drop=True)

print("✓ zip_yearly:", zip_yearly.shape)


In [None]:

# Interactive controls:
#   yearParam  - slider choosing the year shown on the map
#   zipSelect  - click a ZIP polygon to select it; double-click clears
#   scaleMode  - dropdown choosing how median values are mapped to color
year_min, year_max = int(zip_yearly['year'].min()), int(zip_yearly['year'].max())
yearParam   = alt.param(name="yearParam", bind=alt.binding_range(min=year_min, max=year_max, step=1), value=year_max)
# `clear` takes an event name (or a boolean); the string "true" is parsed as an
# event called "true" that never fires, leaving the selection impossible to clear.
zipSelect   = alt.selection_point(name="zipSelect", fields=["zipcode"], on="click", clear="dblclick")
scaleMode   = alt.param(name="scaleModeParam", bind=alt.binding_select(options=["minmax","log","zscore","raw"]), value="minmax")

# Choropleth with normalization toggle
# The year filter runs BEFORE the joinaggregate, so mean/sd/min/max (and hence
# the min-max and z-score modes) are computed within the selected year only.
map_chart = (
    alt.Chart(zip_yearly)
    .transform_filter(alt.datum.year == yearParam)
    .transform_joinaggregate(
        mean_val='mean(median_value)', sd_val='stdev(median_value)',
        min_val='min(median_value)',  max_val='max(median_value)',
        groupby=['year']
    )
    .transform_calculate(
        # Guards: log of a non-positive value becomes null instead of -Infinity,
        # zero spread falls back to a tiny divisor (z-score) or 0.5 (min-max).
        norm="scaleModeParam == 'raw' ? datum.median_value : "
             "scaleModeParam == 'log' ? (datum.median_value > 0 ? log(datum.median_value) : null) : "
             "scaleModeParam == 'zscore' ? (datum.median_value - datum.mean_val) / (datum.sd_val == 0 ? 1e-9 : datum.sd_val) : "
             "(datum.max_val == datum.min_val ? 0.5 : (datum.median_value - datum.min_val) / (datum.max_val - datum.min_val))"
    )
    .mark_geoshape(stroke='white', strokeWidth=0.5)
    .encode(
        # The type is given once, explicitly: a ':N' suffix in the shorthand
        # would compete with type='geojson'.
        shape=alt.Shape('geometry', type='geojson'),
        color=alt.Color('norm:Q', title='Value (normalized)', scale=alt.Scale(scheme='blues')),
        opacity=alt.condition(zipSelect, alt.value(1), alt.value(0.9)),
        tooltip=[
            alt.Tooltip('zipcode:N', title='ZIP'),
            alt.Tooltip('year:O', title='Year'),
            alt.Tooltip('median_value:Q', title='Median (raw)', format='$,.0f'),
            alt.Tooltip('norm:Q', title='Normalized', format=',.3f')
        ]
    )
    .add_params(yearParam, zipSelect, scaleMode)
    .properties(width=520, height=480, title='ZIP Choropleth — Normalize & Click to Select')
)

# Only the selected ZIP’s line
# (while nothing is selected the filter passes every ZIP, so all lines show)
line_sel = (
    alt.Chart(zip_yearly)
    .transform_filter(zipSelect)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x='year:O',
        y=alt.Y('median_value:Q', title='Median Property Value ($)', axis=alt.Axis(format='$,.0f')),
        color=alt.Color('zipcode:N', legend=None),
        tooltip=['zipcode:N','year:O','median_value:Q','count:Q']
    )
    .properties(width=620, height=280, title='Selected ZIP — Value Over Time')
)

# Map and trend line side by side, each with its own color scale.
alt.hconcat(map_chart, line_sel).resolve_scale(color='independent')


## 3.3 Neighborhood Value Trajectories (2015 = 100)

This interactive line chart compares how different neighborhoods’ median property values have evolved since 2015.  
Each neighborhood’s trend is indexed to 100 at 2015, making percentage growth directly comparable.  
Users can zoom or brush to focus on certain years, click legends to isolate neighborhoods, or use the year scrubber to view cross-sectional snapshots.

In Task 2, these trends were shown as static multi-line plots.  
The interactive version highlights **divergence and recovery patterns** more clearly—especially the post-2020 K-shaped recovery between affluent and working-class areas.


In [None]:
print("Building Task 3: Enhanced Neighborhood Trajectory Comparator (Scrollable + Working Legend)...\n")

import numpy as np
import pandas as pd
import altair as alt

# --------------------------
# PARAMETERS
# --------------------------
BASE_YEAR = 2015  # dataset scope 2015–2023

# --------------------------
# CLEAN & PREP DATA
# --------------------------
assert {'neighborhood', 'median_value', 'year'} <= set(neighborhood_yearly.columns), \
    "Expected columns: neighborhood, median_value, year"

# Work on a dedicated copy under its own name. Rebinding the notebook-wide
# `df` here would replace the raw property table with this aggregate and break
# any earlier cell that is re-run afterwards.
yearly = neighborhood_yearly.copy()
yearly['year'] = pd.to_numeric(yearly['year'], errors='coerce').astype('Int64')
yearly = yearly.dropna(subset=['year', 'median_value', 'neighborhood'])
yearly['year'] = yearly['year'].astype(int)

min_year, max_year = int(yearly['year'].min()), int(yearly['year'].max())

# --------------------------
# TOP 12 NEIGHBORHOODS (by latest year median)
# --------------------------
latest_year = max_year
latest_stats = (
    yearly[yearly['year'] == latest_year]
    .groupby('neighborhood', as_index=False)['median_value'].median()
)
top_12 = latest_stats.nlargest(12, 'median_value')['neighborhood'].tolist()

traj = yearly[yearly['neighborhood'].isin(top_12)].copy()

# --------------------------
# NORMALIZATION TO BASE_YEAR (fallback = first available)
# --------------------------
# Preferred base: the neighborhood's own median in BASE_YEAR.
base = (
    traj[traj['year'] == BASE_YEAR][['neighborhood', 'median_value']]
    .rename(columns={'median_value': 'base_value'})
)
traj = traj.merge(base, on='neighborhood', how='left')

# Fallback base: the earliest available year for neighborhoods lacking BASE_YEAR.
fallback_base = (
    traj.sort_values(['neighborhood', 'year'])
       .groupby('neighborhood', as_index=False)
       .first()[['neighborhood', 'median_value']]
       .rename(columns={'median_value': 'fallback_base'})
)
traj = traj.merge(fallback_base, on='neighborhood', how='left')
traj['base_value'] = traj['base_value'].fillna(traj['fallback_base'])
traj = traj.drop(columns=['fallback_base'])
# A missing or zero base cannot be used as a divisor.
traj = traj[traj['base_value'].notna() & (traj['base_value'] != 0)].copy()

traj['normalized_value'] = (traj['median_value'] / traj['base_value']) * 100.0
# Record which year actually served as the index base (shown in the tooltip).
traj['index_base_year'] = np.where(
    traj.groupby('neighborhood')['year'].transform(lambda s: (s == BASE_YEAR).any()),
    BASE_YEAR,
    traj.groupby('neighborhood')['year'].transform('min')
)
traj = traj.sort_values(['neighborhood', 'year'])

# --------------------------
# CITYWIDE BASELINE
# --------------------------
# Median of the neighborhood medians per year (all neighborhoods, not just the top 12).
city = (
    yearly.groupby('year', as_index=False)['median_value'].median()
    .rename(columns={'median_value': 'city_median'})
).copy()

city_base_row = city[city['year'] == BASE_YEAR]
if not city_base_row.empty:
    city_base = float(city_base_row['city_median'].iloc[0])
    city_base_year = BASE_YEAR
else:
    first_row = city.sort_values('year').iloc[0]
    city_base = float(first_row['city_median'])
    city_base_year = int(first_row['year'])

city['city_index'] = (city['city_median'] / city_base) * 100.0
city['city_base_year'] = city_base_year

# --------------------------
# INTERACTIONS (each defined ONCE)
# --------------------------
# Legend-bound selection: define a name and attach ONLY to the chart with the legend
legend_sel = alt.selection_point(
    name='neigh_sel',
    fields=['neighborhood'],
    bind='legend',
    empty=True
)

# Brushing selection for the context (year range)
brush = alt.selection_interval(encodings=['x'])

# Year slider parameter
year_param = alt.param(
    name='Year',
    value=max(BASE_YEAR, min_year),
    bind=alt.binding_range(min=min_year, max=max_year, step=1)
)

# --------------------------
# MAIN TRAJECTORY LAYERS (wide so the output can scroll horizontally)
# --------------------------
MAIN_W, CTX_W, BARS_W = 1100, 1100, 420
MAIN_H, CTX_H, BARS_H = 420, 90, 420

base_enc = dict(
    x=alt.X('year:Q', title='Year', scale=alt.Scale(nice=False)),
    y=alt.Y('normalized_value:Q',
            title=f'Indexed Value (base = 100 at {BASE_YEAR} or first available)',
            scale=alt.Scale(zero=False)),
    color=alt.Color('neighborhood:N',
                    legend=alt.Legend(title='Click to Compare', orient='right', columns=1)),
    opacity=alt.condition(legend_sel, alt.value(1), alt.value(0.15)),
    strokeWidth=alt.condition(legend_sel, alt.value(3), alt.value(1)),
    detail='neighborhood:N',
    tooltip=[
        alt.Tooltip('neighborhood:N', title='Neighborhood'),
        alt.Tooltip('year:Q', title='Year', format='d'),
        alt.Tooltip('normalized_value:Q', title='Indexed Value', format='.1f'),
        alt.Tooltip('median_value:Q', title='Actual Value', format='$,.0f'),
        alt.Tooltip('index_base_year:Q', title='Index Base Year', format='d')
    ]
)

# Chart with legend + selection attached HERE
traj_chart = (
    alt.Chart(traj)
    .transform_filter(brush)
    .mark_line(point=True, clip=True)
    .encode(**base_enc)
    .add_params(legend_sel)  # attach selection ONLY here
    .properties(width=MAIN_W, height=MAIN_H)
)

# Rolling 3-yr mean overlay responds to legend selection too
# The window is sorted by year explicitly so the centered [-1, +1] frame does
# not depend on the row order of the incoming data.
rolling_overlay = (
    alt.Chart(traj)
    .transform_filter(brush)
    .transform_window(
        rolling_mean='mean(normalized_value)',
        frame=[-1, 1],
        sort=[alt.SortField('year', order='ascending')],
        groupby=['neighborhood']
    )
    .mark_line(strokeDash=[4, 2], clip=True)
    .encode(
        x='year:Q',
        y='rolling_mean:Q',
        color=alt.Color('neighborhood:N', legend=None),
        opacity=alt.condition(legend_sel, alt.value(1), alt.value(0.2)),
        detail='neighborhood:N'
    )
    .properties(width=MAIN_W, height=MAIN_H)
)

city_baseline = (
    alt.Chart(city)
    .transform_filter(brush)
    .mark_line(strokeWidth=3, color='#666', clip=True)
    .encode(x='year:Q', y='city_index:Q')
    .properties(width=MAIN_W, height=MAIN_H)
)

# Vertical rule at the slider year. `year_param` is a plain value parameter,
# not a selection, so it must be compared against the datum: passing it to
# transform_filter directly only tests the parameter's truthiness, which keeps
# every row and draws a rule at every year.
slider_rule = (
    alt.Chart(pd.DataFrame({'year': list(range(min_year, max_year + 1))}))
    .mark_rule(strokeDash=[6, 4], color='#777', clip=True)
    .encode(x='year:Q')
    .transform_filter(alt.datum.year == year_param)
    .properties(width=MAIN_W, height=MAIN_H)
)

main_layer = alt.layer(
    city_baseline,
    traj_chart,
    rolling_overlay,
    slider_rule
).properties(
    title='Neighborhood Value Trajectories (Brush to Zoom • Click Legend to Compare • Scrub Year)'
).resolve_scale(y='shared')

# --------------------------
# CONTEXT (brush chart) — define the brush HERE
# --------------------------
context = (
    alt.Chart(city)
    .mark_area(opacity=0.3, clip=True)
    .encode(
        x=alt.X('year:Q', title='Brush to select year range'),
        y=alt.Y('city_index:Q', title='City Index'),
    )
    .add_params(brush)
    .properties(width=CTX_W, height=CTX_H)
)

# --------------------------
# LINKED BAR CHART (actual $ by slider year) — no legend dependency (keeps things simple & robust)
# --------------------------
bars_df = yearly[yearly['neighborhood'].isin(top_12)].copy()
bars_chart = (
    alt.Chart(bars_df)
    .transform_filter(alt.datum.year == year_param)
    .mark_bar(clip=True)
    .encode(
        y=alt.Y('neighborhood:N', sort='-x', title=None),
        x=alt.X('median_value:Q', title='Median Value ($) in Selected Year'),
        tooltip=[
            alt.Tooltip('neighborhood:N', title='Neighborhood'),
            alt.Tooltip('year:Q', title='Year', format='d'),
            alt.Tooltip('median_value:Q', title='Median Value', format='$,.0f')
        ],
        color=alt.Color('neighborhood:N', legend=None)
    )
    .properties(width=BARS_W, height=BARS_H, title='Selected Year Comparison')
)

# --------------------------
# ASSEMBLE (wide => notebook adds horizontal scrollbar)
# --------------------------
left = alt.vconcat(main_layer, context, spacing=6).resolve_scale(color='independent')
final_comp = alt.hconcat(left, bars_chart, spacing=12).resolve_scale(color='independent')

# Add ONLY the year slider param at top level (legend_sel is attached to traj_chart already)
final_comp = final_comp.add_params(year_param)

print(" - Click legend entries to highlight neighborhoods (lines + rolling mean respond).")
print(" - Brush the bottom area to zoom the year range.")
print(" - Move the year slider to update the vertical rule and bar chart.\n")

final_comp


## 3.4 Property Value – Size Analyzer

This visualization explores how property area relates to total assessed value.  
A histogram on top allows users to **brush a size range**, instantly filtering the scatter plot below.  
Neighborhoods can be highlighted through the legend, and a LOESS curve summarizes the overall trend.  
A small text banner displays median values and $/sqft for the current selection.

The earlier Task 2 scatter plot showed only a general correlation.  
This interactive version enables targeted analysis—revealing where **price per square foot plateaus** and how relationships differ among neighborhoods.


In [None]:
print("Creating Property Value–Size Analyzer...\n")

import altair as alt
import pandas as pd
import numpy as np

# -------------------------
# Data prep (keeps your df_sample workflow)
# -------------------------
size_value_data = (
    df_sample[['property_area', 'total_assessed_value', 'neighborhood']]
    .dropna()
    .copy()
)

# Filter obvious junk and extreme outliers for readability
size_value_data = size_value_data[
    (size_value_data['property_area'] > 0) &
    (size_value_data['total_assessed_value'] > 0)
].copy()

# Trim both axes at their 95th percentile (computed after the >0 filter).
p95_area = size_value_data['property_area'].quantile(0.95)
p95_val  = size_value_data['total_assessed_value'].quantile(0.95)

size_value_data = size_value_data[
    (size_value_data['property_area'] <= p95_area) &
    (size_value_data['total_assessed_value'] <= p95_val)
].copy()

# Helpful derived metric
# (safe division: property_area > 0 is guaranteed by the filter above)
size_value_data['value_per_sqft'] = (
    size_value_data['total_assessed_value'] / size_value_data['property_area']
)

# -------------------------
# Interactions
# -------------------------
# Brush on the X axis (property size) to filter the scatter
brush = alt.selection_interval(encodings=['x'], name='size_brush')

# Click-to-highlight neighborhoods via legend (no dropdown = fewer param issues)
neigh_sel = alt.selection_point(fields=['neighborhood'], bind='legend', name='neigh_pick')

# -------------------------
# Charts
# -------------------------

# 1) Size histogram with brush
size_hist = (
    alt.Chart(size_value_data, title='Drag to Select Property Size Range')
    .mark_bar()
    .encode(
        x=alt.X('property_area:Q',
                bin=alt.Bin(maxbins=40),
                title='Property Area (sq ft)'),
        y=alt.Y('count()', title='Number of Properties'),
        color=alt.condition(brush, alt.value('steelblue'), alt.value('lightgray')),
        # Tooltips read data fields, not encoding channels, so the bin range is
        # obtained by binning the same field with the same bin settings as x.
        tooltip=[
            alt.Tooltip('count():Q', title='Properties', format=','),
            alt.Tooltip('property_area:Q',
                        bin=alt.Bin(maxbins=40),
                        title='Area range (sq ft)',
                        format=',.0f'),
        ]
    )
    # Only the brush lives here; the legend-bound neighborhood selection is
    # attached to the scatter, which is the view that owns the legend.
    .add_params(brush)
    .properties(width=700, height=160)
)

# 2) Selection summary text (count + medians) for current brush
stats_line = (
    alt.Chart(size_value_data)
    .transform_filter(brush)
    .transform_aggregate(
        count='count()',
        med_val='median(total_assessed_value)',
        med_vpsf='median(value_per_sqft)'
    )
    .transform_calculate(
        label='join(['
              '"Selected: ", format(datum.count, ","),'
              '"  |  Median Value: $", format(datum.med_val, ",.0f"),'
              '"  |  Median $/sqft: $", format(datum.med_vpsf, ",.0f")'
              '], "")'
    )
    .mark_text(align='left')
    .encode(text='label:N')
    .properties(width=700, height=22, title='Selection Summary')
)

# 3) Scatter (Value vs Size), filtered by brush, highlighted by legend selection
points = (
    alt.Chart(size_value_data, title='Property Value vs. Size (Filtered by Size Brush)')
    .mark_circle(size=22)
    .encode(
        x=alt.X('property_area:Q', title='Property Area (sq ft)', scale=alt.Scale(zero=False)),
        y=alt.Y('total_assessed_value:Q', title='Total Assessed Value ($)', axis=alt.Axis(format='$,.0f')),
        color=alt.Color('neighborhood:N', legend=alt.Legend(title='Neighborhood')),
        opacity=alt.condition(neigh_sel, alt.value(0.9), alt.value(0.25)),
        tooltip=[
            alt.Tooltip('neighborhood:N', title='Neighborhood'),
            alt.Tooltip('property_area:Q', title='Area (sq ft)', format=',.0f'),
            alt.Tooltip('total_assessed_value:Q', title='Value', format='$,.0f'),
            alt.Tooltip('value_per_sqft:Q', title='$ per sqft', format='$,.0f'),
        ]
    )
    .transform_filter(brush)
    .add_params(neigh_sel)
    .properties(width=700, height=360)
)

# 4) Smooth trendline (LOESS) over brushed data
# Fitted across all neighborhoods together (no groupby on the loess transform).
trend = (
    alt.Chart(size_value_data)
    .transform_filter(brush)
    .transform_loess('property_area', 'total_assessed_value', bandwidth=0.3)
    .mark_line(strokeWidth=2)
    .encode(
        x='property_area:Q',
        y='total_assessed_value:Q'
    )
    .properties(width=700, height=360)
)

scatter_combo = (points + trend).interactive()  # enables pan/zoom on axes

# -------------------------
# Compose & configure (top-level only to avoid VConcat config errors)
# -------------------------
size_analyzer = alt.vconcat(
    size_hist,
    stats_line,
    scatter_combo
).configure_axis(
    labelFontSize=11,
    titleFontSize=12
).configure_title(
    fontSize=14,
    fontWeight='bold'
)

print("How to use:\n"
      "• Drag on the histogram to filter by property size range.\n"
      "• Click neighborhoods in the legend to highlight them.\n"
      "• Hover for details; zoom/pan in the scatter as needed.\n")

size_analyzer


In [None]:
print("Creating Gentrification Risk Assessment Tool (enhanced, fixed)…\n")

import altair as alt
from sklearn.preprocessing import MinMaxScaler

# ---------- Prep ----------
risk_data = current_stats.copy()

# Each indicator is rescaled independently to 0-100 (the scaler is re-fit per column).
# .ravel() flattens the (n, 1) array returned by fit_transform into a plain column.
scaler = MinMaxScaler(feature_range=(0, 100))
risk_data['appreciation_risk'] = scaler.fit_transform(risk_data[['appreciation_pct']]).ravel()
risk_data['land_value_risk']   = scaler.fit_transform(risk_data[['land_pct']]).ravel()
risk_data['age_risk']          = scaler.fit_transform(risk_data[['building_age']]).ravel()
risk_data['value_risk']        = scaler.fit_transform(risk_data[['median_value']]).ravel()

# Weighted blend of the four scaled indicators (weights sum to 1.0).
risk_data['composite_risk'] = (
    0.35 * risk_data['appreciation_risk'] +
    0.25 * risk_data['land_value_risk'] +
    0.25 * risk_data['age_risk'] +
    0.15 * risk_data['value_risk']
)

top_risk = risk_data.nlargest(15, 'composite_risk').copy()

# Long form for parallel coords
risk_long = top_risk.melt(
    id_vars=['neighborhood'],
    value_vars=['appreciation_risk', 'land_value_risk', 'age_risk', 'composite_risk'],
    var_name='risk_type',
    value_name='risk_score'
)

# Numeric x position of each indicator axis.
axis_map = {
    'appreciation_risk': 0,
    'land_value_risk'  : 1,
    'age_risk'         : 2,
    'composite_risk'   : 3
}
risk_long['axis_position'] = risk_long['risk_type'].map(axis_map)

# Human-readable indicator names (used in tooltips).
label_map = {
    'appreciation_risk': 'Rapid Appreciation',
    'land_value_risk'  : 'High Land Value %',
    'age_risk'         : 'Old Buildings',
    'composite_risk'   : 'Composite Risk'
}
risk_long['risk_label'] = risk_long['risk_type'].map(label_map)

# ---------- Selections & Controls ----------
# Legend multi-select to highlight neighborhoods
# A legend-bound selection ignores clicks on the marks themselves unless `on`
# and `clear` are given explicitly; both are set so that clicking a bar or a
# line works as the instructions printed below promise.
neigh_sel = alt.selection_point(
    fields=['neighborhood'], bind='legend', on='click', clear='dblclick', name='Pick'
)

# Hover to bold one line
# empty=False: nothing is bold until the pointer is over a line (boolean form,
# matching the other selections in this notebook).
hover_sel = alt.selection_point(fields=['neighborhood'], on='mouseover', empty=False, name='Hover')

# Slider thresholds (min values)
app_min  = alt.param(value=0, bind=alt.binding_range(min=0, max=100, step=1, name='Min Rapid Appreciation'))
land_min = alt.param(value=0, bind=alt.binding_range(min=0, max=100, step=1, name='Min Land %'))
age_min  = alt.param(value=0, bind=alt.binding_range(min=0, max=100, step=1, name='Min Old Buildings'))

# Attach per-neighborhood metrics on each row (so sliders filter whole lines)
lookup_fields = ['appreciation_risk', 'land_value_risk', 'age_risk', 'composite_risk']
# Inline records are embedded in the spec as-is, so ship only the columns the
# lookup needs and turn NaN into None (NaN is not valid JSON).
lookup_frame = top_risk[['neighborhood'] + lookup_fields]
lookup_records = (
    lookup_frame.astype(object)
                .where(lookup_frame.notna(), None)
                .to_dict('records')
)
lookup_data = alt.InlineData(values=lookup_records)

# Axis labels via array lookup (no ternary; avoids parse error)
axis_labels = "['Rapid Appreciation','High Land Value %','Old Buildings','Composite Risk'][datum.value]"

# Shared slider predicate: a neighborhood passes only if all three indicators
# meet their minimum.
threshold_filter = (
    f"datum.appreciation_risk >= {app_min.name} && "
    f"datum.land_value_risk >= {land_min.name} && "
    f"datum.age_risk >= {age_min.name}"
)

# ---------- Parallel Coordinates ----------
base = (
    alt.Chart(risk_long)
    .transform_lookup(
        lookup='neighborhood',
        from_=alt.LookupData(lookup_data, 'neighborhood', lookup_fields)
    )
    .transform_filter(threshold_filter)
    .add_params(app_min, land_min, age_min, neigh_sel, hover_sel)
)

lines = (
    base.mark_line()
    .encode(
        x=alt.X('axis_position:Q', title='Risk Indicators',
                axis=alt.Axis(values=[0,1,2,3], labelExpr=axis_labels)),
        y=alt.Y('risk_score:Q', title='Risk Score (0 = Low, 100 = High)', scale=alt.Scale(domain=[0,100])),
        color=alt.Color('neighborhood:N', legend=alt.Legend(title='Neighborhood', orient='right', columns=1)),
        detail='neighborhood:N',
        # One predicate per channel to avoid nested-condition errors:
        opacity=alt.condition(neigh_sel, alt.value(0.95), alt.value(0.25)),
        strokeWidth=alt.condition(hover_sel, alt.value(3.0), alt.value(1.2)),
        tooltip=[
            alt.Tooltip('neighborhood:N', title='Neighborhood'),
            alt.Tooltip('risk_label:N',   title='Indicator'),
            alt.Tooltip('risk_score:Q',   title='Score', format='.1f')
        ]
    )
    .properties(
        width=720, height=420,
        title='Gentrification Risk — Hover to Highlight • Sliders Filter • Legend to Compare'
    )
)

# ---------- Linked Top-N Bar (updates with sliders) ----------
bars = (
    alt.Chart(top_risk)
    .transform_filter(threshold_filter)
    .mark_bar()
    .encode(
        y=alt.Y('neighborhood:N', sort='-x', title=None),
        x=alt.X('composite_risk:Q', title='Composite Risk'),
        color=alt.Color('neighborhood:N', legend=None),
        opacity=alt.condition(neigh_sel, alt.value(1.0), alt.value(0.35)),
        tooltip=[
            alt.Tooltip('neighborhood:N', title='Neighborhood'),
            alt.Tooltip('composite_risk:Q', title='Composite Risk', format='.1f'),
            alt.Tooltip('appreciation_risk:Q', title='Rapid Appreciation', format='.1f'),
            alt.Tooltip('land_value_risk:Q',   title='High Land %',        format='.1f'),
            alt.Tooltip('age_risk:Q',          title='Old Buildings',      format='.1f')
        ]
    )
    .add_params(neigh_sel)  # click a bar to focus that neighborhood everywhere
    .properties(width=360, height=420, title='Top Neighborhoods (filtered)')
)

# Side-by-side layout; config is applied once at the top level.
risk_tool = alt.hconcat(lines, bars).resolve_scale(color='independent') \
    .configure_axis(labelFontSize=11, titleFontSize=12) \
    .configure_title(fontSize=14, fontWeight='bold')


print("  • Use the sliders to set minimum levels for Rapid Appreciation, Land %, and Old Buildings.")
print("  • Click neighborhoods in the legend or bars to focus them (multi-select supported).")
print("  • Hover a line to bold it; tooltips show exact values.\n")

risk_tool


## 3.5 Gentrification Risk Assessment

This visualization builds on the Task 2 parallel-coordinates chart by adding rich interactivity and multiple linked views.  
It combines four normalized indicators — **Rapid Appreciation**, **High Land Value %**, **Old Buildings**, and a **Composite Risk Score** — to help identify neighborhoods most exposed to redevelopment pressure.

**Key Features**
- **Interactive sliders** to set minimum thresholds for appreciation rate, land value %, and building age.  
- **Parallel coordinates plot** where each line represents one neighborhood; hover to highlight or compare across axes.  
- **Legend multi-select** to focus on specific neighborhoods.  
- **Linked bar chart** on the right showing the top-risk neighborhoods, dynamically updating with slider filters and selections.

Compared to the static plot in Task 2, this interactive version lets users **filter and compare risk patterns in real time**.  
It makes it easy to see how different risk factors overlap — for example, older neighborhoods with rapidly appreciating land values often rank highest on composite risk.


In [None]:
print("Creating Gentrification Risk Assessment Tool (enhanced, fixed)…\n")

import altair as alt
from sklearn.preprocessing import MinMaxScaler

# ---------- Prep ----------
# Work on a copy so current_stats itself is not modified.
# NOTE(review): assumes current_stats carries 'appreciation_pct', 'land_pct',
# 'building_age' and 'median_value' — they are built earlier in the notebook.
risk_data = current_stats.copy()

# Rescale each raw metric to a 0-100 score. fit_transform refits the scaler on
# every call, so each column is normalized against its own min and max.
scaler = MinMaxScaler(feature_range=(0, 100))
score_sources = {
    'appreciation_risk': 'appreciation_pct',
    'land_value_risk': 'land_pct',
    'age_risk': 'building_age',
    'value_risk': 'median_value',
}
for score_col, source_col in score_sources.items():
    risk_data[score_col] = scaler.fit_transform(risk_data[[source_col]])

# Composite score: weighted sum of the four normalized scores, accumulated in
# the order listed here.
risk_weights = {
    'appreciation_risk': 0.35,
    'land_value_risk': 0.25,
    'age_risk': 0.25,
    'value_risk': 0.15,
}
risk_data['composite_risk'] = sum(
    weight * risk_data[col] for col, weight in risk_weights.items()
)

# Only the 15 highest composite scores are charted.
top_risk = risk_data.nlargest(n=15, columns='composite_risk').copy()

# Display label for each plotted score; the key order is also the left-to-right
# axis order of the parallel-coordinates plot.
label_map = {
    'appreciation_risk': 'Rapid Appreciation',
    'land_value_risk': 'High Land Value %',
    'age_risk': 'Old Buildings',
    'composite_risk': 'Composite Risk',
}
axis_map = {score: position for position, score in enumerate(label_map)}

# Long form for parallel coords: one row per (neighborhood, score) pair, tagged
# with its axis slot and human-readable label.
risk_long = top_risk.melt(
    id_vars=['neighborhood'],
    value_vars=list(label_map),
    var_name='risk_type',
    value_name='risk_score',
)
risk_long = risk_long.assign(
    axis_position=risk_long['risk_type'].map(axis_map),
    risk_label=risk_long['risk_type'].map(label_map),
)

# ---------- Selections & Controls ----------
# Neighborhood focus selection, shared by the line and bar views.
# It is bound to the legend. Vega-Lite turns off direct clicks on marks for
# legend-bound selections unless `on` and `clear` are given explicitly, so they
# are set here; without them only the legend entries respond and clicking a bar
# does nothing. Shift-click extends the selection, double-click clears it.
neigh_sel = alt.selection_point(
    fields=['neighborhood'],
    bind='legend',
    on='click',
    clear='dblclick',
    name='Pick',
)

# Hover to bold one line. empty=False means no line counts as hovered until the
# pointer is actually over one (boolean form of the deprecated empty='none').
hover_sel = alt.selection_point(
    fields=['neighborhood'],
    on='mouseover',
    empty=False,
    name='Hover',
)

# Slider thresholds (minimum values). They are compared against the 0-100
# normalized scores, not the raw metrics.
app_min = alt.param(
    value=0,
    bind=alt.binding_range(min=0, max=100, step=1, name='Min Rapid Appreciation'),
)
land_min = alt.param(
    value=0,
    bind=alt.binding_range(min=0, max=100, step=1, name='Min Land %'),
)
age_min = alt.param(
    value=0,
    bind=alt.binding_range(min=0, max=100, step=1, name='Min Old Buildings'),
)

# Per-neighborhood scores that get joined onto every long-form row, so the
# sliders filter whole lines rather than individual points.
lookup_fields = ['appreciation_risk', 'land_value_risk', 'age_risk', 'composite_risk']

# InlineData values are embedded in the spec as-is, so ship only the join key
# and the looked-up fields instead of every column of top_risk. This keeps the
# spec small and keeps unrelated columns (which could hold NaN or other values
# that do not serialize to valid JSON) out of it.
lookup_data = alt.InlineData(
    values=top_risk[['neighborhood'] + lookup_fields].to_dict('records')
)

# Axis labels via array lookup (no ternary; avoids parse error)
axis_labels = "['Rapid Appreciation','High Land Value %','Old Buildings','Composite Risk'][datum.value]"

# ---------- Parallel Coordinates ----------
# A neighborhood stays visible only while all three of its looked-up scores are
# at or above the corresponding slider value.
line_filter_expr = " && ".join([
    f"datum.appreciation_risk >= {app_min.name}",
    f"datum.land_value_risk >= {land_min.name}",
    f"datum.age_risk >= {age_min.name}",
])

# Shared base: long-form rows joined with each neighborhood's scores, filtered
# by the sliders, and carrying every parameter this view uses.
base = alt.Chart(risk_long)
base = base.transform_lookup(
    lookup='neighborhood',
    from_=alt.LookupData(data=lookup_data, key='neighborhood', fields=lookup_fields),
)
base = base.transform_filter(line_filter_expr)
base = base.add_params(app_min, land_min, age_min, neigh_sel, hover_sel)

# Channel encodings for the line layer. Opacity and stroke width each depend on
# a single selection (one predicate per channel avoids nested conditions).
line_encodings = dict(
    x=alt.X(
        'axis_position:Q',
        title='Risk Indicators',
        # Numeric axis slots 0..3 are relabeled with the indicator names.
        axis=alt.Axis(values=[0, 1, 2, 3], labelExpr=axis_labels),
    ),
    y=alt.Y(
        'risk_score:Q',
        title='Risk Score (0 = Low, 100 = High)',
        scale=alt.Scale(domain=[0, 100]),
    ),
    color=alt.Color(
        'neighborhood:N',
        legend=alt.Legend(title='Neighborhood', orient='right', columns=1),
    ),
    detail='neighborhood:N',
    opacity=alt.condition(neigh_sel, alt.value(0.95), alt.value(0.25)),
    strokeWidth=alt.condition(hover_sel, alt.value(3.0), alt.value(1.2)),
    tooltip=[
        alt.Tooltip('neighborhood:N', title='Neighborhood'),
        alt.Tooltip('risk_label:N', title='Indicator'),
        alt.Tooltip('risk_score:Q', title='Score', format='.1f'),
    ],
)

# One line per neighborhood across the four indicator axes.
lines = (
    base.mark_line()
    .encode(**line_encodings)
    .properties(
        width=720,
        height=420,
        title='Gentrification Risk — Hover to Highlight • Sliders Filter • Legend to Compare',
    )
)

# ---------- Linked Top-N Bar (updates with sliders) ----------
# Same slider predicate as the line view, applied to the wide per-neighborhood
# table so both views drop the same neighborhoods.
bar_filter_expr = " && ".join([
    f"datum.appreciation_risk >= {app_min.name}",
    f"datum.land_value_risk >= {land_min.name}",
    f"datum.age_risk >= {age_min.name}",
])

# Neighborhood name first, then each score formatted to one decimal place.
bar_tooltips = [alt.Tooltip('neighborhood:N', title='Neighborhood')] + [
    alt.Tooltip(f'{field}:Q', title=label, format='.1f')
    for field, label in (
        ('composite_risk', 'Composite Risk'),
        ('appreciation_risk', 'Rapid Appreciation'),
        ('land_value_risk', 'High Land %'),
        ('age_risk', 'Old Buildings'),
    )
]

bars = (
    alt.Chart(top_risk)
    .transform_filter(bar_filter_expr)
    .mark_bar()
    .encode(
        # Bars are ordered by descending composite risk.
        y=alt.Y('neighborhood:N', sort='-x', title=None),
        x=alt.X('composite_risk:Q', title='Composite Risk'),
        color=alt.Color('neighborhood:N', legend=None),
        opacity=alt.condition(neigh_sel, alt.value(1.0), alt.value(0.35)),
        tooltip=bar_tooltips,
    )
    # Register the shared neighborhood selection on this view as well, so the
    # bars dim and highlight in step with the line chart.
    .add_params(neigh_sel)
    .properties(width=360, height=420, title='Top Neighborhoods (filtered)')
)

# Put the parallel-coordinates view and the bar view next to each other. Color
# scales are resolved independently per view; axis and title fonts are set once
# on the concatenated chart.
risk_tool = (
    alt.hconcat(lines, bars)
    .resolve_scale(color='independent')
    .configure_axis(labelFontSize=11, titleFontSize=12)
    .configure_title(fontSize=14, fontWeight='bold')
)

# Usage hints for the reader, one per line.
print(
    "  • Use the sliders to set minimum levels for Rapid Appreciation, Land %, and Old Buildings.",
    "  • Click neighborhoods in the legend or bars to focus them (multi-select supported).",
    "  • Hover a line to bold it; tooltips show exact values.\n",
    sep="\n",
)

# Bare expression: the chart is the value of the cell.
risk_tool
