# Lubbock Property Analysis Notebook
This notebook performs a comprehensive analysis of property values in Lubbock, focusing on council districts and zones (Redlining Zone, Half-mile Buffer, Between 0.5 and 1 Mile Buffer, One-mile Buffer, and Outside). It calculates statistics (mean, median, mode, min, max) for property values across multiple years, with and without outliers. The results are visualized through interactive maps (with a checkbox menu to toggle district-zone intersections), bar plots, violin plots, and heatmaps, all saved in a structured folder system.

### Cell 0: Importing Libraries
#### Purpose
Import all necessary libraries for data processing, spatial analysis, and visualization.

In [None]:
# Import the required modules/libraries
import re
import ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from matplotlib.gridspec import GridSpec
import textwrap
from functools import reduce
import time
import os
import json
import geopandas as gpd
import folium
import zipfile
from folium.features import DivIcon
from IPython.display import IFrame, display, HTML
from folium.plugins import MarkerCluster
from google.colab import files
from folium import GeoJson, FeatureGroup
from branca.colormap import linear
import plotly.express as px
from folium.plugins import HeatMap



# Make the Google Drive contents reachable under /content/drive
from google.colab import drive
drive.mount('/content/drive')

# Silence every warning category for the rest of the session
import warnings
warnings.filterwarnings('ignore')

# Lift pandas' display limits so rows, columns, and cell text are never truncated
for _display_option in ('display.max_rows', 'display.max_columns',
                        'display.width', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Cell 1: Setup and Data Preparation
#### Purpose
Prepare the property dataset and shapefiles for analysis by loading data, filtering it, and ensuring compatibility.

#### Sub-Steps
1. **Define Paths**: Specify paths to the property data CSV and shapefiles.
2. **Load Shapefiles**: Create a function to load and standardize shapefiles to EPSG:4326.
3. **Load and Filter Property Data**: Read the CSV and filter for Lubbock properties.
4. **Convert to GeoDataFrame**: Transform the property data into a spatial format.

In [None]:
# 1. Define Paths
# Project root on Google Drive; every other path below is built from it.
base_path = "/content/drive/MyDrive/RA Part-time UMD/Lubbock Project/After Jan 16th 2025/"
data_path = base_path + "FinalData/Step3_AddedZoningData/dfall_with_zoning_details.csv"

# Layer name -> folder under Shapefiles/ (each .shp file is named after its folder).
_shapefile_folders = {
    'City_Limit': 'CityLimit',
    'Council_Districts': 'CouncilDistricts',
    'Redlining_Zone': 'RedliningZone',
    'Half_mile_Buffer': 'RedliningZoneHalfMile',
    'One_mile_Buffer': 'RedliningZoneOneMile',
}
shapefile_paths = {
    layer: f"{base_path}Shapefiles/{folder}/{folder}.shp"
    for layer, folder in _shapefile_folders.items()
}

# 2. Load Shapefiles Function
def load_shapefile(path, name):
    """Read a shapefile and return it in EPSG:4326.

    Args:
        path: Location of the .shp file to read.
        name: Label used only in the progress message.

    Returns:
        The layer as a GeoDataFrame, reprojected when its CRS does not
        compare equal to "EPSG:4326".
    """
    raw_layer = gpd.read_file(path)
    layer = raw_layer if raw_layer.crs == "EPSG:4326" else raw_layer.to_crs(epsg=4326)
    print(f"Loaded {name} with {len(layer)} features")
    return layer

# Load every configured layer, keyed by its layer name
shapefiles = {}
for layer_name, layer_path in shapefile_paths.items():
    shapefiles[layer_name] = load_shapefile(layer_path, layer_name)

# Ring between the two buffers: the one-mile buffer minus the half-mile buffer
between_half_one = gpd.overlay(
    shapefiles['One_mile_Buffer'],
    shapefiles['Half_mile_Buffer'],
    how='difference',
)
shapefiles['Between_half_one'] = between_half_one
print(f"Created 'Between Half and One Mile' layer with {len(between_half_one)} features")

# 3. Load and Filter Property Data
df = pd.read_csv(data_path)
# Keep only the rows whose IsInLubbock flag equals 1, as an independent copy
inside_city = df['IsInLubbock'].eq(1)
lubbock_data = df.loc[inside_city].copy()
print(f"Filtered dataset to {len(lubbock_data)} properties within Lubbock city limits")

# Add ZoneCategory if missing
if 'ZoneCategory' not in lubbock_data.columns:
    # Vectorised replacement for a row-wise apply(): np.select evaluates the
    # conditions in order and the first one that holds wins, which reproduces
    # the original if/elif chain (redlining zone, then half-mile, then one-mile).
    zone_conditions = [
        lubbock_data['IsInRedliningZone'] == 1,
        lubbock_data['IsInRedliningZone_half_mile'] == 1,
        lubbock_data['IsInRedliningZone_one_mile'] == 1,
    ]
    zone_labels = ['Redlining Zone', 'Half-mile Buffer', 'One-mile Buffer']
    # Rows matching none of the flags fall through to 'Outside'
    lubbock_data['ZoneCategory'] = np.select(zone_conditions, zone_labels, default='Outside')
    print("Created ZoneCategory column")

# 4. Convert to GeoDataFrame
# One point per property, built from the Longitude (x) and Latitude (y) columns
property_points = gpd.points_from_xy(x=lubbock_data['Longitude'], y=lubbock_data['Latitude'])
gdf_properties = gpd.GeoDataFrame(lubbock_data, geometry=property_points, crs="EPSG:4326")

Loaded City_Limit with 1 features
Loaded Council_Districts with 6 features
Loaded Redlining_Zone with 1 features
Loaded Half_mile_Buffer with 1 features
Loaded One_mile_Buffer with 1 features
Created 'Between Half and One Mile' layer with 1 features
Filtered dataset to 96592 properties within Lubbock city limits
Created ZoneCategory column


### Cell 2: Assign Districts and Zones
#### Purpose
Assign council districts and zones to each property using spatial joins.

#### Sub-Steps
1. **Assign Districts**: Join properties with council districts.
2. **Assign Zones**: Add flags for each zone and handle the 'Outside' case.

In [None]:
# Zone layers that receive an In_<zone> flag on every property
zone_names = ['Redlining_Zone', 'Half_mile_Buffer', 'Between_half_one', 'One_mile_Buffer']

# Remove the columns this cell produces so that running it a second time does
# not leave two 'Assigned_District' columns (which would turn the district
# filter used later into a DataFrame instead of a boolean Series).
_cell_outputs = ['Assigned_District', 'index_right', 'In_Outside'] + [f'In_{z}' for z in zone_names]
gdf_properties = gdf_properties.drop(columns=_cell_outputs, errors='ignore')

# 1. Assign Districts
gdf_properties = gpd.sjoin(
    gdf_properties,
    shapefiles['Council_Districts'][['DISTRICT', 'geometry']],
    how='left', predicate='within'
).rename(columns={'DISTRICT': 'Assigned_District'}).drop(columns=['index_right'])
# A left join emits one row per match; keep a single row per property so a
# point covered by more than one district feature is not counted twice.
gdf_properties = gdf_properties[~gdf_properties.index.duplicated(keep='first')]

# 2. Assign Zones
for zone in zone_names:
    temp_join = gpd.sjoin(
        gdf_properties[['geometry']],
        shapefiles[zone][['geometry']],
        how='left', predicate='within'
    )
    # Collapse the join back to one flag per property: True when the point
    # matched at least one feature of the zone layer.
    gdf_properties[f'In_{zone}'] = temp_join['index_right'].notna().groupby(level=0).any()
# 'Outside' means the property is in none of the zone layers
gdf_properties['In_Outside'] = ~gdf_properties[[f'In_{z}' for z in zone_names]].any(axis=1)
print("Assigned districts and zones to properties")


Assigned districts and zones to properties


### Cell 3: Precompute District-Zone Overlaps
#### Purpose
Determine which district-zone combinations have overlapping areas to optimize analysis.

#### Sub-Steps
1. **Define Lists**: List districts and zones.
2. **Compute Overlaps**: Use spatial overlays to create a lookup dictionary.


In [None]:
# Define Lists
districts = list(range(1, 7))
zones = ['Redlining_Zone', 'Half_mile_Buffer', 'Between_half_one', 'One_mile_Buffer', 'Outside']
years = ['1945', '1975', '1985', '2012', '2020', '2021']

# Compute Overlaps
# overlap_dict[(district, zone)] is True when the district polygon shares area
# with the zone; for 'Outside' it is True when something is left of the
# district after every zone layer has been cut out of it.
council_layer = shapefiles['Council_Districts']
overlap_dict = {}
for d in districts:
    district_gdf = council_layer[council_layer['DISTRICT'] == d]
    for z in zones:
        if z != 'Outside':
            piece = gpd.overlay(district_gdf, shapefiles[z][['geometry']], how='intersection')
        else:
            piece = reduce(
                lambda remaining, zone_name: gpd.overlay(
                    remaining, shapefiles[zone_name][['geometry']], how='difference'
                ),
                zone_names,
                district_gdf.copy(),
            )
        overlap_dict[(d, z)] = not piece.empty
print("Computed overlaps for all district-zone combinations")

Computed overlaps for all district-zone combinations


### Cell 4: Statistical Analysis
#### Purpose
Calculate statistics for property values across years, districts, and zones.

#### Sub-Steps
1. **Define Helper Functions**: Functions for outlier removal and stats calculation.
2. **Generate Statistics**: Compute stats for each combination and scenario.

In [None]:
# Define Scenarios
# (label, remove_outliers flag) pairs; the label doubles as the output folder name
scenarios = [('Outliers not removed', False), ('Outliers removed', True)]

# Helper Functions
def remove_outliers(df, column):
    """Keep the rows whose `column` value lies within the 1.5 * IQR fences.

    Args:
        df: DataFrame to filter.
        column: Name of the numeric column the fences are computed from.

    Returns:
        The rows of `df` with Q1 - 1.5*IQR <= value <= Q3 + 1.5*IQR
        (both fences inclusive).
    """
    series = df[column]
    lower_quartile = series.quantile(0.25)
    upper_quartile = series.quantile(0.75)
    spread = upper_quartile - lower_quartile
    lower_fence = lower_quartile - 1.5 * spread
    upper_fence = upper_quartile + 1.5 * spread
    within_fences = (series >= lower_fence) & (series <= upper_fence)
    return df[within_fences]

def calculate_stats(df, column, bin_size=1000):
    """Summarise the values of `column` in `df`.

    The mode is a binned mode: values are grouped into `bin_size`-wide,
    right-closed bins starting at the minimum value, and the midpoint of the
    fullest bin is reported (the lowest bin wins a tie).

    Unlike a plain `pd.cut(values, bins)`, values equal to the minimum are
    counted in the first bin. Leaving them out skewed the mode and made it
    NaN whenever every value was identical.

    Args:
        df: DataFrame holding the values.
        column: Name of the numeric column to summarise.
        bin_size: Width of the bins used for the mode (default 1000).

    Returns:
        dict with keys 'mean', 'median', 'mode', 'min', 'max' and 'count'.
        All statistics are NaN and 'count' is 0 when there are no non-missing
        values; 'count' is the number of non-missing values.
    """
    no_data = {'mean': np.nan, 'median': np.nan, 'mode': np.nan, 'min': np.nan, 'max': np.nan, 'count': 0}
    if df.empty:
        return no_data
    # Missing values cannot be binned and would make np.arange fail when the
    # whole column is NaN.
    values = df[column].dropna()
    if values.empty:
        return no_data

    lowest, highest = values.min(), values.max()
    edges = np.arange(lowest, highest + bin_size, bin_size, dtype=float)
    if len(edges) < 2:
        # All values identical: arange yields a single edge, so add the upper one.
        edges = np.array([lowest, lowest + bin_size], dtype=float)

    # searchsorted(side='left') maps a value v to i with edges[i-1] < v <= edges[i],
    # i.e. right-closed bins. Clipping to [1, len(edges) - 1] places the minimum
    # in the first bin and absorbs any float round-off at the top edge.
    positions = np.searchsorted(edges, values.to_numpy(dtype=float), side='left')
    bin_index = np.clip(positions, 1, len(edges) - 1) - 1
    fullest_bin = np.bincount(bin_index).argmax()  # argmax returns the lowest bin on ties
    mode_value = float((edges[fullest_bin] + edges[fullest_bin + 1]) / 2)

    return {
        'mean': values.mean(),
        'median': values.median(),
        'mode': mode_value,
        'min': lowest,
        'max': highest,
        'count': len(values)
    }

# Compute Statistics
# One row per (year, district, zone, scenario); combinations whose district and
# zone do not overlap are skipped. The per-group result is held in `group_stats`
# rather than `stats`, so the `scipy.stats` import is not overwritten.
stats_list = []
for year in years:
    value_col = f'TotalValue_{year}'
    has_value = gdf_properties[value_col].notna()  # depends on the year only
    for district in districts:
        in_district = gdf_properties['Assigned_District'] == district  # depends on the district only
        for zone in zones:
            if not overlap_dict[(district, zone)]:
                continue
            in_zone = gdf_properties['In_Outside'] if zone == 'Outside' else gdf_properties[f'In_{zone}']
            filtered_df = gdf_properties[in_district & in_zone & has_value]
            for scenario, remove_flag in scenarios:
                # Outliers are trimmed within the district/zone group, not city-wide
                df_to_compute = remove_outliers(filtered_df, value_col) if remove_flag else filtered_df
                group_stats = calculate_stats(df_to_compute, value_col)
                stats_list.append({
                    'district': district,
                    'zone': zone,
                    'year': year,
                    'scenario': scenario,
                    **group_stats
                })
stats_df = pd.DataFrame(stats_list)
print("Computed statistics for all combinations")

Computed statistics for all combinations


### Cell 5: Set Up Folder Structure
#### Purpose
Create directories for visualizations, organized by scenario (outliers not removed vs. outliers removed).

In [None]:
# Base directory
vis_dir = '/content/drive/MyDrive/visualizations'

# Sub-folders created inside every scenario folder: one map folder per
# statistic, then one folder per remaining plot type.
_scenario_subfolders = [f"maps/{stat_name}" for stat_name in ('mean', 'median', 'mode', 'min', 'max')]
_scenario_subfolders += ['bar_plots', 'violin_plots', 'heatmaps']

for scenario, _ in scenarios:
    for subfolder in _scenario_subfolders:
        os.makedirs(f"{vis_dir}/{scenario}/{subfolder}", exist_ok=True)
print("Folder structure created")

Folder structure created


### Cell 6: Generate Interactive Maps with Checkbox Menu and Updated Tooltips

#### Purpose
Create Folium maps with:
- Checkbox menu to toggle district-zone intersections.
- Tooltips showing intersection name, statistic value (or "N/A" with reason, e.g., "All unique values"), and property count.
- Consistent color scales across both scenarios.
- Titles for each map.



In [None]:
# Professional zone display names (internal zone key -> label shown in tooltips)
zone_display_names = dict([
    ('Redlining_Zone', 'Redlining Zone'),
    ('Half_mile_Buffer', 'Half-mile Buffer'),
    ('Between_half_one', 'Between 0.5 and 1 Mile'),
    ('One_mile_Buffer', 'One-mile Buffer'),
    ('Outside', 'Outside'),
])

# Calculate global min and max for each statistic across all scenarios, so that
# every map of a statistic shares one colour scale
global_min_max = {}
for stat in ['mean', 'median', 'mode', 'min', 'max']:
    observed = stats_df[stat].dropna()
    # Fall back to (0, 1) when the statistic has no values at all
    global_min_max[stat] = (0, 1) if observed.empty else (observed.min(), observed.max())

# Precompute intersections GeoDataFrame
# One entry per district/zone pair flagged in overlap_dict: the district
# clipped to the zone, or, for 'Outside', the district with every zone layer
# cut out of it. Each piece is tagged with its district and zone.
council_layer = shapefiles['Council_Districts']
intersections_list = []
for d in districts:
    district_gdf = council_layer[council_layer['DISTRICT'] == d]
    for z in zones:
        if not overlap_dict[(d, z)]:
            continue
        if z == 'Outside':
            piece = district_gdf.copy()
            for zone_name in zone_names:
                piece = gpd.overlay(piece, shapefiles[zone_name][['geometry']], how='difference')
        else:
            piece = gpd.overlay(district_gdf, shapefiles[z][['geometry']], how='intersection')
        if piece.empty:
            continue
        intersections_list.append(piece.assign(district=d, zone=z))
intersections_gdf = gpd.GeoDataFrame(pd.concat(intersections_list, ignore_index=True), crs="EPSG:4326")

# One Folium map per scenario, year and statistic. Every district-zone piece is
# its own toggleable layer, coloured on a log10 scale shared across scenarios.
map_stats = ['mean', 'median', 'mode', 'min', 'max']

for scenario, _ in scenarios:
    for year in years:
        # Statistics rows for this scenario and year; shared by all five maps
        stats_subset = stats_df[(stats_df['scenario'] == scenario) & (stats_df['year'] == year)]

        for stat in map_stats:
            # Attach the statistic and the property count to each piece
            map_df = intersections_gdf.merge(
                stats_subset[['district', 'zone', stat, 'count']],
                on=['district', 'zone'],
                how='left',
            )

            # Create base map
            m = folium.Map(location=[33.58, -101.85], zoom_start=11, tiles='CartoDB dark_matter')

            # Log10 colour scale built from the global range of this statistic;
            # a non-positive minimum is mapped to 0 instead of being logged
            min_val, max_val = global_min_max[stat]
            scale_floor = np.log10(min_val) if min_val > 0 else 0
            colormap = linear.YlOrRd_09.scale(scale_floor, np.log10(max_val))
            colormap.caption = f'Log10 of {stat.capitalize()} Property Value'

            # Add intersections as toggleable layers
            for _, row in map_df.iterrows():
                value = row[stat]
                count = 0 if pd.isna(row['count']) else row['count']
                value_missing = pd.isna(value)

                # Gray marks pieces with no properties or an undefined statistic
                if count != 0 and not value_missing:
                    color = colormap(np.log10(value) if value > 0 else 0)
                else:
                    color = 'gray'

                # Tooltip text: the formatted value, or N/A with the reason
                if not value_missing:
                    stat_display = f"{value:.2f}"
                elif count == 0:
                    stat_display = 'N/A (No properties)'
                elif stat == 'mode':
                    stat_display = 'N/A (All unique values)'
                else:
                    stat_display = 'N/A'

                # GeoJSON feature carrying the fields the tooltip reads
                tooltip_properties = {
                    "intersection": f"District {row['district']} - {zone_display_names[row['zone']]}",
                    "stat_value": stat_display,
                    "count": int(count)
                }
                geojson_feature = {
                    "type": "Feature",
                    "geometry": row['geometry'].__geo_interface__,
                    "properties": tooltip_properties
                }

                # One feature group per piece so it can be toggled in the layer control
                fg = FeatureGroup(name=f"D{row['district']} - {row['zone']}", show=True)
                piece_layer = GeoJson(
                    geojson_feature,
                    # c=color freezes this row's colour in the lambda
                    style_function=lambda x, c=color: {'fillColor': c, 'color': 'black', 'weight': 1, 'fillOpacity': 0.7},
                    tooltip=folium.GeoJsonTooltip(
                        fields=['intersection', 'stat_value', 'count'],
                        aliases=['Intersection', f'{stat.capitalize()}', 'Property Count'],
                        localize=True
                    )
                )
                piece_layer.add_to(fg)
                fg.add_to(m)

            # Add color scale and layer control
            colormap.add_to(m)
            folium.LayerControl(collapsed=False).add_to(m)

            # Add title
            title_html = f'<h3 align="center" style="font-size:16px"><b>{stat.capitalize()} Property Values in {year} ({scenario})</b></h3>'
            m.get_root().html.add_child(folium.Element(title_html))

            # Save map
            m.save(f"{vis_dir}/{scenario}/maps/{stat}/{year}.html")
            print(f"Saved map for {stat} in {year} for {scenario}")

Saved map for mean in 1945 for Outliers not removed
Saved map for median in 1945 for Outliers not removed
Saved map for mode in 1945 for Outliers not removed
Saved map for min in 1945 for Outliers not removed
Saved map for max in 1945 for Outliers not removed
Saved map for mean in 1975 for Outliers not removed
Saved map for median in 1975 for Outliers not removed
Saved map for mode in 1975 for Outliers not removed
Saved map for min in 1975 for Outliers not removed
Saved map for max in 1975 for Outliers not removed
Saved map for mean in 1985 for Outliers not removed
Saved map for median in 1985 for Outliers not removed
Saved map for mode in 1985 for Outliers not removed
Saved map for min in 1985 for Outliers not removed
Saved map for max in 1985 for Outliers not removed
Saved map for mean in 2012 for Outliers not removed
Saved map for median in 2012 for Outliers not removed
Saved map for mode in 2012 for Outliers not removed
Saved map for min in 2012 for Outliers not removed
Saved map f

### Cell 7: Generate Labeled Bar Plots

#### Purpose
Create interactive, labeled bar plots for each statistic.

In [None]:
# One grouped bar chart per scenario, year and statistic: zones on the x axis,
# one bar per district within each zone.
for scenario, _ in scenarios:
    for year in years:
        year_rows = stats_df[(stats_df['year'] == year) & (stats_df['scenario'] == scenario)]
        # Plotly Express treats a numeric `color` column as continuous (one trace
        # plus a colour bar), which defeats barmode='group'. Casting the district
        # to a string makes it a discrete series with one trace per district.
        stats_subset = year_rows.assign(district=year_rows['district'].astype(str))
        for stat in ['mean', 'median', 'mode', 'min', 'max']:
            fig = px.bar(
                stats_subset,
                x='zone',
                y=stat,
                color='district',
                barmode='group',
                title=f'{stat.capitalize()} Property Values in {year} ({scenario})',
                labels={'zone': 'Zone', stat: f'{stat.capitalize()} Value', 'district': 'District'}
            )
            fig.write_html(f"{vis_dir}/{scenario}/bar_plots/{stat}_{year}.html")
            print(f"Saved bar plot for {stat} in {year} for {scenario}")

Saved bar plot for mean in 1945 for Outliers not removed
Saved bar plot for median in 1945 for Outliers not removed
Saved bar plot for mode in 1945 for Outliers not removed
Saved bar plot for min in 1945 for Outliers not removed
Saved bar plot for max in 1945 for Outliers not removed
Saved bar plot for mean in 1975 for Outliers not removed
Saved bar plot for median in 1975 for Outliers not removed
Saved bar plot for mode in 1975 for Outliers not removed
Saved bar plot for min in 1975 for Outliers not removed
Saved bar plot for max in 1975 for Outliers not removed
Saved bar plot for mean in 1985 for Outliers not removed
Saved bar plot for median in 1985 for Outliers not removed
Saved bar plot for mode in 1985 for Outliers not removed
Saved bar plot for min in 1985 for Outliers not removed
Saved bar plot for max in 1985 for Outliers not removed
Saved bar plot for mean in 2012 for Outliers not removed
Saved bar plot for median in 2012 for Outliers not removed
Saved bar plot for mode in 20

### Cell 8: Generate Labeled Violin Plots

#### Purpose
Create labeled violin plots showing property value distributions by zone.

In [None]:
# Pre-filter properties
# filtered_props[year][scenario] -> properties that have a value for that year,
# either as they are or with the outliers of the whole year's data removed
filtered_props = {}
for year in years:
    value_col = f'TotalValue_{year}'
    with_value = gdf_properties.loc[gdf_properties[value_col].notna()]
    filtered_props[year] = dict(zip(
        ('Outliers not removed', 'Outliers removed'),
        (with_value, remove_outliers(with_value, value_col)),
    ))

# One violin plot (with an inner box plot) per scenario and year, showing the
# distribution of property values for each ZoneCategory
for scenario, _ in scenarios:
    for year in years:
        value_col = f'TotalValue_{year}'
        axis_labels = {'ZoneCategory': 'Zone', value_col: 'Property Value'}
        plot_title = f'Property Value Distribution in {year} ({scenario})'
        fig = px.violin(
            filtered_props[year][scenario],
            x='ZoneCategory',
            y=value_col,
            box=True,
            title=plot_title,
            labels=axis_labels,
        )
        fig.write_html(f"{vis_dir}/{scenario}/violin_plots/{year}.html")
        print(f"Saved violin plot for {year} for {scenario}")

Saved violin plot for 1945 for Outliers not removed
Saved violin plot for 1975 for Outliers not removed
Saved violin plot for 1985 for Outliers not removed
Saved violin plot for 2012 for Outliers not removed
Saved violin plot for 2020 for Outliers not removed
Saved violin plot for 2021 for Outliers not removed
Saved violin plot for 1945 for Outliers removed
Saved violin plot for 1975 for Outliers removed
Saved violin plot for 1985 for Outliers removed
Saved violin plot for 2012 for Outliers removed
Saved violin plot for 2020 for Outliers removed
Saved violin plot for 2021 for Outliers removed


### Cell 9: Generate Heatmaps

#### Purpose
Create heatmaps showing property value density.

In [None]:
# One heat map per scenario and year; each point is weighted by the property's
# value for that year.
for scenario, _ in scenarios:
    for year in years:
        value_col = f'TotalValue_{year}'
        # filtered_props only guarantees a non-missing value; HeatMap rejects
        # NaNs, so rows with a missing coordinate are dropped as well.
        df_to_plot = filtered_props[year][scenario][['Latitude', 'Longitude', value_col]].dropna()
        m = folium.Map(location=[33.58, -101.85], zoom_start=11, tiles='CartoDB dark_matter')
        HeatMap(df_to_plot.values.tolist(), radius=10).add_to(m)
        m.save(f"{vis_dir}/{scenario}/heatmaps/{year}.html")
        print(f"Saved heatmap for {year} for {scenario}")

Saved heatmap for 1945 for Outliers not removed
Saved heatmap for 1975 for Outliers not removed
Saved heatmap for 1985 for Outliers not removed
Saved heatmap for 2012 for Outliers not removed
Saved heatmap for 2020 for Outliers not removed
Saved heatmap for 2021 for Outliers not removed
Saved heatmap for 1945 for Outliers removed
Saved heatmap for 1975 for Outliers removed
Saved heatmap for 1985 for Outliers removed
Saved heatmap for 2012 for Outliers removed
Saved heatmap for 2020 for Outliers removed
Saved heatmap for 2021 for Outliers removed


### Cell 10: Create Dashboards

#### Purpose
Generate HTML dashboards linking to all visualizations.

In [None]:
# Write one dashboard.html per scenario folder: a tab per year, each tab
# linking (relative to the scenario folder) to that year's maps, bar plots,
# violin plot and heatmap.
for scenario, _ in scenarios:
    # Page head, styles, heading and the opening of the tab bar
    page_head = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Lubbock Property Dashboard - {scenario}</title>
        <style>
            .tab {{ overflow: hidden; border: 1px solid #ccc; background-color: #f1f1f1; }}
            .tab button {{ float: left; border: none; padding: 14px 16px; cursor: pointer; }}
            .tabcontent {{ display: none; padding: 10px; border: 1px solid #ccc; }}
        </style>
    </head>
    <body>
        <h1>Lubbock Property Dashboard - {scenario}</h1>
        <div class="tab">
    """

    # One tab button per year
    tab_buttons = [f'<button onclick="openTab(event, \'{year}\')">{year}</button>' for year in years]

    # One hidden panel per year
    year_panels = []
    for year in years:
        stat_links = ''.join(
            f'<h3>{stat.capitalize()}</h3>'
            f'<p><a href="maps/{stat}/{year}.html">Map</a> | <a href="bar_plots/{stat}_{year}.html">Bar Plot</a></p>'
            for stat in ['mean', 'median', 'mode', 'min', 'max']
        )
        year_panels.append(
            f'<div id="{year}" class="tabcontent"><h2>{year}</h2>'
            + stat_links
            + '<h3>Distribution</h3>'
            + f'<p><a href="violin_plots/{year}.html">Violin Plot</a> | <a href="heatmaps/{year}.html">Heatmap</a></p>'
            + '</div>'
        )

    # Tab-switching script; the final click() opens the first tab on load
    page_tail = """
    <script>
        function openTab(evt, year) {
            var tabcontent = document.getElementsByClassName("tabcontent");
            for (var i = 0; i < tabcontent.length; i++) tabcontent[i].style.display = "none";
            var tablinks = document.getElementsByTagName("button");
            for (var i = 0; i < tablinks.length; i++) tablinks[i].className = "";
            document.getElementById(year).style.display = "block";
            evt.currentTarget.className = "active";
        }
        document.getElementsByTagName("button")[0].click();
    </script>
    </body></html>
    """

    dashboard_html = page_head + ''.join(tab_buttons) + "</div>" + ''.join(year_panels) + page_tail
    with open(f"{vis_dir}/{scenario}/dashboard.html", 'w') as f:
        f.write(dashboard_html)
    print(f"Saved dashboard for {scenario}")

Saved dashboard for Outliers not removed
Saved dashboard for Outliers removed


## Folder Structure

```
visualizations/
├── Outliers not removed/
│   ├── dashboard.html
│   ├── maps/
│   │   ├── mean/
│   │   │   ├── 1945.html
│   │   │   └── ...
│   │   ├── median/
│   │   └── ... (for mode, min, max)
│   ├── bar_plots/
│   ├── violin_plots/
│   └── heatmaps/
└── Outliers removed/
    ├── dashboard.html
    ├── maps/
    │   ├── mean/
    │   └── ...
    ├── bar_plots/
    ├── violin_plots/
    └── heatmaps/
```

# Appendix: Flat-Folder Version of the Visualizations and Dashboard (Single Scenario)

### Cell 5: Visualizations for All Statistics
#### Purpose
Generate and save visualizations (maps, bar plots, heatmaps, violin plots) for each statistic, organized by folders.

#### Sub-Steps
1. **Setup Folders**: Create directories for each statistic.
2. **Interactive Maps**: Generate Folium maps for each statistic and year.
3. **Bar Plots**: Create interactive bar plots with Plotly.
4. **Heatmaps**: Generate heatmaps for property value density.
5. **Violin Plots**: Plot the distribution of property values by zone category.

In [None]:
# 1. Setup Folders
vis_dir = '/content/drive/MyDrive/visualizations'
# NOTE: the dashboard cell that follows iterates over `stats`, so the name is
# kept even though it hides the `scipy.stats` import.
stats = ['mean', 'median', 'mode', 'min', 'max']
for stat in stats:
    os.makedirs(f"{vis_dir}/{stat}", exist_ok=True)
os.makedirs(f"{vis_dir}/extra", exist_ok=True)


def add_stat_columns(gdf, scenario='Outliers not removed', stat_names=tuple(stats)):
    """Return `gdf` with one '<stat>_<year>' column per statistic and year.

    `intersections_gdf` only carries district, zone and geometry, while the
    map and bar-plot helpers below read '<stat>_<year>' columns. This reshapes
    the rows of `stats_df` for one scenario into that wide layout and joins
    them on (district, zone).

    Args:
        gdf: GeoDataFrame with 'district' and 'zone' columns.
        scenario: Value of stats_df['scenario'] to take the statistics from.
            NOTE(review): this cell has no scenario concept of its own; the
            unfiltered scenario is assumed - confirm.
        stat_names: Statistic columns of stats_df to spread out.

    Returns:
        A copy of `gdf` with the statistic columns added (NaN where stats_df
        has no row for a piece).
    """
    scenario_rows = stats_df[stats_df['scenario'] == scenario]
    wide = scenario_rows.set_index(['district', 'zone', 'year'])[list(stat_names)].unstack('year')
    wide.columns = [f'{stat_name}_{stat_year}' for stat_name, stat_year in wide.columns]
    # Drop columns left by an earlier call so a re-run does not create _x/_y pairs
    stale = [col for col in wide.columns if col in gdf.columns]
    return gdf.drop(columns=stale).merge(wide.reset_index(), on=['district', 'zone'], how='left')


intersections_stats_gdf = add_stat_columns(intersections_gdf)


# 2. Interactive Maps
def create_map(year, stat, gdf):
    """Save a map colouring each piece blue or red relative to the median.

    Args:
        year: Year suffix of the statistic column.
        stat: Statistic prefix of the statistic column.
        gdf: GeoDataFrame with 'district', 'zone', 'geometry' and a
            '<stat>_<year>' column.

    Nothing is written when the column is missing or has no values.
    """
    value_col = f'{stat}_{year}'
    if value_col not in gdf.columns:
        return
    values = gdf[value_col].dropna()
    if values.empty:
        return
    # Computed once; the style function would otherwise recompute it per feature
    median_value = values.median()
    m = folium.Map(location=[33.58, -101.85], zoom_start=11)
    for _, row in gdf.iterrows():
        value = row[value_col]
        if pd.notna(value):
            folium.GeoJson(
                row['geometry'],
                # Default arguments freeze this row's value in the lambda
                style_function=lambda x, v=value, mid=median_value: {
                    'fillColor': 'blue' if v > mid else 'red',
                    'color': 'black',
                    'fillOpacity': 0.7
                },
                tooltip=f"D{row['district']} - {row['zone']}: {value:.2f}"
            ).add_to(m)
    m.save(f"{vis_dir}/{stat}/{year}.html")

for year in years:
    for stat in stats:
        create_map(year, stat, intersections_stats_gdf)
print("Generated interactive maps")

# 3. Bar Plots
def create_bar_plot(year, stat, gdf=None):
    """Save a grouped bar chart of one statistic by zone and district.

    Args:
        year: Year suffix of the statistic column.
        stat: Statistic prefix of the statistic column.
        gdf: Frame with 'district', 'zone' and '<stat>_<year>' columns;
            defaults to `intersections_stats_gdf`.
    """
    source = intersections_stats_gdf if gdf is None else gdf
    value_col = f'{stat}_{year}'
    df = source[['district', 'zone', value_col]].dropna()
    # A numeric colour column is drawn as a continuous scale; a string one
    # yields one bar series per district, which barmode='group' needs.
    df = df.assign(district=df['district'].astype(str))
    fig = px.bar(df, x='zone', y=value_col, color='district', barmode='group',
                 title=f'{stat.capitalize()} Property Values in {year}')
    fig.write_html(f"{vis_dir}/{stat}/{year}_bar.html")

for year in years:
    for stat in stats:
        create_bar_plot(year, stat)
print("Generated bar plots")

# 4. Heatmaps
def create_heatmap(year):
    """Save a heat map of property locations weighted by that year's value."""
    # Rows missing a coordinate or the value are dropped before plotting
    data = gdf_properties[['Latitude', 'Longitude', f'TotalValue_{year}']].dropna()
    m = folium.Map(location=[33.58, -101.85], zoom_start=11)
    HeatMap(data.values.tolist(), radius=10).add_to(m)
    m.save(f"{vis_dir}/extra/heatmap_{year}.html")

for year in years:
    create_heatmap(year)
print("Generated heatmaps")

# 5. Violin Plots
def create_violin_plot(year, stat):
    """Save a violin plot of that year's property values by ZoneCategory.

    `stat` only affects the title and the output folder; the plotted data is
    the raw value column for `year`.
    """
    df = gdf_properties[[f'TotalValue_{year}', 'ZoneCategory']].dropna()
    fig = px.violin(df, y=f'TotalValue_{year}', x='ZoneCategory', box=True,
                    title=f'{stat.capitalize()} Distribution in {year}')
    fig.write_html(f"{vis_dir}/{stat}/{year}_violin.html")

for year in years:
    for stat in ['mean']:  # Example for mean; extend as needed
        create_violin_plot(year, stat)
print("Generated violin plots")

Generated interactive maps
Generated bar plots
Generated heatmaps
Generated violin plots


### Cell 6: Create Dashboard
#### Purpose
Consolidate all visualizations into an interactive HTML dashboard.

#### Sub-Steps
1. **Generate HTML**: Create a tabbed interface with links to visualizations.
2. **Save Dashboard**: Output the dashboard file.

In [None]:
# Build a single dashboard.html at the top of the visualizations folder: a tab
# per year, each linking to that year's map and bar plot for every statistic
# (plus the violin plot for 'mean') and to the year's heatmap.

# Page head, styles, heading and the opening of the tab bar
page_head = """
<!DOCTYPE html>
<html>
<head>
    <title>Lubbock Property Dashboard</title>
    <style>
        .tab { overflow: hidden; border: 1px solid #ccc; background-color: #f1f1f1; }
        .tab button { float: left; border: none; padding: 14px 16px; cursor: pointer; }
        .tabcontent { display: none; padding: 10px; border: 1px solid #ccc; }
    </style>
</head>
<body>
    <h1>Lubbock Property Dashboard</h1>
    <div class="tab">
"""

# One tab button per year
tab_buttons = [f'<button onclick="openTab(event, \'{year}\')">{year}</button>' for year in years]

# One hidden panel per year
year_panels = []
for year in years:
    panel_parts = [f'<div id="{year}" class="tabcontent"><h2>{year}</h2>']
    for stat in stats:
        panel_parts.append(f'<h3>{stat.capitalize()}</h3>')
        panel_parts.append(f'<p><a href="{stat}/{year}.html">Map</a> | ')
        panel_parts.append(f'<a href="{stat}/{year}_bar.html">Bar Plot</a> | ')
        # Only the 'mean' folder contains a violin plot
        violin_link = f'<a href="{stat}/{year}_violin.html">Violin Plot</a>' if stat == 'mean' else ''
        panel_parts.append(violin_link + '</p>')
    panel_parts.append(f'<p><a href="extra/heatmap_{year}.html">Heatmap</a></p></div>')
    year_panels.append(''.join(panel_parts))

# Tab-switching script; the final click() opens the first tab on load
page_tail = """
<script>
    function openTab(evt, year) {
        var i, tabcontent = document.getElementsByClassName("tabcontent");
        for (i = 0; i < tabcontent.length; i++) tabcontent[i].style.display = "none";
        var tablinks = document.getElementsByTagName("button");
        for (i = 0; i < tablinks.length; i++) tablinks[i].className = "";
        document.getElementById(year).style.display = "block";
        evt.currentTarget.className = "active";
    }
    document.getElementsByTagName("button")[0].click();
</script>
</body></html>
"""

dashboard_html = page_head + ''.join(tab_buttons) + "</div>" + ''.join(year_panels) + page_tail

with open(f"{vis_dir}/dashboard.html", 'w') as f:
    f.write(dashboard_html)
print("Dashboard created at visualizations/dashboard.html")


Dashboard created at visualizations/dashboard.html


## Folder Structure
```
/content/drive/MyDrive/visualizations/
├── mean/
│   ├── 1945.html (Map)
│   ├── 1945_bar.html (Bar Plot)
│   ├── 1945_violin.html (Violin Plot)
│   └── ... (for other years)
├── median/
│   ├── 1945.html
│   ├── 1945_bar.html
│   └── ...
├── mode/
├── min/
├── max/
├── extra/
│   ├── heatmap_1945.html
│   └── ...
└── dashboard.html
```