In [14]:
import pandas as pd
import pandas as pd
import holoviews as hv
import panel as pn
from bokeh.models import NumeralTickFormatter  # Formatter for y-axis

# Enable Holoviews Bokeh extension
hv.extension('bokeh')

# Load the dataset
file_path = "./Resources/full_data_set_zipcodes.csv"
df = pd.read_csv(file_path)

# Display basic information and first few rows
df.info(), df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113 entries, 0 to 112
Columns: 733 entries, DATE to observation_date
dtypes: float64(731), object(2)
memory usage: 647.2+ KB


(None,
          DATE  PRICE_PER_GALLON  UNEMPLOYMENT_RATE  AVG_MORTGAGE_RATE  \
 0  2010-11-01             2.913                9.8                4.4   
 1  2010-12-01             3.048                9.3                4.5   
 2  2011-01-01             3.148                9.1                4.8   
 3  2011-02-01             3.264                9.0                4.9   
 4  2011-03-01             3.615                9.0                5.0   
 
    INTEREST_RATES  INFLATION     60601     89101     75201    43085  ...  \
 0            0.19    218.803  190000.0  117400.0  169400.0  84700.0  ...   
 1            0.18    219.179  190400.0  116600.0  169500.0  90600.0  ...   
 2            0.17    220.223  188500.0  115600.0  191900.0  89100.0  ...   
 3            0.16    221.309  189400.0  114000.0  184500.0  86000.0  ...   
 4            0.14    223.467  186300.0  110800.0  174500.0  79800.0  ...   
 
       32003     15108     19426     33957     19468     32550     85641  \
 0  219

In [15]:
# Convert DATE column to datetime format
df["DATE"] = pd.to_datetime(df["DATE"])

# Load ZIP to City/State mapping dataset
mapping_file_path = "./Resources/Sale_Prices_City_sample.csv"
mapping_df = pd.read_csv(mapping_file_path)

# Extract valid ZIP codes (RegionIDs) that exist in df.columns
unique_region_ids = mapping_df["RegionID"].astype(str).unique()
valid_zip_codes = [zip_code for zip_code in unique_region_ids if zip_code in df.columns]

# Filter mapping_df to only include valid ZIP codes
filtered_mapping_df = mapping_df[mapping_df["RegionID"].astype(str).isin(valid_zip_codes)]

# Create a mapping of ZIP code (RegionID) to "City, State"
corrected_zip_to_city_state = {
    str(row["RegionID"]): f"{row['RegionName']}, {row['StateName']}"
    for _, row in filtered_mapping_df.iterrows()
}

# Reverse mapping to get ZIP code from city-state selection
corrected_city_state_to_zip = {v: k for k, v in corrected_zip_to_city_state.items()}

# Get min and max years for range selection
min_year, max_year = df["DATE"].dt.year.min(), df["DATE"].dt.year.max()


In [16]:
# Create a dropdown with corrected city, state names
corrected_city_state_dropdown = pn.widgets.Select(name="Select Location", options=list(corrected_city_state_to_zip.keys()))

# Create a range slider for year selection
year_range_slider = pn.widgets.IntRangeSlider(
    name="Select Year Range",
    start=min_year,
    end=max_year,
    value=(min_year, max_year),  # Default to full range
    step=1
)



In [17]:
# Function to update the plot based on city/state selection
def plot_corrected_city_state_time_series(city_state, year_range):
    zip_code = corrected_city_state_to_zip.get(city_state, None)  # Get corresponding ZIP code

    if zip_code not in df.columns:
        return hv.Curve([]).opts(title="Invalid ZIP Code", xlabel="Year", ylabel="Housing Price ($)")

    # Filter data based on the selected year range
    start_year, end_year = year_range
    filtered_df = df[(df["DATE"].dt.year >= start_year) & (df["DATE"].dt.year <= end_year)]

    curve = hv.Curve((filtered_df["DATE"], filtered_df[zip_code]), 'DATE', 'Price').opts(
        title=f"Housing Prices Over Time for {city_state} (ZIP: {zip_code})",
        xlabel="Year",
        ylabel="Housing Price ($)",
        width=800,
        height=500,
        line_width=2,
        tools=['hover'],
        yformatter=NumeralTickFormatter(format="0,0")  # Format y-axis with real values
    )
    return curve

In [18]:
# Bind the interactive plot to the corrected city/state dropdown
corrected_interactive_plot = pn.bind(plot_corrected_city_state_time_series, city_state=corrected_city_state_dropdown, year_range=year_range_slider)
corrected_dashboard = pn.Column(corrected_city_state_dropdown, year_range_slider, corrected_interactive_plot)

# Display the corrected interactive dashboard in Jupyter Notebook
corrected_dashboard.show()

Launching server at http://localhost:59999


<panel.io.server.Server at 0x12297fce0>

In [19]:
# Load the ZIP code coordinates file
zip_coords_file_path = "./Resources/zipcodes_coordinates.csv"
zip_coords_df = pd.read_csv(zip_coords_file_path)

# Display basic information and first few rows
zip_coords_df.info(), zip_coords_df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 726 entries, 0 to 725
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   RegionID   726 non-null    int64  
 1   City       726 non-null    object 
 2   State      726 non-null    object 
 3   Zip Code   726 non-null    int64  
 4   Latitude   726 non-null    float64
 5   Longitude  726 non-null    float64
dtypes: float64(2), int64(2), object(2)
memory usage: 34.2+ KB


(None,
    RegionID        City     State  Zip Code   Latitude   Longitude
 0     17426     Chicago  Illinois     60601  41.843935  -87.786352
 1     18959   Las Vegas    Nevada     89101  36.167145 -115.139644
 2     38128      Dallas     Texas     75201  32.785467  -96.798295
 3     10920    Columbus      Ohio     43085  40.095385  -83.037600
 4     12455  Louisville  Kentucky     40202  38.251228  -85.749400)

In [2]:
import panel as pn
import pandas as pd
import numpy as np
from jupyter_bokeh.widgets import BokehModel
import hvplot.pandas

In [4]:
df_prices = pd.read_csv(r"./Resources/full_data_set_zipcodes.csv")
df_zipcodes = pd.read_csv(r"./Resources/zipcodes_coordinates.csv")
df_prices["DATE"] = pd.to_datetime(df_prices["DATE"])
# Extract relevant columns (ZIP code columns)
zip_columns = [col for col in df_prices.columns if col.isdigit()]
df_prices[zip_columns] = df_prices[zip_columns].apply(pd.to_numeric, errors='coerce')
# Merge ZIP codes with city names
df_zipcodes["Zip Code"] = df_zipcodes["Zip Code"].astype(str)  # Convert to string for merging
zip_city_mapping = df_zipcodes.set_index("Zip Code")["City"].to_dict()

# Create a new DataFrame with city-wise housing prices
df_city = df_prices.melt(id_vars=["DATE"], value_vars=zip_columns, var_name="Zip Code", value_name="Price")
df_city["City"] = df_city["Zip Code"].map(zip_city_mapping)
df_city.dropna(subset=["City"], inplace=True)  # Remove unknown ZIPs

# Aggregate prices by city
df_city_grouped = df_city.groupby(["DATE", "City"])["Price"].mean().reset_index()


In [7]:
# Multi-select widget for city selection
cities = df_city_grouped["City"].unique().tolist()
multi_select = pn.widgets.MultiSelect(name="Select Cities", options=cities, value=["Chicago", "Dallas"])

def plot_cities(selected_cities):
    return df_city_grouped[df_city_grouped["City"].isin(selected_cities)].hvplot.line(
        x="DATE", y="Price", by="City",
        title="Comparative Time Series for Multiple Cities",
        xlabel="Year", ylabel="Housing Prices",
        width=800, height=500, legend="top_left",
    )
interactive_plot = pn.bind(plot_cities, multi_select)
dashboard = pn.Column(
    "## 📊 Comparative Time Series for Multiple Cities",
    "### Select Cities to Compare Housing Price Trends",
    multi_select,
    interactive_plot
)

# Show dashboard in Jupyter Notebook
dashboard.show()

Launching server at http://localhost:64475


<panel.io.server.Server at 0x115e2bec0>

In [31]:
import pandas as pd

# File paths
housing_prices_path = "./Resources/full_data_set_zipcodes.csv"
zip_metadata_path = "./Resources/zipcodes_coordinates.csv"

# Load datasets
housing_prices_df = pd.read_csv(housing_prices_path)
zip_metadata_df = pd.read_csv(zip_metadata_path)

# Display the first few rows of each dataset
housing_prices_df.head(), zip_metadata_df.head()



(         DATE  PRICE_PER_GALLON  UNEMPLOYMENT_RATE  AVG_MORTGAGE_RATE  \
 0  2010-11-01             2.913                9.8                4.4   
 1  2010-12-01             3.048                9.3                4.5   
 2  2011-01-01             3.148                9.1                4.8   
 3  2011-02-01             3.264                9.0                4.9   
 4  2011-03-01             3.615                9.0                5.0   
 
    INTEREST_RATES  INFLATION     60601     89101     75201    43085  ...  \
 0            0.19    218.803  190000.0  117400.0  169400.0  84700.0  ...   
 1            0.18    219.179  190400.0  116600.0  169500.0  90600.0  ...   
 2            0.17    220.223  188500.0  115600.0  191900.0  89100.0  ...   
 3            0.16    221.309  189400.0  114000.0  184500.0  86000.0  ...   
 4            0.14    223.467  186300.0  110800.0  174500.0  79800.0  ...   
 
       32003     15108     19426     33957     19468     32550     85641  \
 0  219200.0  

In [41]:
import pandas as pd

# Load datasets
housing_prices_df = pd.read_csv("./Resources/full_data_set_zipcodes.csv")  # Replace with your actual file path
zip_metadata_df = pd.read_csv("./Resources/zipcodes_coordinates.csv")  # Replace with your actual file path



In [39]:
import pandas as pd

# Reshape housing prices dataset from wide to long format
housing_prices_long = housing_prices_df.melt(id_vars=["DATE"], var_name="Zip Code", value_name="Price")

# Convert ZIP code column to numeric
housing_prices_long["Zip Code"] = pd.to_numeric(housing_prices_long["Zip Code"], errors='coerce')
housing_prices_long = housing_prices_long.dropna(subset=["Zip Code"]).astype({"Zip Code": "int"})

# Convert DATE to datetime format
housing_prices_long["DATE"] = pd.to_datetime(housing_prices_long["DATE"])

# Extract Year
housing_prices_long["Year"] = housing_prices_long["DATE"].dt.year

# Aggregate: Average Price per ZIP Code for Each Year
avg_prices = housing_prices_long.groupby(["Year", "Zip Code"])["Price"].mean().reset_index()

# Merge with ZIP metadata
merged_df = avg_prices.merge(zip_metadata_df, on="Zip Code", how="left")

# Drop rows with missing coordinates
merged_df = merged_df.dropna(subset=["Latitude", "Longitude"])


   Year  Zip Code     Price  RegionID          City     State   Latitude  \
0  2010     10701  486550.0     34937       Yonkers  New York  40.943386   
1  2010     10801  592900.0     26114  New Rochelle  New York  40.916581   
2  2010     11003  292000.0     31419        Elmont  New York  40.699658   
3  2010     11510  349300.0     30331       Baldwin  New York  40.654186   
4  2010     11530  701700.0     31695   Garden City  New York  40.725825   

   Longitude  
0 -73.886004  
1 -73.786493  
2 -73.704881  
3 -73.609572  
4 -73.646700  


In [47]:
import param
from holoviews import streams
from param import Parameterized, Number


In [48]:
import pandas as pd
import holoviews as hv
import geoviews as gv
import datashader as ds
import panel as pn
from holoviews.operation.datashader import datashade
from holoviews import opts

# Enable Holoviews & GeoViews
hv.extension("bokeh")
gv.extension("bokeh")

In [55]:
import holoviews as hv
import geoviews as gv
import datashader as ds
import panel as pn
from holoviews.operation.datashader import datashade
from holoviews import opts

# Enable Holoviews & GeoViews
hv.extension("bokeh")
gv.extension("bokeh")

# Ensure column names are clean
merged_df.columns = merged_df.columns.str.strip()

# Create a Panel widget for selecting the year
year_selector = pn.widgets.IntSlider(
    name="Select Year",
    start=int(merged_df["Year"].min()),
    end=int(merged_df["Year"].max()),
    step=1,
    value=int(merged_df["Year"].min())
)

# Function to generate the heatmap for the selected year
def plot_heatmap(selected_year):
    df_filtered = merged_df[merged_df["Year"] == selected_year]

    # Create a heatmap using ZIP code locations
    points = gv.Points(df_filtered, ["Longitude", "Latitude"], ["Price", "City", "State"])

    # Use datashader for smooth visualization
    heatmap = datashade(points, aggregator=ds.mean("Price"), cmap="inferno")

    # Base map (tiles)
    tiles = gv.tile_sources.OSM

    return tiles * heatmap

# Bind function to slider
heatmap_dmap = hv.DynamicMap(plot_heatmap, streams=[pn.bind(plot_heatmap, year_selector)])


# Create dashboard layout
dashboard = pn.Column(
    "# 🏠 US Housing Prices Heatmap",
    "### Select a Year to View Average Housing Prices by ZIP Code",
    year_selector,
    heatmap_dmap
)

# Run this in a Jupyter Notebook to display
dashboard.servable()


KeyError: "Callable 'plot_heatmap' missing keywords to accept stream parameters: __arg0"