In [None]:
import pandas as pd
import altair as alt
alt.data_transformers.disable_max_rows()

url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/building_inventory.csv"
data = pd.read_csv(url)

In [None]:
data.columns

In [None]:
# Filter out missing data
data = data.dropna(subset=['Usage Description', 'City'])

In [None]:
city_dropdown = alt.binding_select(options=sorted(data['City'].unique()), name="City: ")
city_selection = alt.selection_point(fields=['City'], bind=city_dropdown, name="Select City")

In [None]:
usage_chart = alt.Chart(data).mark_bar().encode(
    x=alt.X('Usage Description:N', title='Building Usage', sort='-y'),
    y=alt.Y('count():Q', title='Number of Buildings'),
    color=alt.Color('Usage Description:N', legend=None),
    tooltip=[
        alt.Tooltip('Usage Description:N', title='Building Usage'),
        alt.Tooltip('count():Q', title='Number of Buildings')
    ]
).add_params(
    city_selection  
).transform_filter(
    city_selection  
).properties(
    title='Distribution of Building Usages by City',
    width=800,
    height=500
)

usage_chart

## Second visualization

In [None]:
data['Year Constructed'] = pd.to_numeric(data['Year Constructed'], errors='coerce')
data = data.dropna(subset=['Year Constructed', 'City'])

In [None]:
year_slider = alt.param(
    name="YearFilter",
    bind=alt.binding_range(
        min=int(data['Year Constructed'].min()), 
        max=int(data['Year Constructed'].max()), 
        step=1,
        name="Year Constructed: "
    ),
    value=int(data['Year Constructed'].median())  # Default slider value
)



In [None]:
interactive_bar = alt.Chart(data).mark_bar().encode(
    x=alt.X('City:N', title='City', sort='-y'),
    y=alt.Y('BuildingCount:Q', title='Number of Buildings'),  # Use aggregated count explicitly
    color=alt.Color('City:N', legend=None),
    tooltip=[
        alt.Tooltip('City:N', title='City'),
        alt.Tooltip('BuildingCount:Q', title='Number of Buildings')
    ]
).transform_filter(
    f"datum['Year Constructed'] <= {year_slider.name}"  # Filter data dynamically by the year slider
).transform_aggregate(
    BuildingCount='count()',  # Aggregate data to compute the number of buildings
    groupby=['City']
).transform_window(
    rank='rank(BuildingCount)',  # Rank cities based on building count
    sort=[alt.SortField('BuildingCount', order='descending')]
).transform_filter(
    'datum.rank <= 30'  # Filter to include only the top 30 cities
).add_params(
    year_slider
).properties(
    title='Number of Buildings by Top 30 Cities (Filtered by Year Constructed)',
    width=800,
    height=500
)

In [None]:
interactive_bar