In [None]:

import pandas as pd
import bqplot
import numpy as np
import ipywidgets

In [None]:
# Load the full building inventory. A 0 in any of the four numeric columns
# listed below is read in as NaN; every other column is left untouched.
buildings = pd.read_csv(
    'https://uiuc-ischool-dataviz.github.io/is445_AOUAOG_fall2021/week02/data/building_inventory.csv',
    na_values=dict.fromkeys(
        ['Square Footage', 'Year Acquired', 'Year Constructed', 'Floors'],
        0,
    ),
)

Here, I imported the complete building dataset and replaced every 0 in the Square Footage, Year Acquired, Year Constructed, and Floors columns with NaN. I have not applied any transformation to the NaN values in the County column.

In [None]:
# Preview only the first rows rather than rendering the whole inventory.
buildings.head()

##### Plotting barplot

The right-hand component of the dashboard is a bar plot, because we need to plot Year Acquired vs. total Square Footage.

In [None]:

# 1. Data. Group the buildings by acquisition year and total the square footage per year.
buildings_barplot = buildings.groupby(['Year Acquired'])['Square Footage'].sum()
# sum() skips NaN, so a year whose buildings all lack a recorded area totals 0,
# and log10(0) is -inf, which cannot be placed on the linear y scale below.
# Keep only the years with a positive total.
buildings_barplot = buildings_barplot[buildings_barplot > 0]
years = buildings_barplot.index
area = buildings_barplot.values

# 2. Scales
x_scb = bqplot.OrdinalScale() # years are treated as categories, hence an ordinal scale
y_scb = bqplot.LinearScale() # (log-transformed) total square footage is continuous

# 3. Marks
# The totals span several orders of magnitude, so log10 of the area is plotted.
bar = bqplot.Bars(x = years, y = np.log10(area), scales={'x': x_scb, 'y': y_scb})

# 4. Axis
# Ticks are rotated and shrunk because the year labels can overlap.
# Possible future improvement: adapt the font size to the number of values shown on the x axis.
x_axb = bqplot.Axis(scale = x_scb, label='Year Acquired', tick_rotate=90, tick_style={'font-size':'3px'})
y_axb = bqplot.Axis(scale = y_scb, orientation = 'vertical', label = 'Square footage (log)', label_offset='40px')

# 5. Interactions
# No interactions are attached to the bar plot itself.

# 6. Figure object
fig_barplot = bqplot.Figure(marks=[bar], axes=[x_axb, y_axb])

Data transformations for plotting heatmap:

Here, I created a pivot table with the county as the index and the agency names as the columns. The values in the table are the mean square footage. Creating this table is necessary because it provides the value for each cell of the heatmap.


In [None]:
# Mean square footage for every (county, agency) pair, log10-transformed in one step.
buildings_heatmap = np.log10(
    buildings.pivot_table(
        values='Square Footage',
        index='County',
        columns='Agency Name',
        aggfunc='mean',
    )
)
# Agency names in the same order as the pivot table's columns.
agency_list = buildings_heatmap.columns.tolist()


In [None]:
# Preview only the first rows of the log10 pivot table.
buildings_heatmap.head()

##### Plotting Heatmap

In [None]:
# heatmap
# 1. Data: the buildings_heatmap pivot table created above
#    (index = counties, columns = agencies).
# 2. Scales
col_sc = bqplot.ColorScale(scheme = 'Spectral')
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# 3. Marks. The agency list is passed as the columns and the county names as the rows.
heat_map = bqplot.GridHeatMap(row = buildings_heatmap.index,
                              column = agency_list,
                              color = buildings_heatmap,
                              scales = {'color': col_sc, 'row': y_sc, 'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill': 'blue'})

# 4. Axis
c_ax = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')

# x_sc is bound to the mark's 'column' dimension (agency_list), so the horizontal
# axis shows agencies. Ticks are rotated and resized because they overlapped, and
# the label is offset so that it sits clear of the ticks.
x_ax = bqplot.Axis(scale = x_sc, label='Agency Name', label_offset='55px',
                   tick_rotate=90,
                   tick_style={'font-size':'8px'})
# y_sc is bound to the mark's 'row' dimension (the pivot table index), so the
# vertical axis shows counties.
y_ax = bqplot.Axis(scale = y_sc,
                   orientation = 'vertical',
                   label = 'County',
                   tick_style={'font-size':'3.5px'})

# 5. Interactions
def on_selected(change):
    """Redraw the bar chart for the heatmap cell the user clicked.

    Parameters
    ----------
    change : dict
        Change notification passed to the observer; ``change['owner']`` is
        the object whose ``selected`` attribute is read here.

    The bar chart is updated only when exactly one cell is selected. Its
    bars then show, per acquisition year, log10 of the total square footage
    of the buildings matching the selected county (row) and agency (column).
    Any other selection state, including no selection, leaves the chart
    unchanged.
    """
    selected = change['owner'].selected
    # `selected` may be None when nothing is selected; len(None) would raise.
    if selected is None or len(selected) != 1:
        return

    i, j = selected[0]  # (row, column) indices of the selected cell
    # Boolean mask of the buildings that belong to the selected agency and county.
    mask = (buildings['Agency Name'] == buildings_heatmap.columns[j]) & \
        (buildings['County'] == buildings_heatmap.index[i])
    grouped = buildings[mask].groupby("Year Acquired")["Square Footage"].sum()
    # sum() skips NaN, so a year with no recorded area totals 0 and log10(0)
    # is -inf; keep only the years with a positive total.
    grouped = grouped[grouped > 0]

    # The bar chart is first drawn with log10 values on an axis labelled
    # "(log)", so the update has to use the same transformation.
    bar.x = grouped.index
    bar.y = np.log10(grouped.values)

# Call on_selected whenever the heatmap's `selected` attribute changes.
heat_map.observe(on_selected, names='selected')

# 6. Figure object: the heatmap mark together with its colour, x and y axes.
fig_heatmap = bqplot.Figure(
    axes=[c_ax, x_ax, y_ax],
    marks=[heat_map],
)


##### Creating the dashboard

Displaying the two plots created above.

In [None]:
# Give both figures the same minimum width.
for _fig in (fig_heatmap, fig_barplot):
    _fig.layout.min_width = '500px'

# Heatmap on the left, bar plot on the right, wrapped in a vertical container.
figures = ipywidgets.HBox(children=[fig_heatmap, fig_barplot])
myDashboard = ipywidgets.VBox(children=[figures])
myDashboard
