In [2]:
import pandas as pd
import altair as alt

Github Link: https://github.com/Noah-Mack-01/DS4200_A5

My dataset is pulled from the Census Bureau, and documents the percentage of rental vacancies by state and quarter. (Available here: https://www.census.gov/housing/hvs/data/rates.html).
I was interested in seeing the overall trends in rental demand over the latter half of the 2010s, especially when interest rates were as low as they were, and then the subsequent shock from the COVID-19 pandemic. 

Surprisingly enough, there wasn't as drastic an increase in vacancies as expected. While the period aligned with the pandemic certainly saw local, or even global, maxima, there were also several instances of severe quarter-to-quarter fluctuation. Massachusetts, for example, did spike from 4.3% to 6.6%, an approximately 50% increase in vacancies, at the start of the pandemic, but swiftly saw that figure drop to 3.1% by Q2.

However, the District of Columbia, which is included in the dataset, may help explain this: it saw a dramatic rise in vacancies that did not peak until Q2 of 2021 and has yet to return to pre-pandemic lows. Given the urban nature of Washington, D.C., one might speculate that the true shocks to the rental market are and were limited to urban areas, whereas most states are dominated by less populous but nevertheless developed suburban regions with ample housing, or have differences in tenant law or lease standards that affected their markets' liquidity.

In [107]:
# Load the rental-vacancy table and discard every row and every column that
# contains no values at all (blank spacer rows/columns from the CSV).
# NOTE(review): the displayed frame is not in chronological order and repeats
# some Quarter labels (e.g. 4/1/2021 at index 4 and 8) -- verify the date
# labels against the source file.
df = (
    pd.read_csv("tab1_state05_2022_rvr.csv")
    .dropna(axis=0, how='all')
    .dropna(axis=1, how='all')
)
df

Unnamed: 0,Quarter,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District of Columbia,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming
0,4/1/2022,6.9,5.1,6.0,7.5,3.8,3.4,4.7,3.3,8.3,...,5.4,6.0,7.7,5.0,2.5,5.3,4.9,8.0,4.9,9.2
1,7/1/2022,6.9,4.1,5.5,8.4,4.1,4.0,2.1,2.6,8.5,...,4.7,8.9,7.4,4.7,2.4,4.5,4.0,7.3,3.2,6.0
2,10/1/2022,9.3,6.2,6.7,12.9,4.1,5.0,2.0,3.7,7.7,...,5.8,7.8,8.1,5.0,4.2,3.3,5.5,6.4,4.5,7.3
3,1/1/2023,9.3,5.9,6.7,13.3,3.6,3.9,4.4,4.7,7.2,...,7.9,6.5,8.3,3.0,5.1,6.3,4.3,10.6,5.8,6.3
4,4/1/2021,9.5,5.7,5.2,7.0,4.8,5.0,6.1,3.0,10.5,...,8.9,6.4,9.5,4.2,1.8,4.9,4.1,5.5,4.5,9.9
5,7/1/2021,12.8,3.9,4.7,6.8,4.8,4.1,5.4,3.8,11.0,...,8.7,7.6,7.9,4.6,1.8,4.4,5.3,6.0,3.5,7.5
6,10/1/2021,12.5,3.8,4.7,8.9,4.0,3.9,4.1,3.0,9.9,...,10.0,8.4,8.2,5.4,3.5,6.6,5.1,6.6,5.1,7.5
7,1/1/2022,12.0,3.5,4.6,7.5,3.5,5.2,4.7,5.5,7.6,...,7.2,6.6,8.0,3.2,2.7,5.7,3.6,8.1,5.4,7.1
8,4/1/2021,16.0,8.2,3.4,9.1,3.4,4.5,5.7,9.4,7.4,...,8.8,7.4,8.3,6.3,3.6,8.8,4.8,7.1,3.0,9.1
9,7/1/2021,17.4,7.5,6.3,8.0,3.2,2.6,6.0,5.5,6.8,...,8.1,9.1,8.3,4.8,3.9,4.4,2.9,6.4,6.3,9.8


In [142]:

# Raw quarterly vacancy rates (points) for Massachusetts and Maryland, each
# overlaid with a trailing four-quarter rolling mean (lines).

# A window transform walks rows in order, and the loaded frame is not stored
# chronologically, so parse the dates and sort before charting. Built as a new
# frame so that `df` itself is left untouched for other cells.
vacancy_by_quarter = (
    df.assign(Quarter=pd.to_datetime(df["Quarter"]))
    .sort_values("Quarter")
)


def _rolling_mean_line(data, state, color):
    """Return a line chart of the trailing four-quarter mean of `state`.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide table with a ``Quarter`` date column and one column per state.
    state : str
        Name of the state column to average.
    color : str
        Line colour.

    Returns
    -------
    alt.Chart
        Line chart of the rolling mean against ``Quarter``.
    """
    return (
        alt.Chart(data)
        .mark_line(color=color, size=2)
        .transform_window(
            rolling_mean=f"mean({state})",
            # Current quarter plus the three before it = one full year.
            # An upper bound of 1 would leak the *next* quarter into the mean.
            frame=[-3, 0],
            # Make the window order explicit instead of relying on row order.
            sort=[alt.SortField("Quarter", order="ascending")],
            # Keep the frame at exactly four rows even if dates repeat.
            ignorePeers=True,
        )
        .encode(
            x=alt.X("Quarter:T", axis=alt.Axis(title="Date")),
            # Share the points' axis title so the layered axis is not
            # labelled with the internal field name "rolling_mean".
            y=alt.Y("rolling_mean:Q", axis=alt.Axis(title="Vacancy %")),
        )
    )


def _quarterly_points(data, state, color):
    """Return a point chart of the raw quarterly vacancy rate of `state`.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide table with a ``Quarter`` date column and one column per state.
    state : str
        Name of the state column to plot.
    color : str
        Point colour.

    Returns
    -------
    alt.Chart
        Point chart of the state's vacancy rate against ``Quarter``.
    """
    return (
        alt.Chart(data)
        .mark_point(color=color)
        .encode(
            x=alt.X("Quarter:T", axis=alt.Axis(title="Date")),
            y=alt.Y(f"{state}:Q", axis=alt.Axis(title="Vacancy %")),
        )
    )


line = _rolling_mean_line(vacancy_by_quarter, "Massachusetts", "blue")
lineMary = _rolling_mean_line(vacancy_by_quarter, "Maryland", "red")

pointsMass = _quarterly_points(vacancy_by_quarter, "Massachusetts", "cyan")
pointsMary = _quarterly_points(vacancy_by_quarter, "Maryland", "pink")

combined = pointsMass + pointsMary
# The title lives on the layered chart and names both plotted states.
(combined + line + lineMary).properties(
    title="Rental Vacancy in Massachusetts and Maryland with Rolling Annual Average"
)

## I believe that the use of a sliding window and rolling mean, as detailed in the provided examples, is enough of a distinction from a typical line or scatter chart to qualify.
## The sliding window includes 3 preceding periods to signify a full year of four quarters.
## I tried for about 2 hours to get this to have a drop-down that changed the selected state, but found out that Vega-Lite doesn't even allow that.
## So, I couldn't make it interactive as I'd intended, and I ran out of time.

## I tried my best to reshape my dataset into a "tall" (long-format) table, but the end result made comparing any states' trends unwieldy and didn't lend itself to interactivity.
## I'm expecting partial credit at best on this; this is not nearly my best work, and I apologize.

In [177]:
### Layered histogram of vacancy-rate distributions, with a drop-down that
### highlights one state and fades the others.

# Single source of truth for the states shown; used both for the drop-down
# options and for the fold, so the two lists cannot drift apart.
histogram_states = ['Massachusetts', 'Michigan', 'Maryland']

dd = alt.binding_select(options=histogram_states, name='State')
selection = alt.selection_single(fields=['State'], bind=dd)

# Both branches of an opacity condition must be opacity values: the selected
# state's bars stay at .7 and every other state's bars fade to .05.
# (A colour encoding in the "selected" branch would map the nominal State
# field onto the opacity scale instead of highlighting the selection.)
opacity = alt.condition(selection, alt.value(.7), alt.value(.05))

alt.Chart(
    df,
    title='Distribution of Quarterly Rental Vacancy Rates by State',
).transform_fold(
    # Reshape the wide per-state columns into (State, Percentage) pairs.
    histogram_states,
    as_=['State', 'Percentage']
).mark_bar(
    binSpacing=1,
).encode(
    alt.X('Percentage:Q', bin=alt.Bin(maxbins=10)),
    # stack=None overlays the per-state histograms instead of stacking them.
    alt.Y('count()', stack=None),
    color='State:N',
    opacity=opacity
).add_selection(
    selection
)