## Data preparation

In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the COVID table and parse its 'day' column into datetimes in one step.
df_covid = (
    pd.read_csv('../data/covid.csv', low_memory=False)
    .assign(day=lambda frame: pd.to_datetime(frame['day']))
)

In [3]:
# Show the last five rows as a quick check of the loaded COVID data.
df_covid.tail(n=5)

Unnamed: 0,day,cases,deaths,new_cases,new_deaths,week_avg_new_cases,week_avg_new_deaths
240,2020-10-27,264428,23978,734.0,9.0,644.0,4.857143
241,2020-10-28,265263,23980,835.0,2.0,643.0,4.428571
242,2020-10-29,266321,23989,1058.0,9.0,718.285714,4.857143
243,2020-10-30,267227,23996,906.0,7.0,766.285714,4.714286
244,2020-10-31,267929,24001,702.0,5.0,759.142857,5.428571


The next cell parses `day` into a datetime and adds two columns: `year`, and `day_no_year`, which is the same date with the year set to 1900. Placing both years on a common 1900 calendar makes it easier to draw 2019 and 2020 as overlapping lines on the same chart.

In [4]:
# Load the per-day 311 counts and derive the date columns used further down.
df_311_day = (
    pd.read_csv('../data/complaints_by_day.csv', low_memory=False)
    .assign(
        # Parse the raw 'day' values into datetimes.
        day=lambda frame: pd.to_datetime(frame['day']),
        # Calendar year of each row.
        year=lambda frame: frame['day'].dt.year,
        # Same month and day with the year forced to 1900.
        # NOTE(review): replace(year=1900) raises ValueError for Feb 29 because
        # 1900 is not a leap year - confirm the data never contains a leap day.
        day_no_year=lambda frame: frame['day'].apply(lambda ts: ts.replace(year=1900)),
    )
)

The next cell calculates 7-day rolling averages of the total number of complaints and of the number of noise complaints. Each year is averaged separately, so a window never mixes rows from 2019 and 2020.

In [5]:
def rolling_avg(year, col, window=7):
    """Add a trailing rolling mean of ``col`` for one year to ``df_311_day``.

    The result is written in place into the module-level ``df_311_day`` frame,
    in a column named ``'week_avg_' + col``. Only rows whose ``year`` column
    equals ``year`` are read and written, so a window never mixes rows from
    different years. The first ``window - 1`` selected rows have no full
    window and are left as NaN.

    The window counts rows, not calendar days; it equals a 7-day average only
    if the frame holds one row per day in date order (assumed, not checked).

    Parameters
    ----------
    year : int
        Value of the ``year`` column selecting the rows to process.
    col : str
        Name of the numeric column to average.
    window : int, default 7
        Number of consecutive rows in each averaging window. The output column
        keeps the ``'week_avg_'`` prefix whatever the window, so existing
        callers and charts are unaffected.
    """
    new_col = 'week_avg_' + col
    # Build the row selector once and reuse it for both the read and the write.
    in_year = df_311_day['year'] == year
    df_311_day.loc[in_year, new_col] = df_311_day.loc[in_year, col].rolling(window).mean()

# Fill in the rolling-average columns: noise complaints first, then all
# complaints, processing 2019 before 2020 for each column.
for count_col in ('num_noise_complaints', 'num_complaints'):
    for yr in (2019, 2020):
        rolling_avg(yr, count_col)

In [6]:
# Show the last five rows to confirm the derived and rolling-average columns.
df_311_day.tail(n=5)

Unnamed: 0,day,num_complaints,num_noise_complaints,year,day_no_year,week_avg_num_noise_complaints,week_avg_num_complaints
485,2020-10-27,7198,1601,2020,1900-10-27,2403.285714,7381.571429
486,2020-10-28,6712,1459,2020,1900-10-28,2327.571429,7315.571429
487,2020-10-29,6985,1060,2020,1900-10-29,2176.142857,7261.714286
488,2020-10-30,7884,1393,2020,1900-10-30,1951.285714,7255.428571
489,2020-10-31,7997,3027,2020,1900-10-31,1805.571429,7316.571429


## Complaint Charts

### Noise complaints

In [7]:
# One line per year: rolling-average noise complaints plotted against the
# year-less date column so the years overlap on the same x-axis.
noise_chart = (
    alt.Chart(df_311_day)
    .mark_line()
    .encode(
        x=alt.X('day_no_year', title='Date'),
        y=alt.Y('week_avg_num_noise_complaints', title='Noise complaints 7-day average'),
        color=alt.Color('year:N'),
    )
)
noise_chart

### All complaints

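The sharp drop in the 2019 line in July is curious. **TODO:** check whether it reflects a gap in the source data or a real change in complaint volume.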

In [8]:
# One line per year: rolling-average count of all 311 complaints plotted
# against the year-less date column so the years overlap on the same x-axis.
complaints_chart = (
    alt.Chart(df_311_day)
    .mark_line()
    .encode(
        x=alt.X('day_no_year', title='Date'),
        y=alt.Y('week_avg_num_complaints', title='311 complaints 7-day average'),
        color=alt.Color('year:N'),
    )
)
complaints_chart

## COVID Charts

In [9]:
# Single line: the precomputed weekly average of new COVID cases over time.
covid_chart = (
    alt.Chart(df_covid)
    .mark_line()
    .encode(
        x=alt.X('day', title='Date'),
        y=alt.Y('week_avg_new_cases', title='New Cases 7-day average'),
    )
)
covid_chart