<a href="https://colab.research.google.com/github/ReidelVichot/DSTEP23/blob/main/week_12/dstep23_heatmaps_infographics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **DSTEP23 // Heatmaps and Infographics: Visualizing and Communicating DSNY Collection Data**

*November 14, 2023*

This notebook creates a calendar-view heatmap of the refuse tonnage collected by DSNY (the NYC Department of Sanitation), and then builds a static infographic that combines temporal and spatial views of the same collection data.

---

In [None]:
# -- import pandas, numpy, and matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import calendar

### Creating a Calendar-view Heatmap

In [None]:
# -- location of the DSNY tonnage data; the MONTH column is parsed as dates
fname = "https://data.cityofnewyork.us/api/views/ebb7-mvp5/rows.csv?accessType=DOWNLOAD"
dsny = pd.read_csv(fname, parse_dates=["MONTH"])

# -- keep just the leading six columns
cols = dsny.columns[:6]
dsny = dsny[cols]

# -- give the remaining columns short, lowercase names
dsny.columns = [
    "month",
    "borough",
    "district",
    "refuse",
    "paper",
    "mgp",
]

# -- pull out the Brooklyn rows as an independent copy
bk = dsny[dsny["borough"].eq("Brooklyn")].copy()

# -- total over all districts for each month
bk_ts = bk.groupby("month").sum(numeric_only=True).reset_index()

# -- divide by the number of days in each month to get tons per day
cols = ["refuse", "paper", "mgp"]
bk_ts[cols] = bk_ts[cols].div(bk_ts["month"].dt.days_in_month, axis=0)

In [None]:
# -- add month and year columns, both taken from the parsed "month" dates
#    ("jan2dec" is the month number within the year)
bk["jan2dec"] = bk["month"].dt.month
bk["year"] = bk["month"].dt.year

In [None]:
# -- total every numeric column for each ("jan2dec", "year") pair
bk_gr = bk.groupby(by=["jan2dec", "year"]).sum(numeric_only=True)

# -- move the "jan2dec" level into the columns, then keep only the refuse
#    values, which leaves a year-by-"jan2dec" "grid"
bk_ustack = bk_gr.unstack(level="jan2dec")
bk_tgrid = bk_ustack["refuse"]

In [None]:
# -- create a heatmap of refuse by year (rows) and month (columns)
fig, ax = plt.subplots(figsize=(10, 5))

# -- draw the grid; aspect="auto" lets the cells stretch to fill the axes
im = ax.imshow(bk_tgrid.values, aspect="auto", interpolation="nearest")

# -- label each column with its month abbreviation
#    (assumes "jan2dec" holds month numbers 1-12)
ax.set_xticks(np.arange(bk_tgrid.shape[1]))
ax.set_xticklabels([calendar.month_abbr[int(m)] for m in bk_tgrid.columns])

# -- label every other row with its year to avoid crowding the axis
ypos = np.arange(0, bk_tgrid.shape[0], 2)
ax.set_yticks(ypos)
ax.set_yticklabels(bk_tgrid.index[ypos])

# -- a figure should stand alone: add axis labels and a title
ax.set_xlabel("month")
ax.set_ylabel("year")
ax.set_title("Brooklyn refuse collected per month")

# -- add a colorbar so that the colors can be read as values
fig.colorbar(im, ax=ax, label="refuse [tons]")

fig.show()

### Generating a Static Infographic from Spatiotemporal Data

In [None]:
# -- read in the DSNY district shapes
# NOTE(review): `gp` is not imported anywhere in the visible notebook, so this
#               cell raises a NameError on a fresh kernel.  Presumably it is
#               geopandas (`import geopandas as gp`) -- confirm, and add the
#               import to the import cell at the top of the notebook.
sname = "https://data.cityofnewyork.us/api/geospatial/i6mn-amj2?method=export&format=GeoJSON"
dstshp = gp.read_file(sname)

In [None]:
# -- download the DSNY tonnage data, parsing the MONTH column as dates
fname = "https://data.cityofnewyork.us/api/views/ebb7-mvp5/rows.csv?accessType=DOWNLOAD"
dsny = pd.read_csv(fname, parse_dates=["MONTH"])

# -- keep the columns at positions 0 through 5 plus position 10
#    (seven columns in total)
cols = dsny.columns[list(range(6)) + [10]]
dsny = dsny[cols]

# -- give the kept columns short, lowercase names
dsny.columns = [
    "month",
    "borough",
    "district",
    "refuse",
    "paper",
    "mgp",
    "borough_id",
]

# -- discard any row that has no borough_id
dsny = dsny.dropna(axis=0, subset=["borough_id"])

In [None]:
# -- create a district code for merging
# TODO: the right-hand side of this assignment is missing, so the cell is a
#       syntax error as written.  Presumably the code should be built from the
#       "borough_id" and "district" columns so that it matches the district
#       identifier used in the district shapes -- confirm against the columns
#       of `dstshp` before filling this in.
dsny["dcode"] =

In [None]:
# -- get totals for each district: sum the three tonnage columns over all
#    months for every district code, keeping "dcode" as a regular column so
#    that it can be used as a merge key
nyc_dtot = dsny.groupby("dcode")[["refuse", "paper", "mgp"]].sum().reset_index()

In [None]:
# -- merge with district shapes
# TODO: the right-hand side of this assignment is missing, so the cell is a
#       syntax error as written.  Presumably it should join the district
#       shapes (`dstshp`) with the per-district totals (`nyc_dtot`) on the
#       district code -- confirm the name and dtype of the matching column in
#       `dstshp` before filling this in.
dst_tot =

In [None]:
# -- change units of collection data: express refuse in thousands
dst_tot["refuse1000"] = dst_tot["refuse"] / 1000.0

# -- create a time series of total collection: sum the tonnage columns over
#    all districts for each month (the result is indexed by "month", which
#    the date comparison below relies on)
dsny_ts = dsny.groupby("month")[["refuse", "paper", "mgp"]].sum()

# -- sub-select the time range
dsny_ts = dsny_ts[(dsny_ts.index > "1993-01-01") & (dsny_ts.index < "2020-01-01")]

In [None]:
# -- now visualize
