In [None]:
import pandas as pd

# Cumulative number of questions per library at each month end.
questions_per_library = (
    pd.read_csv(
        'drive/MyDrive/data/stackoverflow.zip',
        parse_dates=True, index_col='creation_date'
    )
    # Keep every row and the label-based column range 'pandas' through 'bokeh'.
    # The comma matters: without it the labels are read as the stop/step of a
    # single row slice instead of a column selection.
    .loc[:, 'pandas':'bokeh']
    # Monthly totals, then a running total over time.
    .resample('1M').sum()
    .cumsum()
    # Align to a fixed month-end range; months with no data become 0.
    .reindex(pd.date_range('2008-08', '2021-10', freq='M'))
    .fillna(0)
)
questions_per_library.tail()

In [None]:
from matplotlib.animation import FuncAnimation

In [None]:
import matplotlib.pyplot as plt
from matplotlib import ticker

def bar_plot(data):
  """Create the empty horizontal bar chart that an animation fills in later.

  One zero-width bar is drawn per column of `data`, ordered by the values in
  the final row, and each bar is labelled with its column name so it can be
  matched back to the data through `get_label()`.

  Parameters
  ----------
  data : pandas.DataFrame
      One column per bar; the last row determines the bar order.

  Returns
  -------
  tuple
      The `(fig, ax)` pair holding the chart.
  """
  fig, ax = plt.subplots(figsize=(8, 6))

  # Rank the columns by their final value. `iloc[-1]` is used instead of the
  # deprecated `DataFrame.last('1M').squeeze()`; it always yields one Series.
  sort_order = data.iloc[-1].sort_values().index

  # Start every bar at width 0 and tag it with the column it represents.
  for label, bar in zip(sort_order, ax.barh(sort_order, [0] * data.shape[1])):
    bar.set_label(label)

  ax.set_xlabel('total questions', fontweight='bold')
  ax.set_xlim(0, 250_000)
  ax.xaxis.set_major_formatter(ticker.EngFormatter())
  ax.xaxis.set_tick_params(labelsize=12)
  ax.yaxis.set_tick_params(labelsize=12)

  # Drop the top/right frame lines for a cleaner look.
  for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

  fig.tight_layout()

  return fig, ax

In [None]:
def generate_plot_text(ax):
  """Add the blank text artists that the animation will fill in.

  For every patch on `ax` an empty annotation is placed at x=0, vertically
  centred on that patch. A single empty text is also added near the
  lower-right corner of the axes (axes coordinates 0.9, 0.1).

  Returns
  -------
  tuple
      `(annotations, time_text)`: the list of per-patch annotations, in the
      same order as `ax.patches`, and the corner text artist.
  """
  annotations = []
  for patch in ax.patches:
    mid_y = patch.get_y() + patch.get_height() / 2
    annotations.append(
        ax.annotate('', xy=(0, mid_y), ha='left', va='center')
    )

  time_text = ax.text(
      0.9, 0.1, '',
      transform=ax.transAxes, fontsize=15, ha='center', va='center'
  )
  return annotations, time_text

In [None]:
def update(frame, *, ax, df, annotations, time_text):
  """Redraw the bar chart for one animation frame.

  Parameters
  ----------
  frame : index label of `df` for the row to display; must support `strftime`.
  ax : axes whose patches carry the column name as their label.
  df : DataFrame holding one row per frame and one column per bar.
  annotations : text artists paired positionally with `ax.patches`.
  time_text : text artist that shows the frame's month and year.

  Bars whose value is zero (falsy) at this frame are left untouched.
  """
  row = df.loc[frame, :]

  for bar, note in zip(ax.patches, annotations):
    value = row[bar.get_label()]
    if not value:
      continue
    bar.set_width(value)
    note.set_x(value)
    note.set_text(f' {value:,.0f}')

  time_text.set_text(frame.strftime('%b\n%Y'))

In [None]:
from functools import partial

def bar_plot_init(questions_per_library):
  """Set up the bar chart and the per-frame callback for the animation.

  Builds the empty chart with `bar_plot`, adds the text artists with
  `generate_plot_text`, and binds them (together with the data) to `update`.

  Returns
  -------
  tuple
      `(fig, bar_plot_update)` where `bar_plot_update(frame)` redraws the
      chart for a single frame.
  """
  fig, ax = bar_plot(questions_per_library)
  annotations, time_text = generate_plot_text(ax)

  # Everything except the frame itself is fixed up front.
  bound_arguments = {
      'ax': ax,
      'df': questions_per_library,
      'annotations': annotations,
      'time_text': time_text,
  }
  return fig, partial(update, **bound_arguments)

In [None]:
# Build the chart and its frame callback, then render one frame per index
# entry of the data and write the result to an MP4 via ffmpeg.
fig, update_func = bar_plot_init(questions_per_library)

ani = FuncAnimation(
    fig,
    update_func,
    frames=questions_per_library.index,
    repeat=False,
)
ani.save(
    '../media/stackoverflow_question.mp4',
    writer='ffmpeg',
    fps=10,
    bitrate=100,
    dpi=300,
)
# Close the figure so the static (empty) chart is not shown as cell output.
plt.close()

In [None]:
from IPython import display

# Embed the saved animation in the notebook output.
display.Video(
    '../media/stackoverflow_question.mp4',
    width=600,
    height=400,
    embed=True,
    html_attributes='controls muted autoplay',
)

In [None]:
# Load the daily subway data indexed by (Borough, Datetime).
subway = pd.read_csv(
    '../data/NYC_subway_daily.csv',
    index_col=['Borough', 'Datetime'],
    parse_dates=['Datetime'],
)

# Move the first index level (Borough) into the columns, leaving one row per
# Datetime.
subway_daily = subway.unstack(level=0)
subway_daily.head()

In [None]:
import numpy as np

# NOTE(review): `manhattan_entries` was not defined in any visible cell, so
# this cell failed on a fresh kernel. It is presumably the Manhattan column of
# the entries data; the 'Entries' / 'M' labels are assumed -- TODO confirm
# against the CSV's column names and borough codes.
manhattan_entries = subway_daily['Entries']['M']

# Bin the values into 30 equal-width bins; keep both the counts and the edges.
count_per_bin, bin_ranges = np.histogram(manhattan_entries, bins=30)

In [None]:
def subway_histogram(data, bins, date_range):
  """Draw side-by-side weekend and weekday histograms of `data`.

  Bin edges are computed once from all of `data`, so both panels (and any
  later redraw using the returned edges) share the same bins.

  Parameters
  ----------
  data : pandas object with a DatetimeIndex
      Values to histogram; the index's `weekday` splits weekend from weekday.
  bins : passed to `np.histogram` as its `bins` argument.
  date_range : label or slice passed to `.loc` to choose what is plotted.

  Returns
  -------
  tuple
      `(fig, axes, bin_ranges, configs)`. Each dict in `configs` holds the
      panel's `label`, boolean `mask`, `ymax`, and (added here) `hist`, the
      bar container returned by `ax.hist`.
  """
  _, bin_ranges = np.histogram(data, bins=bins)

  # Monday-Friday are weekdays 0-4. Boolean arrays are inverted with `~`;
  # unary minus is not supported for NumPy booleans.
  weekday_mask = data.index.weekday < 5
  configs = [
      {'label': 'Weekend', 'mask': ~weekday_mask, 'ymax': 60},
      {'label': 'Weekday', 'mask': weekday_mask, 'ymax': 120}
  ]

  fig, axes = plt.subplots(1, 2, figsize=(8, 4), sharex=True)
  for ax, config in zip(axes, configs):
    # Keep only the bar container; counts and edges are not needed here.
    _, _, config['hist'] = ax.hist(
        data[config['mask']].loc[date_range], bin_ranges, ec='black'
    )
    ax.xaxis.set_major_formatter(ticker.EngFormatter())
    ax.set(
        xlim=(0, None), ylim=(0, config['ymax']),
        xlabel=f'{config["label"]} Entries'
    )
    for spine in ['top', 'right']:
      ax.spines[spine].set_visible(False)

  axes[0].set_ylabel('Frequency')
  fig.suptitle('Histogram of Daily Subway Entries in Manhattan')
  fig.tight_layout()

  return fig, axes, bin_ranges, configs

In [None]:
# Static preview of the 2017 data; the return value is discarded so that only
# the figure is shown.
_ = subway_histogram(
    manhattan_entries,
    bins=30,
    date_range='2017',
)

In [None]:
def add_time_text(ax):
  """Add an empty, centred text artist near the upper-left corner of `ax`.

  The text is positioned in axes coordinates at (0.15, 0.9) and returned so
  the caller can update its content later.
  """
  return ax.text(
      0.15, 0.9, '',
      transform=ax.transAxes,
      fontsize=15,
      ha='center',
      va='center',
  )

In [None]:
from pandas.core import frame
def update(frame, *, data, configs, time_text, bin_ranges):
  """Redraw the histograms for the 365 days ending at `frame`.

  NOTE(review): this rebinds the module-level name `update`, which an earlier
  cell also defines for the bar chart animation.

  Parameters
  ----------
  frame : timestamp marking the (inclusive) end of the window.
  data : pandas object with a datetime index holding the values to bin.
  configs : iterable of dicts, each with a boolean `mask` aligned to `data`
      and a `hist` container whose `patches` are the bars to resize.
  time_text : text artist that shows the frame's month and year.
  bin_ranges : bin edges passed to `np.histogram`.

  Returns
  -------
  list
      The artists that were actually changed during this call.
  """
  artists = []

  # Only touch the label when the displayed month/year actually changes.
  time_label = frame.strftime('%b\n%Y')
  if time_label != time_text.get_text():
    time_text.set_text(time_label)
    artists.append(time_text)

  # The window is the same for every panel, so compute it once:
  # strictly after (frame - 365 days) up to and including frame.
  window_start = frame - pd.Timedelta(days=365)
  time_frame_mask = (data.index > window_start) & (data.index <= frame)

  for config in configs:
    counts, _ = np.histogram(
        data[time_frame_mask & config['mask']],
        bin_ranges
    )
    for count, rect in zip(counts, config['hist'].patches):
      if count != rect.get_height():
        rect.set_height(count)
        artists.append(rect)

  return artists

In [None]:
def histogram_init(data, bins, initial_date_range):
  fig, axes, bin_ranges, configs = subway_histogram(data, bins, initial_date_range)

  update_fuunc = partial(
      update, data = data, configs = configs
  )