In [1]:
import doutils
doutils.nb.megaimport(globals())
%load_ext autoreload
%autoreload 2

from shapely.geometry import box, Polygon
import h3
from cartoframes.viz import Map, Layer, size_bins_style, color_bins_style, color_continuous_style, basic_style

from sf_utils import *

In [2]:
import json
from pathlib import Path

from matplotlib import pyplot as plt
import seaborn as sns

In [3]:
# Load the hotspot lookup from disk. Later cells call `.get(city)` on it and
# interpolate the result into an `h3_z7 IN (...)` SQL filter, so it is used as
# a mapping from city name to a collection of h3_z7 cell ids.
with open("hotspots_h3_z7.json", "r") as fp:
    hotspots_h3_z7 = json.load(fp)

In [4]:
# Client used below to run the per-city hotspot queries.
# NOTE(review): `get_bq_client` is not defined in the visible cells; presumably
# it comes from `sf_utils` or `doutils.nb.megaimport` — confirm.
bq_client = get_bq_client()

In [5]:
# Cities to process. Each name is interpolated into the BigQuery table name,
# the hotspots/ts CSV file names and the output PNG file names.
cities = ["buffalo", "raleigh", "los_angeles", "chicago"]

In [36]:
def plot_time_series_2_years(city):
    """Draw the Black Friday windows of 2019 and 2020 for one city, stacked.

    The city's frame is taken from the notebook-level ``ts_daily_dict`` and its
    columns are renamed with ``rename_columns``. Two slices are taken with
    ``.loc`` from ``bf - 7`` to ``bf + 7`` around ``bf_2019`` and ``bf_2020``
    (presumably +/- 7 days; this depends on the type of those globals) and
    plotted as line charts, 2019 on the top axes and 2020 on the bottom axes.
    A black vertical line marks each Black Friday.

    Parameters
    ----------
    city : str
        Key into ``ts_daily_dict`` and ``title_cities``.

    Returns
    -------
    The two-row figure created by ``plt.subplots``; only the top axes carry a
    legend.
    """
    daily = ts_daily_dict[city].rename(columns=rename_columns)
    window_2019 = daily.loc[bf_2019 - 7 : bf_2019 + 7]
    window_2020 = daily.loc[bf_2020 - 7 : bf_2020 + 7]

    # Pair each (renamed) column with a colour; zip stops at the shorter input.
    palette = dict(zip(daily.columns, colors))

    fig, (top_ax, bottom_ax) = plt.subplots(nrows=2, figsize=(20, 14))

    # Axis labels are set before plotting, as in the original cell order.
    top_ax.axvline(bf_2019, 0, 1, color="black")
    top_ax.set_title(
        f"Total daily visits in hotspot locations per category during Black Friday weeks - {title_cities.get(city)}",
        size=20,
    )
    top_ax.set(xlabel="Black Friday 2019", ylabel="Total daily visits")

    bottom_ax.axvline(bf_2020, 0, 1, color="black")
    bottom_ax.set(xlabel="Black Friday 2020", ylabel="Total daily visits")

    sns.lineplot(data=window_2019, palette=palette, dashes=False, ax=top_ax)
    sns.lineplot(data=window_2020, palette=palette, dashes=False, ax=bottom_ax, legend=False)
    top_ax.legend(loc="upper right")
    return fig

In [31]:
def plot_time_series(city, year):
    """Plot one city's per-category series around a single Black Friday.

    Parameters
    ----------
    city : str
        Key into the notebook-level ``ts_daily_dict``.
    year : int
        2019 or 2020; selects ``bf_2019`` or ``bf_2020`` as the window centre.

    Returns
    -------
    Whatever ``sns.lineplot`` returns, drawn on a new 20x8 figure.

    Raises
    ------
    ValueError
        If ``year`` is neither 2019 nor 2020. Previously such a value fell
        through both branches and failed later with ``UnboundLocalError``.

    Notes
    -----
    Reads the notebook-level ``cat_palette``, which must already be defined
    when this function is called.
    """
    # Validate first so a bad year fails with a clear message before any data
    # is touched.
    if year not in (2019, 2020):
        raise ValueError(f"year must be 2019 or 2020, got {year!r}")
    bf_date = bf_2019 if year == 2019 else bf_2020

    ts_daily = ts_daily_dict[city]
    # Only the requested window is sliced (bf - 7 .. bf + 7; presumably days,
    # depending on the type of bf_2019 / bf_2020).
    ts_daily_bf = ts_daily.loc[bf_date - 7 : bf_date + 7]

    fig, ax = plt.subplots(figsize=(20, 8))
    # Draw on the explicit axes rather than through pyplot's current-axes state.
    ax.axvline(bf_date, 0, 1, color="black")
    return sns.lineplot(data=ts_daily_bf, palette=cat_palette, dashes=False, ax=ax)

In [55]:
# Shorter display names for long category column labels (applied with
# DataFrame.rename in plot_time_series_2_years).
rename_columns = {
    'General Merchandise Stores, including Warehouse Clubs and Supercenters' : "Supercenters",
    'Sporting Goods, Hobby, and Musical Instrument Stores': 'Sport, Hobby and Instrument Stores',
}

# Twelve hex colours that get zipped with the category columns to build the
# plot palettes. An earlier assignment of a different 12-colour list was
# overwritten immediately by this one and has been removed as a dead store.
# NOTE(review): these values look like the CARTOColors "Prism" scheme — confirm.
colors = "#5F4690,#1D6996,#38A6A5,#0F8554,#73AF48,#EDAD08,#E17C05,#CC503E,#94346E,#6F4070,#994E95,#666666".split(",")

In [15]:
def _sql_string_list(values):
    """Render ``values`` as a parenthesised list of single-quoted SQL strings.

    Interpolating ``tuple(values)`` directly produces ``('x',)`` (trailing
    comma) for a single element; building the list explicitly avoids that.
    Backslashes and single quotes are backslash-escaped.

    Raises
    ------
    ValueError
        If ``values`` is empty, since ``IN ()`` is not a usable filter.
    """
    literals = [
        "'" + str(value).replace("\\", "\\\\").replace("'", "\\'") + "'"
        for value in values
    ]
    if not literals:
        raise ValueError("cannot build an SQL IN list from an empty collection")
    return "(" + ", ".join(literals) + ")"


# Download the hotspot rows of every city once and cache them as CSV.
# The next cell reads hotspots/hotspots_{city}.csv, so the result has to be
# persisted; previously the frame was downloaded and then discarded.
hotspots_dir = Path("hotspots")
hotspots_dir.mkdir(parents=True, exist_ok=True)

for city in tqdm(cities):
    hotspots_csv = hotspots_dir / f"hotspots_{city}.csv"
    if hotspots_csv.exists():
        # Already cached: skip the query and never overwrite an existing file.
        continue
    q_hotspots = f"""
    SELECT {", ".join(cols)}
    FROM `cartodb-on-gcp-datascience.juanluis.safegraph_blackfriday_{city}`
    WHERE raw_visit_counts is not null and top_category IN {_sql_string_list(chosen_categories)}
    AND h3_z7 IN {_sql_string_list(hotspots_h3_z7[city])}
    """
    df = bq_client.query(q_hotspots).result().to_dataframe()
    # Default index is written; the reader cell uses index_col=0 to match.
    df.to_csv(hotspots_csv)
    del df

100%|██████████| 3/3 [12:14<00:00, 244.92s/it]


In [8]:
# Build one daily time series per city from the hotspot CSVs: each series is
# kept in ts_daily_dict (keyed by city) and also written under ts/.
ts_daily_dict = {}
for city in tqdm(cities):
    df = pd.read_csv(
        f"hotspots/hotspots_{city}.csv",
        index_col=0,
        parse_dates=["date_range_start", "date_range_end"],
    )
    ts_daily = ts_daily_dict[city] = explode_daily_counts(df)
    ts_daily.to_csv(f"ts/ts_daily_hotspots_{city}.csv")
    # Drop the raw frame before loading the next city.
    del df

100%|██████████| 4/4 [12:51<00:00, 192.91s/it]


In [None]:
# Render the two-panel Black Friday figure of every city and save it as a
# 300-dpi PNG in the working directory.
for city in cities:
    fig = plot_time_series_2_years(city)
    fig.savefig(f"visits_in_hotspots_{city}.png", dpi = 300)

In [40]:
# Use Chicago's frame as the reference; its columns are zipped with `colors`
# in the following cell to build `cat_palette`.
ts_daily = ts_daily_dict["chicago"]

In [43]:
# Column -> colour mapping shared by the plots that read `cat_palette`;
# pairing stops at the shorter of the two sequences.
cat_palette = dict(zip(ts_daily.columns, colors))

In [None]:
# One figure per city: rolling mean over 7 rows (7 days if the index is daily)
# of visits per category, restricted to 2019-05-10 .. 2020-12-20 (exclusive),
# with both Black Fridays marked, saved as a 300-dpi PNG.
# The loop variable is not named `ts_daily`, so the notebook-level `ts_daily`
# used to build `cat_palette` is not overwritten by this cell.
for city, city_ts_daily in ts_daily_dict.items():
    ma7 = city_ts_daily.rolling(7).mean()
    # NOTE(review): "2020-05-01" was left as a commented-out value next to the
    # end date in the original cell; the active bound is "2020-12-20".
    ma7_filtered = ma7[(ma7.index > "2019-05-10") & (ma7.index < "2020-12-20")]
    fig, ax = plt.subplots(figsize=(20, 8))
    # Draw on the explicit axes rather than through pyplot's current-axes state.
    ax.axvline(bf_2019, 0, 1, color="black")
    ax.axvline(bf_2020, 0, 1, color="black")
    sns.lineplot(data=ma7_filtered, dashes=False, palette=cat_palette, ax=ax)
    ax.legend(loc="upper right")
    ax.set_title(f"Total visits counts per category 2019 - 2020 - {title_cities.get(city)}")
    fig.savefig(f"visits_in_hotspots_2019-2020_{city}.png", dpi=300)