In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as cm
import matplotlib.widgets as mpl_widgets
import plotly.express as px
import plotly.graph_objects as go

import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import display

from time import sleep
from calendar import monthrange
import json
import requests
import datetime

from statsmodels.tsa.holtwinters import ExponentialSmoothing

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Credentials are kept out of the notebook in login.py.
# Running it is expected to define `login` and `pwd`, which the login query
# in the next cell interpolates.
%run login.py

In [3]:
# GraphQL logIn mutation as a multiline formatted string.
# Assumes `login` and `pwd` are already defined (see login.py above).
#
# json.dumps() renders each credential as a quoted, fully escaped string
# literal, so a password containing `"` or `\` can no longer break the query
# (or inject extra fields into it).

loginquery = f"""
mutation {{
  logIn(
      email:{json.dumps(login)},
      password:{json.dumps(pwd)}) {{
    jwt {{
      token
      exp
    }}
  }}
}}
"""

In [4]:
url = 'https://api.numina.co/graphql'

# Send the login mutation. The timeout keeps a stalled connection from
# hanging the kernel indefinitely (requests never times out by default).
mylogin = requests.post(url, json={'query': loginquery}, timeout=30)
mylogin

<Response [200]>

In [5]:
# JWT from the login response; sent as the Authorization header on later requests.
token = mylogin.json()['data']['logIn']['jwt']['token']

In [6]:
# Expiry of the JWT as reported by the login response (displayed for reference).
expdate = mylogin.json()['data']['logIn']['jwt']['exp']
expdate

'2020-03-18T16:51:44.309423'

# Part 1: How Does Dwell Time Change

Querying the data fresh each time takes around 1 minute for 96 queries (1 day in 15-minute intervals) and around 20 seconds for 31 queries (1 month in 1-day intervals).  We might want to limit user selections to 1 day in 15-minute or 1-hour intervals, or 1 month in 1-day intervals, to avoid loading data for too long.

TODO: Make a loading bar for data loading.

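To keep the animation responsive, thin out the heatmap points before plotting.  If by 15 minutes, keep only the points with even coordinates.  If by hour, investigate the highest number of points and decide between keeping evens or every third.  If by day for a month, keep every third.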

### Note: 
- do not show counts of people for privacy purposes.
- every time a selection is made, we requery numina for the data.  This means that loading is slower (especially if you choose day by 15 minutes or month by 6 hours), but we are not storing any data for privacy purposes.
- transitions during playback can be slow when showing days in a month, due to the high volume of data involved in making the heatmap
- the movements of the points are not indicative of any actual movements of people, they're just there to make the transitions look cool
- We only have information for Streetscape from Feb 20 2019 - Jan 12 2020.  We only have information for Outside from Mar 20 2019 - Jan 12 2020.  We only have information for Under Raincoat from Mar 20 2019 - Dec 6 2019.  Choosing any date outside these ranges will result in nothing being shown on the maps.
- Plotly's FigureWidget does not currently support animations, so I am using a basic Output widget to capture the animated heatmap plot.  This means that if we switch tabs to the other plot and then reselect data to be redrawn, the animated plot may end up horribly squashed when you switch back to it.  Please always have the "Heatmap Over Time" tab selected when you reselect data to avoid this problem.

In [8]:
# useful global stuff
# Maps each area's display name to [sensor serial number, background image file].
# Index 0 is passed as the serial number in the API queries; index 1 is the
# image drawn underneath the heatmaps.
locations = {"Streetscape": ["SWLSANDBOX1", "streetscape.png"], 
             "Under Raincoat": ["SWLSANDBOX2", "under_raincoat.png"], 
             "Outside": ["SWLSANDBOX3", "outside.png"]}

In [80]:
# widgets

# shared output area for validation messages (appears in both selection panels)
error_messages = widgets.Output()

# --- "one day" selections ---
day_picker = widgets.DatePicker(description='Date:', disabled=False)

day_timerange_dd = widgets.Dropdown(
    options=["15 Minutes", "1 Hour", "6 Hours"],
    description="By:",
)

# the area dropdown is shared by the day panel and the month panel
area_dd = widgets.Dropdown(options=list(locations.keys()), description="Area:")

day_select = widgets.Button(description='Select Day', disabled=False, button_style='')

day_selections = widgets.VBox(
    [day_picker, day_timerange_dd, area_dd, day_select, error_messages]
)

# --- "one month" selections ---
# month name -> month number; the names come from strftime('%B')
months_choices = {
    datetime.date(2019, month_number, 1).strftime('%B'): month_number
    for month_number in range(1, 13)
}

month_dd = widgets.Dropdown(options=list(months_choices.keys()), description="Month:")
year_dd = widgets.Dropdown(options=[2019, 2020], description="Year:")
month_timerange_dd = widgets.Dropdown(options=["6 Hours", "1 Day"], description="By:")

month_select = widgets.Button(description='Select Month', disabled=False, button_style='')

month_selections = widgets.VBox(
    [month_dd, year_dd, month_timerange_dd, area_dd, month_select, error_messages]
)

# --- loading bar (its max is reset by the selection handlers) ---
loading1 = widgets.IntProgress(
    value=0, min=0, max=10, step=1,
    description='Loading:', bar_style='info', orientation='horizontal',
)

# --- output widgets ---
# the animated heatmap goes into a plain Output; the summary uses a FigureWidget
heatmap_by_time = widgets.Output()
heatmap_popular_areas = go.FigureWidget()

# selections in folding accordion tabs
selections1 = widgets.Accordion(children=[day_selections, month_selections])
selections1.set_title(0, 'Show One Day')
selections1.set_title(1, 'Show One Month')

# output graph tabs
tab1 = widgets.Tab([heatmap_by_time, heatmap_popular_areas])
tab1.set_title(0, "Heatmap Over Time")
tab1.set_title(1, "Most Common Spots")

In [134]:
# interval sizes offered by the "By:" dropdowns
minutes15 = datetime.timedelta(minutes=15)
onehour = datetime.timedelta(hours=1)
sixhours = datetime.timedelta(hours=6)
oneday = datetime.timedelta(days=1)

def process_date_selection(clicked):
    """Handle the "Select Day" button: load and draw heatmaps for one day.

    Reads the chosen date, interval and area from the widgets, resets the
    loading bar and hands over to ``display_heatmaps``.  If no date has been
    picked, a message is printed into ``error_messages`` instead.

    Parameters
    ----------
    clicked : object
        Argument supplied by the button callback; not used.
    """
    # Clear first, so an earlier "Please Select A Day" message does not
    # linger once the user has made a valid selection.
    error_messages.clear_output()

    picked = day_picker.value
    if not picked:
        with error_messages:
            print("Please Select A Day")
        return

    # midnight at the start of the chosen day
    startday = datetime.datetime(picked.year, picked.month, picked.day)

    if day_timerange_dd.value == "15 Minutes":
        timegap = minutes15
        num_iters = 24 * 4
    elif day_timerange_dd.value == "1 Hour":
        timegap = onehour
        num_iters = 24
    else:
        timegap = sixhours
        num_iters = 4

    location = locations[area_dd.value][0]
    img = locations[area_dd.value][1]

    # one tick per query plus five processing steps in create_heatmaps
    loading1.max = num_iters + 5
    loading1.value = 0
    loading1.bar_style = "info"

    display_heatmaps(startday, timegap, location, num_iters, img)
            
            
def process_month_selection(clicked):
    """Handle the "Select Month" button: load and draw heatmaps for one month.

    Reads the chosen month, year, interval and area from the widgets, resets
    the loading bar and hands over to ``display_heatmaps``.  The ``clicked``
    argument comes from the button callback and is not used.
    """
    year = year_dd.value
    month = months_choices[month_dd.value]
    startday = datetime.datetime(year=year, month=month, day=1)
    days_in_month = monthrange(year, month)[1]

    # either four 6-hour slices per day, or one slice per day
    by_six_hours = month_timerange_dd.value == "6 Hours"
    timegap = sixhours if by_six_hours else oneday
    num_iters = 4 * days_in_month if by_six_hours else days_in_month

    area_entry = locations[area_dd.value]
    location = area_entry[0]
    img = area_entry[1]

    # one tick per query plus five processing steps in create_heatmaps
    loading1.max = num_iters + 5
    loading1.value = 0
    loading1.bar_style = "info"

    display_heatmaps(startday, timegap, location, num_iters, img)
    
    
def display_heatmaps(startday, timegap, location, num_iters, img):
    """Download the heatmaps for the requested span, then draw both plots.

    ``startday``, ``timegap``, ``location`` and ``num_iters`` are forwarded to
    ``query_heatmap_info``; its result, ``img`` and ``num_iters`` go on to
    ``create_heatmaps``.
    """
    downloaded = query_heatmap_info(
        starttime=startday,
        timegap=timegap,
        location=location,
        num_iters=num_iters,
    )
    create_heatmaps(downloaded, img, num_iters)
        

def query_heatmap_info(starttime, timegap, location, num_iters):
    """Fetch one pedestrian heatmap per time slice from the API.

    Issues ``num_iters`` consecutive queries, each covering
    ``[starttime + i * timegap, starttime + (i + 1) * timegap)``, and advances
    the ``loading1`` progress bar once per query.

    Parameters
    ----------
    starttime : datetime.datetime
        Start of the first slice.
    timegap : datetime.timedelta
        Length of each slice.
    location : str
        Sensor serial number to query.
    num_iters : int
        Number of slices to fetch.

    Returns
    -------
    dict
        Maps each slice's start time (``"%Y-%m-%dT%H:%M:%S"`` string) to the
        ``heatmap`` value of the first returned edge, or to an empty list when
        the API returns no edges for that slice.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    time_format = "%Y-%m-%dT%H:%M:%S"
    auth_headers = {'Authorization': token}

    heatmaps = {}
    for i in range(num_iters):
        str_start = (starttime + timegap * i).strftime(time_format)
        str_end = (starttime + timegap * (i + 1)).strftime(time_format)

        heatmapquery = f"""
        query {{
          feedHeatmaps(
            serialno:\"{location}\",
            startTime:\"{str_start}\",
            endTime:\"{str_end}\",
            objClasses:["pedestrian"],
            timezone:"America/New_York") {{
            edges {{
              node {{
                time
                objClass
                heatmap
              }}
            }}
          }}
        }}
        """

        # The timeout keeps one stalled request from freezing the whole load.
        heatmap_request = requests.post(url, json={'query': heatmapquery},
                                        headers=auth_headers, timeout=60)
        heatmap_request.raise_for_status()

        edges = heatmap_request.json()["data"]["feedHeatmaps"]["edges"]
        # No edges means no data for this slice; store an empty heatmap so the
        # slice still gets a (placeholder) frame in the plot instead of
        # raising IndexError.
        heatmaps[str_start] = edges[0]["node"]["heatmap"] if edges else []
        loading1.value = loading1.value + 1

    return heatmaps


def _heatmaps_to_frame(heatmaps):
    """Stack the per-slice heatmaps into one long frame (x, y, value, DateTime)."""
    frames = []
    for timestamp, points in heatmaps.items():
        frame = pd.DataFrame(points)
        # need some data even if no one is there, so the frame still exists
        # in the animation: two placeholder points at the origin
        if len(frame) == 0:
            frame = pd.DataFrame({0: [0, 0], 1: [0, 0], 2: [0, 1]})
        frame = frame.rename(columns={0: "x", 1: "y", 2: "value"})
        frame["DateTime"] = timestamp
        frames.append(frame)

    if frames:
        # one concat instead of a DataFrame.append per slice (append is
        # quadratic and no longer exists in pandas 2)
        data = pd.concat(frames)
    else:
        data = pd.DataFrame(columns=["x", "y", "value", "DateTime"])
    data["value"] = data["value"].astype(float)
    return data


def _thin_points(data):
    """Keep every 2nd or 3rd pixel when the busiest slice has too many points."""
    busiest = data["DateTime"].value_counts().values[0]
    if busiest > 20000:
        return data[(data["x"] % 3 == 0) & (data["y"] % 3 == 0)]
    if busiest > 10000:
        return data[(data["x"] % 2 == 0) & (data["y"] % 2 == 0)]
    return data


def _add_camera_backdrop(fig, img):
    """Put the camera image behind the points and hide both axes."""
    fig.add_layout_image(
        dict(source=img, xref="x", yref="y", x=0, y=0,
             sizex=640, sizey=480, sizing="stretch", opacity=1, layer="below")
    )
    hidden_axis = dict(showgrid=False, zeroline=False, ticks='', showticklabels=False)
    fig.update_layout(xaxis=hidden_axis, yaxis=hidden_axis)


def create_heatmaps(heatmaps, img, num_iters):
    """Draw the animated heatmap and the "most common spots" summary.

    The animated scatter plot is shown inside the ``heatmap_by_time`` output
    widget; the summary scatter plot replaces the contents of the
    ``heatmap_popular_areas`` figure widget.  ``loading1`` is advanced five
    times and marked "success" at the end.

    Parameters
    ----------
    heatmaps : dict
        Slice start time -> list of ``[x, y, value]`` points, as returned by
        ``query_heatmap_info``.
    img : str
        Background image drawn underneath both plots.
    num_iters : int
        Unused; kept so existing callers keep working.
    """
    # put heatmap data into a dataframe to draw heatmaps as scatterplots
    data = _heatmaps_to_frame(heatmaps)
    loading1.value = loading1.value + 1

    # due to the high volume of data in a heatmap, the animated plot only
    # keeps every 2nd or 3rd point
    cut_data = _thin_points(data)

    # dense frames are clunky to switch between, so stay on each one longer
    if cut_data["DateTime"].value_counts().values[0] > 4000:
        stayonframe = 3000
    else:
        stayonframe = 2000

    heatmap_colorscale = px.colors.sequential.Jet[1:5]
    loading1.value = loading1.value + 1

    # heatmap with animations
    fig = px.scatter(cut_data, x="x", y="y", color="value", animation_frame="DateTime",
                     range_x=[0, 640], range_y=[480, 0], color_continuous_scale=heatmap_colorscale,
                     width=800, height=700, opacity=0.5, range_color=[0, 1],
                     labels={"x": "", "y": ""})
    play_args = fig.layout.updatemenus[0].buttons[0].args[1]
    play_args["frame"]["duration"] = stayonframe
    play_args["transition"]["duration"] = 1000
    _add_camera_backdrop(fig, img)
    fig.update_traces(marker=dict(size=10))

    heatmap_by_time.clear_output()
    with heatmap_by_time:
        fig.show()
    loading1.value = loading1.value + 1

    # get data for heatmap of common areas: total heat per pixel
    summed_heatmap = data.groupby(["x", "y"])["value"].sum().reset_index()
    # Drop only the (0, 0) placeholder pixel.  The previous filter tested
    # `x != 0` twice, which threw away every point in the x == 0 column.
    summed_heatmap = summed_heatmap[(summed_heatmap["x"] != 0) | (summed_heatmap["y"] != 0)]

    if len(summed_heatmap) > 0:
        max_val = max(list(summed_heatmap["value"]))
        min_val = min(list(summed_heatmap["value"]))
        # only show pixels above the lowest quarter of the value range
        bound = (max_val - min_val) / 4 + min_val
    else:
        max_val = 1
        bound = 0

    summed_heatmap = summed_heatmap[summed_heatmap["value"] > bound]
    loading1.value = loading1.value + 1

    # create heatmap of the common areas
    fig = px.scatter(summed_heatmap, x="x", y="y", color="value", range_x=[0, 640],
                     range_y=[480, 0], color_continuous_scale=heatmap_colorscale,
                     width=800, height=600, opacity=0.3, range_color=[0, max_val],
                     labels={"x": "", "y": ""})
    _add_camera_backdrop(fig, img)

    heatmap_popular_areas.data = []
    heatmap_popular_areas.add_traces(fig.data)
    heatmap_popular_areas.layout = fig.layout
    loading1.value = loading1.value + 1
    loading1.bar_style = "success"

In [135]:
# Wire each selection button to the handler that reloads and redraws the heatmaps.
day_select.on_click(process_date_selection)
month_select.on_click(process_month_selection)

In [136]:
# Show the controls, the loading bar and the two output tabs, in that order.
display(selections1, loading1, tab1)

Accordion(children=(VBox(children=(DatePicker(value=None, description='Date:'), Dropdown(description='By:', op…

IntProgress(value=0, bar_style='info', description='Loading:', max=10)

Tab(children=(Output(), FigureWidget({
    'data': [], 'layout': {'template': '...'}
})), _titles={'0': 'Heatm…

The "Most Common Spots" heatmap is created by summing all of the heatmaps that appear in "Heatmap Over Time" and showing only the points that occur more often than the rest.  This gives an idea of which areas people tend to be in multiple times in a day, or on multiple days in a month.  Clearer lines represent commonly taken paths, and high-heat spots are the places people are most often in during that timespan.

# Part 3: Maintenance 

307 requires maintenance after 500 visitors or 500 hours of use, when should 307 plan on scheduling maintenance operations (e.g., what days and times)?

500 hours of use = around 21 days = around 3 weeks
Need maintenance regularly.  At most, 3 weeks.  

Might as well do maintenance every day.

1. Select a month or 2 months, plot counts by hour (observed and fitted by the exponential smoothing).  Add dots for what we suspect are events and lines for when 500 is reached outside of events
2. Plot predictions: very last month + predictions beyond that.  Add lines for when 500 visitors is reached.
3. bar chart of times of when we think are events
4. bar chart of times of when 500 visitors is reached outside of events.
5. plot all amount of hours to reach 500 visitors

Items 1 and 2 are combined into a single graph, provided Plotly doesn't crash the renderer.

### Notes:
- The fitted and predicted graph may not load due to the overwhelming number of points plotted; it can crash the browser renderer.  If it doesn't load, press the "Render in Browser" button to open the graph directly in a separate browser tab (this usually works).  If you really want it in the notebook, you may need to restart your computer (that worked for me).

In [13]:
# widgets

# --- inputs ---
area_dd2 = widgets.Dropdown(description="Area:", options=list(locations.keys()))
area_btn = widgets.Button(description='Select Area', disabled=False, button_style='')

# --- loading bar ---
loading2 = widgets.IntProgress(
    value=0, min=0, max=12, step=1,
    description='Loading:', bar_style='info', orientation='horizontal',
)

# --- outputs: one figure widget per graph ---
time_graph = go.FigureWidget()
average_timeto500 = go.FigureWidget()
expected_event_hours = go.FigureWidget()
reached_500_hours = go.FigureWidget()

# opens the time-series figure in a separate browser tab
browser_render = widgets.Button(description='Render in Browser', disabled=False, button_style='')

# --- putting them together ---
area_selections = widgets.VBox([area_dd2, area_btn])
time_tab = widgets.VBox([browser_render, time_graph])

# selections in a folding accordion
selections2 = widgets.Accordion(children=[area_selections])
selections2.set_title(0, 'Select Area')

# output graph tabs
tab2 = widgets.Tab(
    [time_tab, average_timeto500, expected_event_hours, reached_500_hours]
)
tab2.set_title(0, "Counts & Predictions")
tab2.set_title(1, "Time to 500 Visitors")
tab2.set_title(2, "Hours of Over 500 ")
tab2.set_title(3, "Hours 500 Reached")

In [14]:
# Most recently drawn time-series figure; set by update_maintenance_graphs.
timeseries_figure = None


def show_in_browser(click):
    """Open the stored time-series figure with Plotly's "browser" renderer.

    Does nothing while no figure has been stored yet.  ``click`` comes from
    the button callback and is not used.
    """
    if not timeseries_figure:
        return
    timeseries_figure.show(renderer="browser")


def update_maintenance_graphs(click):
    """Handle the "Select Area" button: rebuild every maintenance graph.

    Downloads the hourly counts for the chosen area, derives the capacity
    statistics, fits the forecasting model and redraws all four graphs,
    advancing ``loading2`` after each stage.  The drawn time-series figure is
    stored in the module-level ``timeseries_figure``.  ``click`` comes from
    the button callback and is not used.
    """
    global timeseries_figure

    def advance():
        # move the loading bar forward by one step
        loading2.value = loading2.value + 1

    loading2.value = 0
    loading2.bar_style = "info"
    advance()

    count_data = process_area_selection()
    advance()

    # hours with at least 500 pedestrians are treated as possible events
    maybe_events = count_data[count_data["result"] >= 500]
    our_event_days = list(maybe_events["date"].value_counts().index)
    advance()

    visitor_stats = get_visitor_data(count_data, our_event_days)
    over_capacity, gaps, over_capacity_dates, over_capacity_times = visitor_stats
    advance()

    forecast = fit_timeseries_model(count_data)
    model, predictions, predicted_time_gaps, reached_capacity = forecast
    advance()

    timeseries_figure = draw_timeseries_graph(
        count_data, model, predictions, over_capacity, reached_capacity
    )
    advance()

    hours_graphs(count_data, over_capacity_times, maybe_events, gaps, predicted_time_gaps)

    advance()
    loading2.bar_style = "success"
    

def process_area_selection():
    """Download the hourly pedestrian counts for the area chosen in ``area_dd2``.

    Returns
    -------
    pandas.DataFrame
        One row per returned node with the node's fields, where ``time`` is
        renamed to ``datetime`` and two columns are derived from it:
        ``date`` (characters 0-9) and ``time`` (characters 11-18).

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    area = area_dd2.value
    location = locations[area][0]

    # span queried per area; any other area falls through to the "Outside" span
    coverage = {
        "Streetscape": ("2019-02-20T00:00:00", "2020-01-12T00:00:00"),
        "Under Raincoat": ("2019-03-20T00:00:00", "2019-12-06T00:00:00"),
    }
    outside_span = ("2019-03-19T00:00:00", "2020-01-12T00:00:00")
    starttime, endtime = coverage.get(area, outside_span)

    hourquery = f"""
    query {{
      feedCountMetrics(
        serialnos:[\"{location}\"],
        startTime:\"{starttime}\",
        endTime:\"{endtime}\",
        objClasses:["pedestrian"],
        timezone:"America/New_York",
        interval:"1h") {{
        edges {{
          node {{
            serialno
            result
            objClass
            time
          }}
        }}
      }}
    }}
    """
    # generous timeout: this single query covers many months of hourly counts
    counts_response = requests.post(url, json={'query': hourquery},
                                    headers={'Authorization': token}, timeout=300)
    counts_response.raise_for_status()
    counts_list = counts_response.json()["data"]["feedCountMetrics"]["edges"]

    # Build the frame in one go; appending one single-row frame per node
    # copies the whole frame every time (and DataFrame.append no longer
    # exists in pandas 2).
    count_data = pd.DataFrame([edge["node"] for edge in counts_list])

    count_data = count_data.rename(columns={"time": "datetime"})
    count_data["date"] = count_data["datetime"].str[0:10]
    count_data["time"] = count_data["datetime"].str[11:19]

    return count_data


def get_visitor_data(count_data, our_event_days):
    """Find when the running visitor total passes 500, ignoring event days.

    Walks the rows of ``count_data`` in order and accumulates ``result``.
    Each time the total exceeds 500 the row is recorded and the total is
    reset.  Rows whose ``date`` is in ``our_event_days`` are skipped, and the
    skipped days are removed from the gap to the next recorded row.

    Parameters
    ----------
    count_data : pandas.DataFrame
        Needs the columns ``datetime`` (``"%Y-%m-%dT%H:%M:%S%z"`` strings),
        ``date``, ``time`` and ``result``.
    our_event_days : iterable of str
        Dates (same format as the ``date`` column) to leave out.

    Returns
    -------
    tuple
        ``(over_capacity, gaps_hours, over_capacity_dates, over_capacity_times)``:
        the ``datetime``/``date``/``time`` values of the recorded rows, and
        the whole hours between consecutive recorded rows.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S%z"
    event_days = set(our_event_days)  # constant-time membership tests

    visitors = 0
    days_skipped = 0  # event days passed since the last recorded row
    over_capacity = []
    over_capacity_dates = []
    over_capacity_times = []
    gaps = []

    rows = zip(count_data["datetime"], count_data["date"],
               count_data["time"], count_data["result"])
    for stamp, day, clock, count in rows:
        if day in event_days:
            # count each skipped event day once, at its midnight row
            if clock == '00:00:00':
                days_skipped = days_skipped + 1
            continue

        visitors = visitors + count
        if visitors > 500:
            visitors = 0

            if over_capacity:
                curr_time = datetime.datetime.strptime(stamp, timestamp_format)
                last_time = datetime.datetime.strptime(over_capacity[-1], timestamp_format)
                # Remove the skipped event days from the gap.  This used to
                # subtract one *hour* per skipped day, leaving 23 hours of
                # every event day in the result.
                gaps.append(curr_time - last_time
                            - datetime.timedelta(days=1) * days_skipped)

            over_capacity.append(stamp)
            over_capacity_dates.append(day)
            over_capacity_times.append(clock)
            days_skipped = 0

    gaps_hours = [gap.days * 24 + gap.seconds // 3600 for gap in gaps]

    return over_capacity, gaps_hours, over_capacity_dates, over_capacity_times


def fit_timeseries_model(count_data):
    """Fit an exponential-smoothing model to the counts and forecast ahead.

    Fits additive-trend, additive-seasonal exponential smoothing with 24
    seasonal periods (the data is hourly) on ``result`` indexed by
    ``datetime``, forecasts 768 steps, and walks the forecast to find when
    the running total passes 500.

    Returns
    -------
    tuple
        ``(model, predictions, gaps_hours, reached_capacity)``: the fitted
        model; the forecast as a frame with ``Datetime`` and ``Visitors``
        (negative values clipped to 0, truncated to int); the whole hours
        between consecutive crossings (the first measured from the first
        forecast row); and the ``Datetime`` value of each crossing.
    """
    hourly_counts = pd.Series(
        count_data[["datetime", "result"]].set_index("datetime")["result"]
    )
    # 24 seasonal periods since we have hourly data
    model = ExponentialSmoothing(
        hourly_counts, seasonal_periods=24, trend='add', seasonal='add'
    ).fit()

    forecast = pd.DataFrame(model.forecast(768)).reset_index()
    predictions = forecast.rename(columns={"index": "Datetime", 0: "Visitors"})
    predictions["Visitors"] = predictions["Visitors"].apply(lambda x: int(max(x, 0)))

    running_total = 0
    predicted_time_gaps = []
    reached_capacity = []
    for _, row in predictions.iterrows():
        running_total = running_total + row["Visitors"]
        if not running_total > 500:
            continue

        running_total = 0
        if predicted_time_gaps:
            # measure from the previous crossing
            previous = reached_capacity[-1].to_pydatetime()
        else:
            # first crossing: measure from the start of the forecast
            previous = predictions.loc[0]["Datetime"].to_pydatetime()
        current = row["Datetime"].to_pydatetime()
        predicted_time_gaps.append(current - previous)
        reached_capacity.append(row["Datetime"])

    gaps_hours = [gap.days * 24 + gap.seconds // 3600 for gap in predicted_time_gaps]

    return model, predictions, gaps_hours, reached_capacity


def _vertical_line(x, y_top, color):
    """Return a layout-shape dict for a 1px vertical line at ``x`` from 0 to ``y_top``."""
    return dict(type="line", x0=x, y0=0, x1=x, y1=y_top,
                line=dict(color=color, width=1))


def draw_timeseries_graph(count_data, model, predictions, over_capacity, reached_capacity):
    """Draw observed, fitted and predicted counts with the 500-visitor markers.

    The figure is copied into the ``time_graph`` widget and also returned.
    ``loading2`` is advanced twice along the way.

    Parameters
    ----------
    count_data : pandas.DataFrame
        Observed counts with ``datetime`` and ``result`` columns.
    model : fitted model
        Only its ``fittedvalues`` series is read.
    predictions : pandas.DataFrame
        Forecast with ``Datetime`` and ``Visitors`` columns.
    over_capacity : list
        Observed times at which 500 visitors were reached (aqua lines).
    reached_capacity : list
        Predicted times at which 500 visitors are reached (pink lines).

    Returns
    -------
    plotly.graph_objects.Figure
    """
    observed_color = "aqua"
    predicted_color = "rgb(231,107,243)"

    events = count_data[count_data["result"] > 500]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=count_data["datetime"], y=count_data["result"],
                             mode="lines", name="Observed"))
    fig.add_trace(go.Scatter(x=model.fittedvalues.index, y=model.fittedvalues.values,
                             mode='lines', name='Fitted'))
    fig.add_trace(go.Scatter(x=predictions["Datetime"], y=predictions["Visitors"],
                             mode='lines', name='Predicted'))
    fig.add_trace(go.Scatter(x=events["datetime"], y=events["result"],
                             name="Expected Events", mode='markers'))
    loading2.value = loading2.value + 1

    # the line height is the same for every marker, so compute it once
    y_top = max(count_data["result"]) + 100
    shapes = [_vertical_line(when, y_top, observed_color) for when in over_capacity]

    # Single-point trace so the vertical lines show up in the legend.  Fall
    # back to the first observation when 500 was never reached; indexing
    # over_capacity[0] unconditionally raised IndexError in that case.
    if len(over_capacity) > 0:
        observed_spot = over_capacity[0]
    else:
        observed_spot = count_data["datetime"].iloc[0]
    fig.add_trace(go.Scatter(x=[observed_spot], y=[0],
                             mode='lines', name='500 Visitors Reached',
                             line=dict(color=observed_color)))
    loading2.value = loading2.value + 1

    shapes.extend(_vertical_line(when, y_top, predicted_color) for when in reached_capacity)
    # set all shapes at once instead of one add_shape call per line
    fig.update_layout(shapes=shapes)

    if len(reached_capacity) > 0:
        spot = reached_capacity[0]
    else:
        spot = "2020-01-12T00:00:00-05:00"

    fig.add_trace(go.Scatter(x=[spot], y=[0],
                             mode='lines', name='Predicted 500 Visitors Reached',
                             line=dict(color=predicted_color)))

    fig.update_layout(xaxis_rangeslider_visible=True, width=900, height=600,
                      xaxis=dict(title="DateTime"), yaxis=dict(title="Number of Pedestrians"),
                      title="Observed and Predicted Pedestrian Counts and When 500 Visitors are Reached")

    time_graph.data = []
    time_graph.add_traces(fig.data)
    time_graph.layout = fig.layout
    return fig


def _hourly_bar_figure(hours, hour_counts, title):
    """Bar chart of ``hour_counts`` over ``hours`` with red lines at 07:00 and 20:00."""
    # hours that never occur in hour_counts get a count of 0
    counts = hour_counts.reindex(hours, fill_value=0)
    time_counts = pd.DataFrame({"Time": hours, "Count": counts.values})

    fig = px.bar(time_counts, x='Time', y='Count')
    y_top = max(time_counts["Count"]) + 5
    for marker in ("07:00:00", "20:00:00"):
        fig.add_shape(
            dict(type="line", x0=marker, y0=0, x1=marker, y1=y_top,
                 line=dict(color="red", width=3))
        )
    fig.update_layout(title=title)
    return fig


def _show_in_widget(widget, fig):
    """Replace the traces and layout of a FigureWidget with those of ``fig``."""
    widget.data = []
    widget.add_traces(fig.data)
    widget.layout = fig.layout


def hours_graphs(count_data, over_capacity_times, maybe_events, gaps_hours, predicted_gaps_hours):
    """Redraw the three hour-based maintenance graphs.

    Updates ``reached_500_hours``, ``expected_event_hours`` and
    ``average_timeto500`` in place and advances ``loading2`` after each one.

    Parameters
    ----------
    count_data : pandas.DataFrame
        Its ``time`` column supplies the hour labels for the bar charts.
    over_capacity_times : list
        Hour label of every time the running total reached 500.
    maybe_events : pandas.DataFrame
        Rows treated as events; only its ``time`` column is read.
    gaps_hours : list
        Observed hours between reaching 500 visitors.
    predicted_gaps_hours : list
        Predicted hours between reaching 500 visitors.
    """
    hours = sorted(count_data["time"].value_counts().index)

    # how often each hour of the day was the one in which 500 was reached
    fig = _hourly_bar_figure(hours, pd.Series(over_capacity_times).value_counts(),
                             "Hours When Sidewalk Labs Reaches Over 500 Visitors")
    _show_in_widget(reached_500_hours, fig)
    loading2.value = loading2.value + 1

    # how often each hour of the day was itself a suspected event hour
    fig = _hourly_bar_figure(hours, maybe_events["time"].value_counts(),
                             "Hours With Over 500 Pedestrians Counted")
    _show_in_widget(expected_event_hours, fig)
    loading2.value = loading2.value + 1

    # overlaid histograms of observed vs predicted hours needed to reach 500
    fig = go.Figure()
    fig.add_trace(go.Histogram(x=gaps_hours,
                               marker=dict(color="fuchsia", line=dict(color="grey")),
                               xbins=dict(size=10),
                               name="Hours To 500 Visitors"))
    fig.add_trace(go.Histogram(x=predicted_gaps_hours,
                               marker=dict(color="rgb(255, 217, 102)"),
                               xbins=dict(size=10),
                               name="Predicted Hours To 500 Visitors Next Month"))

    fig.update_layout(barmode='overlay', width=900, height=600,
                      xaxis=dict(title="Hours"), yaxis=dict(title="Count"),
                      title="Number of Hours to Reach 500 Visitors")
    fig.update_traces(opacity=0.75)

    _show_in_widget(average_timeto500, fig)
    loading2.value = loading2.value + 1
    

In [15]:
# Wire the buttons: "Select Area" rebuilds every graph, "Render in Browser"
# opens the stored time-series figure in a separate tab.
area_btn.on_click(update_maintenance_graphs)
browser_render.on_click(show_in_browser)

In [16]:
# Show the area selector, the loading bar and the four output tabs, in that order.
display(selections2, loading2, tab2)

Accordion(children=(VBox(children=(Dropdown(description='Area:', options=('Streetscape', 'Under Raincoat', 'Ou…

IntProgress(value=0, bar_style='info', description='Loading:', max=12)

Tab(children=(VBox(children=(Button(description='Render in Browser', style=ButtonStyle()), FigureWidget({
    …

Drew lines at 7am and 8pm since that's around the earliest and latest companies will work.  It does give an idea of what times would be best to schedule maintenance.  Definitely earlier and later in the day, far from the busy hours is best.

## Scrap Rough Work

In [93]:
# Scratch re-run of the heatmap download for a single day.
# The loop inputs used to come from leftover kernel state.  The values below
# are reconstructed from the recorded output (2019-12-18 in 15-minute steps)
# so the cell runs on a fresh kernel; adjust as needed.
starttime = datetime.datetime(2019, 12, 18)
timegap = datetime.timedelta(minutes=15)
num_iters = 24 * 4
location = "Streetscape"  # presumably: the scratch plot below uses streetscape.png

heatmaps = {}

for i in range(num_iters):
    str_start = (starttime + timegap * i).strftime("%Y-%m-%dT%H:%M:%S")
    str_end = (starttime + timegap * (i + 1)).strftime("%Y-%m-%dT%H:%M:%S")
    
    # locations[...] is [serial number, image file]; the query needs the
    # serial number only
    heatmapquery = f"""
    query {{
      feedHeatmaps(
        serialno:\"{locations[location][0]}\",
        startTime:\"{str_start}\",
        endTime:\"{str_end}\",
        objClasses:["pedestrian"],
        timezone:"America/New_York") {{
        edges {{
          node {{
            time
            objClass
            heatmap
          }}
        }}
      }}
    }}
    """

    heatmap_request = requests.post(url, json={'query': heatmapquery}, headers = {'Authorization':token})
    heatmaps[str_start] = heatmap_request.json()["data"]["feedHeatmaps"]["edges"][0]["node"]["heatmap"]
    
    print(str_start)
    print(str_end)

2019-12-18T00:00:00
2019-12-18T00:15:00
2019-12-18T00:15:00
2019-12-18T00:30:00
2019-12-18T00:30:00
2019-12-18T00:45:00
2019-12-18T00:45:00
2019-12-18T01:00:00
2019-12-18T01:00:00
2019-12-18T01:15:00
2019-12-18T01:15:00
2019-12-18T01:30:00
2019-12-18T01:30:00
2019-12-18T01:45:00
2019-12-18T01:45:00
2019-12-18T02:00:00
2019-12-18T02:00:00
2019-12-18T02:15:00
2019-12-18T02:15:00
2019-12-18T02:30:00
2019-12-18T02:30:00
2019-12-18T02:45:00
2019-12-18T02:45:00
2019-12-18T03:00:00
2019-12-18T03:00:00
2019-12-18T03:15:00
2019-12-18T03:15:00
2019-12-18T03:30:00
2019-12-18T03:30:00
2019-12-18T03:45:00
2019-12-18T03:45:00
2019-12-18T04:00:00
2019-12-18T04:00:00
2019-12-18T04:15:00
2019-12-18T04:15:00
2019-12-18T04:30:00
2019-12-18T04:30:00
2019-12-18T04:45:00
2019-12-18T04:45:00
2019-12-18T05:00:00
2019-12-18T05:00:00
2019-12-18T05:15:00
2019-12-18T05:15:00
2019-12-18T05:30:00
2019-12-18T05:30:00
2019-12-18T05:45:00
2019-12-18T05:45:00
2019-12-18T06:00:00
2019-12-18T06:00:00
2019-12-18T06:15:00


In [94]:
# convert into dataframe to use as scatterplot for makeshift heatplot

frames = []

for heat in heatmaps:
    new = pd.DataFrame(heatmaps[heat])
    # placeholder points so a slice with nobody in it still gets a frame
    if len(new) == 0:
        new = pd.DataFrame({0: [0, 0], 1: [0, 0], 2: [0, 1]})
    
    new["DateTime"] = heat
    new = new.rename(columns={0: "x", 1: "y", 2: "value"})
    frames.append(new)

# one concat instead of a DataFrame.append per slice (append is quadratic and
# no longer exists in pandas 2)
if frames:
    data = pd.concat(frames)
else:
    data = pd.DataFrame(columns=["x", "y", "value", "DateTime"])

data["value"] = data["value"].astype(float)
data

Unnamed: 0,x,y,value,DateTime
0,0,0,0.000,2019-12-18T00:00:00
1,0,0,1.000,2019-12-18T00:00:00
0,0,0,0.000,2019-12-18T00:15:00
1,0,0,1.000,2019-12-18T00:15:00
0,0,0,0.000,2019-12-18T00:30:00
...,...,...,...,...
2425,576,364,0.945,2019-12-18T23:30:00
2426,577,365,0.945,2019-12-18T23:30:00
2427,578,366,0.945,2019-12-18T23:30:00
0,0,0,0.000,2019-12-18T23:45:00


In [95]:
# due to the high volume of data, to effectively visualize and plot the graph with the animations
# I've cut down on the data
# don't cut more than by a third though otherwise it looks too off

if data["DateTime"].value_counts().values[0] > 20000:
    cut_data = data[(data["x"] % 3 == 0) & (data["y"] % 3 == 0)]
elif data["DateTime"].value_counts().values[0] > 10000:
    cut_data = data[(data["x"] % 2 == 0) & (data["y"] % 2 == 0)]
else:
    # small enough to plot as-is; without this branch cut_data was undefined
    # (or stale from an earlier run)
    cut_data = data
    
if cut_data["DateTime"].value_counts().values[0] > 4000:
    # clunky when switching cause so much data to plot
    stayonframe = 3000
else:
    stayonframe = 2000

In [41]:
# total heat per pixel across all slices, keeping only the busier pixels
checking = data.groupby(["x", "y"])["value"].sum()
checking = checking.reset_index()
checking = checking[checking["value"] > 3]
# Drop only the (0, 0) placeholder pixel.  The previous filter tested `x != 0`
# twice, which threw away every point in the x == 0 column.
checking = checking[(checking["x"] != 0) | (checking["y"] != 0)]

check = sorted(checking["value"], reverse=True)
max_val = check[0]
max_val

18.732

Graph of places that have people dwelling / passing through it multiple times in a day / multiple days in a month.

Note: can I make the bigger red dots bigger???? and the smaller blue ones smaller????

In [42]:
# Same colour scale as create_heatmaps.  It is only a local variable there,
# so it has to be defined here for this cell to run on its own.
heatmap_colorscale = px.colors.sequential.Jet[1:5]

fig = px.scatter(checking, x="x", y="y", color="value",
                 range_x=[0, 640], range_y=[480, 0], color_continuous_scale=heatmap_colorscale,
                 width=800, height=600, opacity=0.3, range_color=[0, max_val],
                 labels={"x": "", "y": ""})


# camera image underneath the points
fig.add_layout_image(
        dict(
            source="streetscape.png",
            xref="x",
            yref="y",
            x=0,
            y=0,
            sizex=640,
            sizey=480,
            sizing="stretch",
            opacity=1,
            layer="below")
)

# hide both axes; the image provides the spatial context
fig.update_layout(xaxis=dict(showgrid=False, zeroline=False, ticks='',
                  showticklabels=False),
                  yaxis=dict(showgrid=False, zeroline=False, ticks='',
                  showticklabels=False))

fig.show()

# Visualization Interactions
- navigation
- selection
- filtering
- sorting
- deriving (transforming) variables
- hybrids

# Privacy
- are you selling your data to numina when it records you
- more generally, do you own your own data
- if you do own your own data then do you have the right to keep it private even if it can't be used to reidentify you

Read numina's respect for privacy