In [2]:
# Load the cleaned business table into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
# NOTE(review): the frame displayed later in this notebook carries an "Unnamed: 0"
# column, which suggests the CSV was written together with its index. Consider
# read_csv(..., index_col=0) after confirming nothing relies on that column.
import pandas as pd

business_csv_path = "../data/cleaned_businessV2.csv"
df_business = pd.read_csv(business_csv_path)

# Prepare

In [4]:
# Helper functions
from datetime import datetime, timedelta

def get_opening_float(time_interval):
    """Return the opening time of an "H:M-H:M" interval as fractional hours.

    The text before the first "-" is split on ":" into hour and minute, and
    the result is hour + minute / 60 (for example "7:30-20:0" gives 7.5).
    """
    opening_part = time_interval.split("-")[0]
    hour_text, minute_text = opening_part.split(":")
    return float(hour_text) + float(minute_text) / 60.0

def get_closing_float(time_interval):
    """Return the closing time of an "H:M-H:M" interval as fractional hours.

    The text between the first and second "-" (the second field of the split)
    is split on ":" into hour and minute, and the result is hour + minute / 60
    (for example "7:0-20:45" gives 20.75).
    """
    closing_part = time_interval.split("-")[1]
    hour_text, minute_text = closing_part.split(":")
    return float(hour_text) + float(minute_text) / 60.0

def get_open_duration_float(time_interval):
    """Return how long an "H:M-H:M" interval lasts, in hours, as a float.

    Both ends are parsed with the "%H:%M" format. When the end is not later
    than the start, the interval is treated as running past midnight, so
    identical start and end times yield 24.0.
    """
    opens_at, closes_at = (
        datetime.strptime(clock_text, "%H:%M")
        for clock_text in time_interval.split('-')
    )

    elapsed = closes_at - opens_at
    # A zero or negative span means closing falls on the following day
    if elapsed <= timedelta(0):
        elapsed += timedelta(days=1)

    # elapsed is strictly positive here, so no absolute value is needed
    return elapsed.total_seconds() / 3600

# Smoke check of the three helpers on an interval whose start and end coincide
test_time_interval = "5:00-5:00"

for label, helper in (
    ("test_opening:", get_opening_float),
    ("get_closing_float:", get_closing_float),
    ("test_duration:", get_open_duration_float),
):
    print(label, helper(test_time_interval))



test_opening: 5.0
get_closing_float: 5.0
test_duration: 24.0


In [5]:
# Define kinds of restaurants we are interested in. May need to delete this later
# to allow the user to define this with UI
categories_of_interest = ['Chinese', 'Japanese', 'Italian', 'Polish', 'Scandinavian']

# Let pandas pick the best nullable dtype for every column (text columns become
# "string"), which the .str accessor below relies on
df_business = df_business.convert_dtypes()

# Create new column containing a specific category of interest.
# If not in interest, label the column value "Other".
# A business matching several categories keeps the last match in list order.
df_business['category_of_interest'] = "Other"
for item in categories_of_interest:
    # regex=False: the category names are literal text, not patterns.
    # na=False: a missing category list counts as "no match" instead of
    # putting NA into the boolean mask.
    matches_item = df_business['categories'].str.contains(item, regex=False, na=False)
    df_business.loc[matches_item, 'category_of_interest'] = item

# Define the days of the week and the rating groups for iteration
weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
rating_groups = ["Rating 1-2", "Rating 2-3", "Rating 3-4", "Rating 4-5"]

# Bin the star rating into the four groups. pd.cut builds right-closed bins
# (1, 2], (2, 3], ... by default, which leaves a rating of exactly 1.0 without a
# label (NaN); include_lowest=True closes the first bin on the left so that
# 1.0 is assigned to "Rating 1-2".
df_business["Rating_Group"] = pd.cut(
    df_business["stars"],
    bins=[1,2,3,4,5],
    labels=rating_groups,
    include_lowest=True,
)

# Drop the rows of businesses that are closed on any day of the week
for day in weekdays:
    df_business = df_business[df_business["hours_" + day] != "Closed"]

# Work on an explicit copy: the filtered frame is a slice of the unfiltered one,
# and assigning new columns to such a slice triggers SettingWithCopyWarning
df_business = df_business.copy()

for day in weekdays:
    hours_of_day = df_business["hours_" + day]
    # Create columns for our x-values
    df_business[day + "_Hour_Of_Opening_Float"] = hours_of_day.apply(get_opening_float)
    # Create columns for our y-values
    df_business[day + "_Open_Duration_Float"] = hours_of_day.apply(get_open_duration_float)

df_business.head(3)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,Wednesday_Hour_Of_Opening_Float,Wednesday_Open_Duration_Float,Thursday_Hour_Of_Opening_Float,Thursday_Open_Duration_Float,Friday_Hour_Of_Opening_Float,Friday_Open_Duration_Float,Saturday_Hour_Of_Opening_Float,Saturday_Open_Duration_Float,Sunday_Hour_Of_Opening_Float,Sunday_Open_Duration_Float
0,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,"39.9555052, -75.1555641",PA,19107,39.955505,-75.155564,4.0,80,...,7.0,13.0,7.0,13.0,7.0,14.0,7.0,14.0,7.0,14.0
3,QdN72BWoyFypdGJhhI5r7g,Bar One,767 S 9th St,"39.9398245705, -75.1574465632",PA,19147,39.939825,-75.157447,4.0,65,...,16.0,8.0,12.0,12.0,12.0,14.0,11.0,15.0,11.0,13.0
9,O1oZpbZNDMH_gz8DhsZCdA,Wendy's,700 E. Hunting Park,"40.012141, -75.1150148",PA,19124,40.012141,-75.115015,1.5,15,...,10.0,16.0,10.0,16.0,10.0,16.0,10.0,16.0,10.0,16.0


# Filter by Widget Scatterplot with Our Dataset

In [6]:
# Variation Filter Out Groups, Hide/Show Days
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, CustomJS, Select, Legend, LegendItem
from bokeh.plotting import figure, save, output_file
from bokeh.models.filters import CustomJSFilter
from bokeh.models import CDSView
from bokeh.io import show, output_notebook
from bokeh.models import CheckboxGroup, CustomJS
from bokeh.layouts import row
from bokeh.palettes import Colorblind  # For Colorblind palette
from bokeh.transform import factor_cmap  # For factor-based color mapping

# Set up data sources
source = ColumnDataSource(df_business)

# Define colors (one per rating group)
colors = Colorblind[len(rating_groups)]

# Display checkboxes for rating groups and weekdays. Every weekday starts ticked;
# of the rating groups only the first and last entries (indices 0 and 3) start ticked.
# NOTE(review): the original comment here read "Let all be checked upon load",
# which active=[0,3] does not do — confirm which of the two is intended.
checkboxes_rating_groups = CheckboxGroup(labels=rating_groups, active=[0,3])
checkboxes_weekdays = CheckboxGroup(labels=weekdays, active=list(range(0, len(weekdays))))

# Create the scatter plot figure with fixed axis ranges
fig = figure(title="Opening Hours and Durations of Restaurants",
    x_axis_label="Opening Hour",
    y_axis_label="Duration",
    x_range=(0, 25),
    y_range=(0, 25)
)

# Create a CDSView, which filters on rating group. We will only see points
# which have a Rating_Group value equal to the label of a ticked box.
view = CDSView(filter=CustomJSFilter(code="""
let selected = checkboxes_rating_groups.active.map(i=>checkboxes_rating_groups.labels[i]);
let indices = [];
let column = source.data.Rating_Group;
for(let i=0; i<column.length; i++){
    if(selected.includes(column[i])){
        indices.push(i);
    }
}
return indices;
""", args=dict(checkboxes_rating_groups=checkboxes_rating_groups)))

# Create the scatterplots: one renderer per weekday. All renderers share the same
# source, view and colour mapping, so the rating groups are separated by the view
# filter and the colour mapper, not by separate renderers. (Looping over the
# rating groups as well only stacked identical renderers on top of each other,
# drawing every point four times.)
#
# scatters_dict maps weekday -> list of renderers for that day and is used by the
# weekday checkboxes to hide/show the individual scatterplots. It is filled
# before the callback below is created so the callback receives the final mapping.
scatters_dict = {}
for weekday in weekdays:
    scatter = fig.scatter(x=weekday + "_Hour_Of_Opening_Float",
                y=weekday + "_Open_Duration_Float",
                source=source,
                size=7,
                color=factor_cmap("Rating_Group", colors, rating_groups),
                alpha=0.5,
                view=view,
                )
    scatters_dict[weekday] = [scatter]

# Make rating group checkboxes re-evaluate the view filter upon click
checkboxes_rating_groups.js_on_change("active", CustomJS(args=dict(source=source), code="source.change.emit();"))

# Make the weekday checkboxes toggle the matching scatterplots visible or invisible
# upon click. The i-th key of scatters_dict corresponds to the i-th checkbox because
# both follow the order of `weekdays`.
checkboxes_weekdays.js_on_change('active', CustomJS(args=dict(scatters_dict = scatters_dict), code="""
    let active = cb_obj.active;                                      
    let days = Object.keys(scatters_dict);                                   
    for (let i=0; i < days.length; i++) {
        let day = days[i];                                                                                                                                   
        for (let j=0; j < scatters_dict[day].length; j++) {                                         
           let scatter_of_group_on_day = scatters_dict[day][j];
           scatter_of_group_on_day.visible = active.includes(i); 
           //console.log(`(${i},${j}) ${scatter_of_group_on_day.name}: ${scatter_of_group_on_day.visible}`)                                                                                                                          
        }                                          
    }                                                
"""))

# Hack to create static legends that are not hooked up to any real renderers:
# add one invisible dummy glyph per colour to stand in for each legend item
dummy_glyphs = [
    fig.scatter(1, 1, size=10, color=color, visible=False)
    for color in colors
]

# Create a Legend manually with static items (one per rating group)
legend_items = [
    LegendItem(label=group_label, renderers=[dummy_glyph])
    for group_label, dummy_glyph in zip(rating_groups, dummy_glyphs)
]
legend = Legend(items=legend_items)

# Add the legend to the figure
fig.add_layout(legend, "right")  # Position it on the right

# Both output targets are enabled before show() is called
output_file("scatterplot-toggleable.html")

output_notebook()

# Show the plot
show(row(fig, checkboxes_rating_groups, checkboxes_weekdays))

