# Geospatial Taxi Demand Analysis

To analyze Chicago's taxi trip data from the year 2015, this notebook visualizes taxi usage in a geospatial context. For this purpose, heatmaps and H3 hexagon maps at different resolutions are used.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
from folium.plugins import HeatMap
import h3
from shapely.geometry import Polygon
import geopandas
from geojson import Feature, Point, FeatureCollection, Polygon
from shapely.geometry import Polygon
import plotly.express as px
from datetime import datetime

In [None]:
#Import cleaned dataset

trips_df = pd.read_parquet('../../data/rides/Taxi_Trips_Sampled_Cleaned.parquet')
trips_df.head(3)

We create a dictionary that maps each community area number of Chicago to its name and add these names to our DataFrame, so that we can refer to specific locations more easily in our analysis. The census tract list was retrieved from [here](https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Census-Tracts-2000/pt6c-hxpp).

In [None]:
import csv

csv_file = '../../data/census_tract/chicago_census_tract.csv'

# Build a lookup from the value in the second column (CA) to the value in the
# third column (COMMUNIT_1). Keys stay strings here, exactly as read from the file.
# newline='' is what the csv module documentation asks for when a file object is
# handed to csv.reader, so that newlines inside quoted fields are parsed correctly.
with open(csv_file, 'r', newline='') as file:
    reader = csv.reader(file)

    # Skip the header row; the default keeps an empty file from raising StopIteration
    next(reader, None)

    # If a CA value occurs more than once, the last row wins
    data_dict = {row[1]: row[2] for row in reader}

# Order the entries by their numeric CA value
data_dict = dict(sorted(data_dict.items(), key=lambda item: int(item[0])))
print(data_dict)

In [None]:
# Create a new dictionary with integer keys
new_data_dict = {int(key): value for key, value in data_dict.items()}


In [None]:
key_types = [type(key) for key in new_data_dict.keys()]

In [None]:
# Match census tract name to community area unique value
trips_df['pickup_name'] = trips_df['pickup_community_area'].map(new_data_dict)
trips_df['dropoff_name'] = trips_df['dropoff_community_area'].map(new_data_dict)
trips_df.head(3)

In [None]:
#Test for NAN values
trips_df['pickup_name'].isna().sum()

## Heatmaps 

In this section we use heatmaps to visualize the number of starting and ending trips. Because the heatmaps are too large, we had to remove the cell outputs of the maps. 
We [uploaded screenshots]() of these as an alternative.

### Starting Trips

In [None]:
from shapely import wkt

In [None]:
trips_df["pickup_centroid"] = geopandas.GeoSeries.from_wkt(trips_df["pickup_centroid"])

In [None]:
# Preview the trips with 'pickup_centroid' as the active geometry column.
# trips_df was loaded with pd.read_parquet, and a plain pandas DataFrame has no
# set_geometry method, so the frame is wrapped in a GeoDataFrame for this preview.
# trips_df itself is not reassigned (the original call discarded its result too).
geopandas.GeoDataFrame(trips_df, geometry='pickup_centroid').head(3)

In [None]:
#Split pickup_centroid into coordinates
#taken from https://geopandas.org/en/stable/gallery/plotting_with_folium.html 

geo_df_list = [[point.xy[1][0], point.xy[0][0]] for point in trips_df.pickup_centroid]

In [None]:
# One row per trip: the index is the pickup census tract, columns 0 and 1 hold the
# y and x coordinate of the pickup centroid (used as [lat, lon] by the map cell).
geo_df_pickup = geopandas.GeoDataFrame(geo_df_list, trips_df["pickup_census_tract"])
geo_df_pickup = geo_df_pickup.reset_index()

# After reset_index the frame has a fresh 0..n-1 index, while trips_df keeps its
# own index labels. Assigning the Series directly would align on those labels and
# could attach names to the wrong rows (or produce NaN), so assign by position:
# the rows of geo_df_list are in the same order as the rows of trips_df.
geo_df_pickup["pickup_name"] = trips_df["pickup_name"].to_numpy()

geo_df_pickup.head()

In [None]:
#geo_df_pickup = geo_df_pickup.reset_index()

#geo_df_pickup.rename(columns={0: "lat", 1: "lon"}, inplace=True)
geo_df_pickup = geo_df_pickup.reset_index()
geo_df_pickup

In [None]:
# Plotting a heatmap that shows the frequency of trips starting at each pickup centroid

trips_heatmap = folium.Map(
    location=(41.881832, -87.623177), # the orig mean values as location coordinates from https://www.latlong.net/place/chicago-il-usa-1855.html
    zoom_start=13,
    control_scale=True,
    max_zoom=20,
)

# The heat layer receives every trip so that its intensity reflects trip frequency
trips_heatmap.add_child(plugins.HeatMap(geo_df_list, radius=30))

# Many trips share the same coordinates. Drawing one marker per trip only stacks
# identical circles on top of each other and makes the map very heavy, so a single
# marker is drawn per distinct coordinate pair (columns 0 and 1).
pickup_markers = geo_df_pickup.drop_duplicates(subset=[0, 1])

for lat, lon, name in zip(
    pickup_markers[0], pickup_markers[1], pickup_markers["pickup_name"]
):
    folium.CircleMarker(
        radius=5,
        location=[lat, lon],
        popup=name,
        color="crimson",
        fill_color="crimson",
    ).add_to(trips_heatmap)


trips_heatmap

In the heatmap we can see that the most popular hotspot for Taxis is **insert here when maps load**.

### Ending Trips

In [None]:
trips_df["dropoff_centroid"] = geopandas.GeoSeries.from_wkt(trips_df["dropoff_centroid"])

In [None]:
# Preview the trips with 'dropoff_centroid' as the active geometry column.
# trips_df was loaded with pd.read_parquet, and a plain pandas DataFrame has no
# set_geometry method, so the frame is wrapped in a GeoDataFrame for this preview.
# trips_df itself is not reassigned (the original call discarded its result too).
geopandas.GeoDataFrame(trips_df, geometry='dropoff_centroid').head(3)

In [None]:
geo_df_list_dropoff = [[point.xy[1][0], point.xy[0][0]] for point in trips_df.dropoff_centroid]

In [None]:
geo_df_dropoff = geopandas.GeoDataFrame(geo_df_list_dropoff, trips_df["dropoff_census_tract"])

In [None]:
# One row per trip: the index is the dropoff census tract, columns 0 and 1 hold the
# y and x coordinate of the dropoff centroid (used as [lat, lon] by the map cell).
# The coordinates must come from geo_df_list_dropoff; geo_df_list holds the
# pickup coordinates and would place every dropoff marker at a pickup location.
geo_df_dropoff = geopandas.GeoDataFrame(geo_df_list_dropoff, trips_df["dropoff_census_tract"])
geo_df_dropoff = geo_df_dropoff.reset_index()

# Assign by position: after reset_index the frame has a fresh 0..n-1 index, so a
# label-aligned assignment could attach names to the wrong rows.
geo_df_dropoff["dropoff_name"] = trips_df["dropoff_name"].to_numpy()

geo_df_dropoff.head()

In [None]:
# Plotting a heatmap that shows the frequency of trips ending at each dropoff centroid

trips_heatmap = folium.Map(
    location=(41.881832, -87.623177), # the orig mean values as location coordinates from https://www.latlong.net/place/chicago-il-usa-1855.html
    zoom_start=13,
    control_scale=True,
    max_zoom=20,
)

# The heat layer receives every trip so that its intensity reflects trip frequency
trips_heatmap.add_child(plugins.HeatMap(geo_df_list_dropoff, radius=30))

# Many trips share the same coordinates. Drawing one marker per trip only stacks
# identical circles on top of each other and makes the map very heavy, so a single
# marker is drawn per distinct coordinate pair (columns 0 and 1).
dropoff_markers = geo_df_dropoff.drop_duplicates(subset=[0, 1])

for lat, lon, name in zip(
    dropoff_markers[0], dropoff_markers[1], dropoff_markers["dropoff_name"]
):
    folium.CircleMarker(
        radius=5,
        location=[lat, lon],
        popup=name,
        color="crimson",
        fill_color="crimson",
    ).add_to(trips_heatmap)


trips_heatmap

The most popular dropoff location is **insert here, when map finally loads**. 

## H3 maps 

Another geospatial visualisation that we use are H3 maps. We use the resolutions 7, 8 and 9.
Because the maps for the different resolutions are repetitive for each category we look at, we will restrict the visualization to the one resolution that best represents the underlying analysis.

First we start off by creating DataFrames for each resolution and creating methods for the calculation of Taxi demand, hex and for visualisations.

In [None]:
# Creating a dataframe that contains all hexagons where at least one trip started or ended

hexagons7_df = pd.DataFrame()
hexagons8_df = pd.DataFrame()
hexagons9_df = pd.DataFrame()

hexagons7_df["hex"] = pd.concat([trips_df["h3_07_pickup"], trips_df["h3_07_dropoff"]]).unique()
hexagons8_df["hex"] = pd.concat([trips_df["h3_08_pickup"], trips_df["h3_08_dropoff"]]).unique()
hexagons9_df["hex"] = pd.concat([trips_df["h3_09_pickup"], trips_df["h3_09_dropoff"]]).unique()
hexagons7_df.head(3)
hexagons8_df.head(3)
hexagons9_df.head(3)

In [None]:
# Function that generates the hexagon geometry for a row of a hexagon DataFrame
# taken from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def add_geometry(row):
    """Return the outline of the H3 cell stored in ``row['hex']`` as a Polygon.

    Intended for ``DataFrame.apply(add_geometry, axis=1)``.

    Parameters
    ----------
    row : mapping with a ``'hex'`` entry holding the H3 cell id.

    Returns
    -------
    Polygon built from the boundary points that h3 returns for the cell.
    """
    # The second argument is passed as True; presumably this is h3's geo_json
    # flag (longitude-first, closed ring) — confirm against the h3 version in use.
    return Polygon(h3.h3_to_geo_boundary(row['hex'], True))

In [None]:
#Applying function to our hexagons dataframe

hexagons7_df['geometry'] = (hexagons7_df
                                .apply(add_geometry,axis=1)) 

hexagons8_df['geometry'] = (hexagons8_df
                                .apply(add_geometry,axis=1)) 

hexagons9_df['geometry'] = (hexagons9_df
                                .apply(add_geometry,axis=1)) 


hexagons7_df.head(3)


In [None]:
# Function that counts the trips per hexagon for a given groupby column

def calculate_hexagon_trips(hexagons_df, label, group_by):
    """Add a trip-count column to ``hexagons_df`` in place.

    Parameters
    ----------
    hexagons_df : DataFrame whose index holds the hexagon ids; it is modified in place.
    label : name of the column that receives the counts.
    group_by : column of the global ``trips_df`` to group by and count.

    Index values of ``hexagons_df`` that have no trips in that column get 0.
    Nothing is returned.
    """
    trips_per_hexagon = trips_df.groupby(group_by).size()
    # Align the counts to the hexagon index; hexagons without trips become NaN -> 0
    hexagons_df[label] = trips_per_hexagon.reindex(hexagons_df.index).fillna(value=0)

In [None]:
# Calculate starting and ending trips for each hexagon

hexagons7_df = hexagons7_df.set_index('hex')
hexagons8_df = hexagons8_df.set_index('hex')
hexagons9_df = hexagons9_df.set_index('hex')


calculate_hexagon_trips(hexagons7_df, label="starting_trips_07", group_by="h3_07_pickup")
calculate_hexagon_trips(hexagons7_df, label="ending_trips_07", group_by="h3_07_dropoff")
calculate_hexagon_trips(hexagons8_df, label="starting_trips_08", group_by="h3_08_pickup")
calculate_hexagon_trips(hexagons8_df, label="ending_trips_08", group_by="h3_08_dropoff")
calculate_hexagon_trips(hexagons9_df, label="starting_trips_09", group_by="h3_09_pickup")
calculate_hexagon_trips(hexagons9_df, label="ending_trips_09", group_by="h3_09_dropoff")

hexagons7_df = hexagons7_df.reset_index()
hexagons8_df = hexagons8_df.reset_index()
hexagons9_df = hexagons9_df.reset_index()

hexagons9_df.head(3)

In [None]:
# Label every hexagon with the community name that occurs most often among the
# trips starting in it. Assigning trips_df["pickup_name"] directly would align on
# the row index and give hexagon row i the name of the unrelated trip row i.

def most_common_pickup_name(hex_column):
    """Return a Series mapping hexagon id -> most frequent pickup_name.

    hex_column is the trips_df column holding the pickup hexagon ids.
    Trips without a pickup_name are ignored.
    """
    named_trips = trips_df.dropna(subset=["pickup_name"])
    return named_trips.groupby(hex_column)["pickup_name"].agg(
        lambda names: names.mode().iat[0]
    )

# Hexagons in which no (named) trip starts get NaN as pickup_name
hexagons7_df["pickup_name"] = hexagons7_df["hex"].map(most_common_pickup_name("h3_07_pickup"))
hexagons8_df["pickup_name"] = hexagons8_df["hex"].map(most_common_pickup_name("h3_08_pickup"))
hexagons9_df["pickup_name"] = hexagons9_df["hex"].map(most_common_pickup_name("h3_09_pickup"))

hexagons7_df

In [None]:
# Our approach uses the choropleth_mapbox function of Plotly Express to build a map.
# To do this a GeoJSON-formatted dictionary is created by this function that can be passed to Plotly Express.

# taken from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def hexagons_dataframe_to_geojson(df_hex, value_field, file_output = None):
    """Convert a hexagon DataFrame into a GeoJSON FeatureCollection.

    Parameters
    ----------
    df_hex : DataFrame with a 'geometry' column, a 'hex' column (used as feature
        id) and the column named by ``value_field``.
    value_field : column whose value is stored as the ``"value"`` property of
        each feature.
    file_output : optional path; when given, the collection is also written to
        this file as JSON.

    Returns
    -------
    The FeatureCollection. It is returned in every case, including when
    ``file_output`` is given.
    """
    # json is not imported in the notebook's import cell, so it is imported here;
    # without it the file_output branch raised NameError.
    import json

    def _to_builtin(value):
        # Values read from a DataFrame row can be NumPy scalars, which json cannot
        # serialise on its own; .item() converts them to plain Python values.
        if hasattr(value, "item"):
            return value.item()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    list_features = [
        Feature(geometry = row['geometry'],
                id = row['hex'],
                properties = {"value": row[value_field]})
        for _, row in df_hex.iterrows()
    ]

    feat_collection = FeatureCollection(list_features)

    if file_output is not None:
        with open(file_output, "w") as f:
            json.dump(feat_collection, f, default=_to_builtin)

    return feat_collection

In [None]:
# Function that visualizes a value per hexagon on an H3 choropleth map

# Adapted from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def plot_frequency(dataset, variable, labels, range_color, palette="RdBu"):
    """Build a choropleth map that colours each hexagon by ``variable``.

    Parameters
    ----------
    dataset : DataFrame with the columns 'hex', 'geometry' and ``variable``.
    variable : name of the column that determines the colour.
    labels : dict passed to Plotly Express to relabel columns in the figure.
    range_color : (min, max) pair for the colour scale.
    palette : name of the continuous colour scale.

    Returns
    -------
    The Plotly figure; it is not shown by this function.
    """
    map_center = {"lat": 41.881832, "lon": -87.623177}

    map_options = dict(
        geojson=hexagons_dataframe_to_geojson(dataset, value_field=variable),
        locations='hex',
        color=variable,
        color_continuous_scale=palette,
        range_color=range_color,
        labels=labels,
        mapbox_style='carto-positron',
        center=map_center,
        zoom=10.5,
        opacity=0.7,
        width=700,
        height=500,
    )

    fig = px.choropleth_mapbox(dataset, **map_options)

    # Remove the outer margins so the map fills the whole figure
    fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
    return fig

In [None]:
# Function to extract longitudes from a Polygon
#def get_longitudes_from_polygon(polygon):
#    return [coord[0] for coord in polygon.exterior.coords]

# Function to extract latitudes from a Polygon
#def get_latitudes_from_polygon(polygon):
#    return [coord[1] for coord in polygon.exterior.coords]

# Apply the function to the 'geometry' column to get the longitudes
#hexagons7_df['longitudes'] = hexagons7_df['geometry'].apply(get_longitudes_from_polygon)
#hexagons7_df['latitudes'] = hexagons7_df['geometry'].apply(get_latitudes_from_polygon)

# Function to plot the frequency with choropleth colors based on the variable
def plot_frequency_test(dataset, hover_name, variable, labels, range_color, palette="RdBu"):
    """Build an animated choropleth map that colours each hexagon by ``variable``.

    Parameters
    ----------
    dataset : DataFrame with the columns 'hex', 'geometry', ``variable`` and
        'month' (the animation frame is fixed to the 'month' column).
    hover_name : column shown as the hover title.
    variable : name of the column that determines the colour.
    labels : dict passed to Plotly Express to relabel columns in the figure.
    range_color : (min, max) pair for the colour scale.
    palette : name of the continuous colour scale.

    Returns
    -------
    The Plotly figure; it is not shown by this function.
    """
    map_center = {"lat": 41.881832, "lon": -87.623177}

    map_options = dict(
        geojson=hexagons_dataframe_to_geojson(dataset, value_field=variable),
        locations='hex',
        hover_name=hover_name,
        color=variable,
        color_continuous_scale=palette,
        range_color=range_color,
        labels=labels,
        animation_frame="month",
        mapbox_style="open-street-map",
        center=map_center,
        zoom=10.5,
        opacity=0.7,
        width=700,
        height=500,
    )

    fig = px.choropleth_mapbox(dataset, **map_options)

    # Style the animation slider
    slider_style = {"active": 0, "bgcolor": 'black'}
    fig.update_layout(sliders=[slider_style])
    return fig


# Hex7

## Starting Trips

In [None]:
variable = "starting_trips_07"

fig = plot_frequency(
    dataset=hexagons7_df,
    variable=variable,
    labels={variable: "Starting Trips in Res7"},
    range_color=(0, hexagons7_df[variable].quantile(0.9)),
    palette="reds",
)

#fig.update_layout(
#    title = 'Starting trips 07',
    
#)

fig.show()

## Ending Trips

In [None]:
variable = "ending_trips_07"

fig = plot_frequency(
    dataset=hexagons7_df,
    variable=variable,
    labels={variable: "Ending Trips in Res7"},
    range_color=(0, hexagons7_df[variable].quantile(0.9)),
    palette="reds"
)

fig.show()

## Demand Difference

### Overall

In [None]:
hexagons7_df["demand_difference_07"] = hexagons7_df["starting_trips_07"] - hexagons7_df["ending_trips_07"]
hexagons7_df.head(3)

In [None]:
# Ploting a map with hexagons depicting the difference in demand considering starting and ending trips

variable = "demand_difference_07"

fig = plot_frequency(
    dataset=hexagons7_df,
    variable=variable,
    labels={variable: "Demand difference"},
    range_color=(
        -hexagons7_df[variable].quantile(0.95),
        hexagons7_df[variable].quantile(0.95),
    ),
)
fig.show()

### During morning and evening rush hour

In [None]:
# Function that counts, per hexagon, the trips whose start hour lies in a given range

def calculate_hexagon_trips_by_hours(hexagons_df, label, group_by, hours):
    """Add a column with trip counts restricted to a range of start hours.

    Parameters
    ----------
    hexagons_df : DataFrame whose index holds the hexagon ids; it is modified in place.
    label : name of the column that receives the counts.
    group_by : column of the global ``trips_df`` to group by and count.
    hours : pair ``(first_hour, last_hour)``; a trip is counted when the hour of
        its ``trip_start_timestamp`` lies in this range, both ends included.

    Index values of ``hexagons_df`` without matching trips get 0.
    Nothing is returned.
    """
    first_hour, last_hour = hours[0], hours[1]
    start_hour = trips_df["trip_start_timestamp"].dt.hour
    in_window = start_hour.between(first_hour, last_hour)

    counts = trips_df[in_window].groupby(group_by).size()
    hexagons_df[label] = counts
    hexagons_df[label] = hexagons_df[label].fillna(value=0)

In [None]:
# Function that adds morning and evening trip counts and their differences to a hexagon DataFrame

def calculate_hexagon_trips_hour(hexagons_df, hexagon_num):
    """Add rush-hour trip counts and demand differences for one H3 resolution.

    Parameters
    ----------
    hexagons_df : DataFrame indexed by hexagon id; it is modified in place.
    hexagon_num : H3 resolution as an integer (e.g. 7). It selects the trip
        columns ``h3_<NN>_pickup`` and ``h3_<NN>_dropoff``, where ``<NN>`` is the
        zero-padded resolution.

    Returns
    -------
    ``hexagons_df`` with the added columns ``starting_trips_<period>``,
    ``ending_trips_<period>`` and ``trips_difference_<period>`` for the periods
    ``morning`` (start hour 6 to 12 inclusive) and ``evening`` (start hour 14 to
    20 inclusive). The difference is starting trips minus ending trips.
    """
    # Zero-pad so that resolutions >= 10 map to h3_10_..., not h3_010_...
    hexagon_label = f"h3_{int(hexagon_num):02d}"

    # Hour ranges, both ends inclusive
    periods = {
        "morning": [6, 12],
        "evening": [14, 20],
    }

    # First all count columns, then all difference columns
    for period, hours in periods.items():
        calculate_hexagon_trips_by_hours(
            hexagons_df, label=f"starting_trips_{period}", group_by=f"{hexagon_label}_pickup", hours=hours
        )
        calculate_hexagon_trips_by_hours(
            hexagons_df, label=f"ending_trips_{period}", group_by=f"{hexagon_label}_dropoff", hours=hours
        )

    for period in periods:
        hexagons_df[f"trips_difference_{period}"] = (
            hexagons_df[f"starting_trips_{period}"] - hexagons_df[f"ending_trips_{period}"]
        )

    return hexagons_df

In [None]:
# Calculate the difference between starting and ending trips for the morning (06:00-12:00)
# and for the evening (14:00-20:00) in resolutions 7, 8 and 9

hexagon_numbers = [7, 8, 9]

# Each frame is indexed by 'hex' for the calculation and reset afterwards.
# The three frames are reassigned explicitly instead of being looked up and
# written back by name through globals().
hexagons7_df, hexagons8_df, hexagons9_df = [
    calculate_hexagon_trips_hour(frame.set_index("hex"), hexagon_num).reset_index()
    for hexagon_num, frame in zip(
        hexagon_numbers, (hexagons7_df, hexagons8_df, hexagons9_df)
    )
]

hexagons7_df.head(3)


In [None]:
# Ploting a map with hexagons depicting the demand difference in the morning

variable = "trips_difference_morning"

fig = plot_frequency(
    dataset=hexagons7_df,
    variable=variable,
    labels={variable: "Demand Difference Morning"},
    range_color=(
        -hexagons7_df[variable].quantile(0.9),
        hexagons7_df[variable].quantile(0.9),
    ),
)
fig.show()

In [None]:
# Ploting a map with hexagons depicting the demand difference in the evening

variable = "trips_difference_evening"

fig = plot_frequency(
    dataset=hexagons7_df,
    variable=variable,
    labels={variable: "Demand Difference Evening"},
    range_color=(
        -hexagons7_df[variable].quantile(0.9),
        hexagons7_df[variable].quantile(0.9),
    ),
)
fig.show()

# Hex8

In [None]:
variable = "starting_trips_08"

fig = plot_frequency(
    dataset=hexagons8_df,
    variable=variable,
    labels={variable: "Starting Trips in Res8"},
    range_color=(0, hexagons8_df[variable].quantile(0.9)),
    palette="reds"
)

fig.show()

In [None]:
variable = "ending_trips_08"

fig = plot_frequency(
    dataset=hexagons8_df,
    variable=variable,
    labels={variable: "Ending Trips in Res8"},
    range_color=(0, hexagons8_df[variable].quantile(0.9)),
    palette="reds"
)

fig.show()

In [None]:
hexagons8_df["demand_difference_08"] = hexagons8_df["starting_trips_08"] - hexagons8_df["ending_trips_08"]
hexagons8_df.head(3)

In [None]:
# Ploting a map with hexagons depicting the difference in demand considering starting and ending trips

variable = "demand_difference_08"

fig = plot_frequency(
    dataset=hexagons8_df,
    variable=variable,
    labels={variable: "Demand difference"},
    range_color=(
        -hexagons8_df[variable].quantile(0.95),
        hexagons8_df[variable].quantile(0.95),
    ),
)
fig.show()

### During morning and evening rush hour

In [None]:
# Ploting a map with hexagons depicting the demand difference in the morning

variable = "trips_difference_morning"

fig = plot_frequency(
    dataset=hexagons8_df,
    variable=variable,
    labels={variable: "Demand Difference Morning"},
    range_color=(
        -hexagons8_df[variable].quantile(0.9),
        hexagons8_df[variable].quantile(0.9),
    ),
)
fig.show()

In [None]:
# Ploting a map with hexagons depicting the demand difference in the evening

variable = "trips_difference_evening"

fig = plot_frequency(
    dataset=hexagons8_df,
    variable=variable,
    labels={variable: "Demand Difference Evening"},
    range_color=(
        -hexagons8_df[variable].quantile(0.95),
        hexagons8_df[variable].quantile(0.95),
    ),
)
fig.show()

# Hex9

In [None]:
variable = "starting_trips_09"

fig = plot_frequency(
    dataset=hexagons9_df,
    variable=variable,
    labels={variable: "Starting Trips in Res9"},
    range_color=(0, hexagons9_df[variable].quantile(0.9)),
    palette="reds"
)

fig.show()

In [None]:
variable = "ending_trips_09"

fig = plot_frequency(
    dataset=hexagons9_df,
    variable=variable,
    labels={variable: "Ending Trips in Res9"},
    range_color=(0, hexagons9_df[variable].quantile(0.9)),
    palette="reds"
)

fig.show()

In [None]:
hexagons9_df["demand_difference_09"] = hexagons9_df["starting_trips_09"] - hexagons9_df["ending_trips_09"]
hexagons9_df

In [None]:
# Ploting a map with hexagons depicting the difference in demand considering starting and ending trips

variable = "demand_difference_09"

fig = plot_frequency(
    dataset=hexagons9_df,
    variable=variable,
    labels={variable: "Demand difference"},
    range_color=(
        -hexagons9_df[variable].quantile(0.95),
        hexagons9_df[variable].quantile(0.95),
    ),
)
fig.show()

### During morning and evening rush hour

In [None]:
# Ploting a map with hexagons depicting the demand difference in the morning

variable = "trips_difference_morning"

fig = plot_frequency(
    dataset=hexagons9_df,
    variable=variable,
    labels={variable: "Demand Difference Morning"},
    range_color=(
        -hexagons9_df[variable].quantile(0.9),
        hexagons9_df[variable].quantile(0.9),
    ),
)
fig.show()

In [None]:
# Ploting a map with hexagons depicting the demand difference in the evening

variable = "trips_difference_evening"

fig = plot_frequency(
    dataset=hexagons9_df,
    variable=variable,
    labels={variable: "Demand Difference Evening"},
    range_color=(
        -hexagons9_df[variable].quantile(0.9),
        hexagons9_df[variable].quantile(0.9),
    ),
)
fig.show()

# Average idle time between trips

# Trip length

In [None]:
def plot_trips_net_monthly(h3_res: int, time_interval_length: int):
	"""Plot an animated choropleth of the 'demand' column per hexagon and month.

	The data comes from ``get_trips_net_monthly(h3_res, time_interval_length)``;
	a 'month' column is derived from its 'datetime' column and used as the
	animation frame. Nothing is returned.

	NOTE(review): ``get_trips_net_monthly``, ``h3_visualization`` and
	``lat_lon_leipzig`` are not defined or imported in the visible part of this
	notebook, so calling this function there would raise NameError. The Leipzig
	centre also suggests the function was copied from another project — confirm
	whether it is still needed.
	"""
	trips_monthly_net = get_trips_net_monthly(h3_res, time_interval_length)

	# Derive the month number and order the rows by it for the animation
	trips_monthly_net['month'] = trips_monthly_net['datetime'].dt.month
	trips_monthly_net = trips_monthly_net.sort_values(by=['month'])

	h3_visualization.plot_choropleth(
		trips_monthly_net,
		hex_col="hex_id",
		color_by_col="demand",
		center=lat_lon_leipzig,
		color_continuous_scale="RdBu",
		# fixed colour range, symmetric around zero
		range_color=(
			-50,
			50
		),
		animation_frame="month",
		opacity=0.7,
		zoom=10,
		labels={'demand': 'inflow - outflow'},
		mapbox_style="open-street-map",
	)

In [None]:
trips_shifted = trips_df.groupby("taxi_id").shift(1).dropna(subset=["trip_start_timestamp"])
trips_with_next = trips_df.merge(
    trips_shifted, left_index=True, right_index=True, how="inner", suffixes=("", "_next")
)
trips_with_next['taxi_id'] = trips_df['taxi_id']

In [None]:
selected_columns = ['pickup_centroid', 'pickup_centroid_next', 'h3_07_pickup', 'h3_07_pickup_next']
display_df = trips_with_next[selected_columns]
display_df

In [None]:
trips_with_next['idle_time'] = (
    trips_with_next.trip_end_timestamp - trips_with_next.trip_start_timestamp_next
)

In [None]:
trips_with_next.idle_time.describe()

In [None]:
trips_with_next['timeinterval'] = (
    trips_with_next.trip_start_timestamp.dt.floor('1D')
)

In [None]:
fig, ax = plt.subplots(figsize=(12, 6))

ax.plot(
    trips_with_next.groupby("timeinterval").idle_time.median().dt.total_seconds()
    / 60
    / 60,
)
ax.set_xlabel("Time interval")
ax.set_ylabel("Median idle time (hours)")

plt.show()

In [None]:
trips_with_next.head()

In [None]:
trips_with_next['month'] = trips_with_next.trip_start_timestamp.dt.month

In [None]:
idle_by_hex7_time_median = trips_with_next.groupby(["h3_07_pickup_next", "month"])[
    "idle_time"
].median().rename("idle_time_median")
idle_by_hex7_time_mean = trips_with_next.groupby(["h3_07_pickup_next", "month"])[
    "idle_time"
].mean().rename("idle_time_mean")

idle_by_hex7_time = pd.concat(
	[idle_by_hex7_time_median, idle_by_hex7_time_mean], axis=1
).reset_index()

In [None]:
idle_by_hex7_time["idle_time_median_days"] = (
    idle_by_hex7_time["idle_time_median"].dt.total_seconds() / 60 / 60 / 24
)
idle_by_hex7_time["idle_time_mean_days"] = (
    idle_by_hex7_time["idle_time_mean"].dt.total_seconds() / 60 / 60 / 24
)

In [None]:
idle_by_hex7 = idle_by_hex7_time.groupby("h3_07_pickup_next").mean().reset_index()

In [None]:
idle_by_hex7.rename(columns={"h3_07_pickup_next":"hex"}, inplace=True)
idle_by_hex7_time.rename(columns={"h3_07_pickup_next":"hex"}, inplace=True)

In [None]:
import math

idle_by_hex7['month'] = idle_by_hex7['month'].astype(int)

idle_by_hex7.head()

In [None]:
# Build the hexagon outline for every row of each frame from that frame's own
# 'hex' column. Computing the geometry of idle_by_hex7_time from idle_by_hex7
# would align the polygons by row index and attach them to unrelated hexagons,
# leaving all remaining rows without a geometry.
idle_by_hex7['geometry'] = idle_by_hex7.apply(add_geometry, axis=1)
idle_by_hex7_time['geometry'] = idle_by_hex7_time.apply(add_geometry, axis=1)

In [None]:
idle_by_hex7_time['geometry'].isna().sum()
idle_by_hex7_time = idle_by_hex7_time.dropna(axis=0)
idle_by_hex7_time

In [None]:
########### TEST ############### 

idle_by_hex7 = idle_by_hex7.sort_values('month')

variable = "idle_time_median_days"

fig = plot_frequency_test(
    dataset=idle_by_hex7,
    hover_name = 'hex',
    variable=variable,
    labels={variable: "Idle Time in Res7"},
    range_color=(0, idle_by_hex7[variable].quantile(0.9)),
    palette="reds",

)

fig.show()

In [None]:
idle_by_hex7 = idle_by_hex7.sort_values('month')

variable = "idle_time_median_days"

fig = plot_frequency(
    dataset=idle_by_hex7,
    variable=variable,
    labels={variable: "Idle Time in Res7"},
    range_color=(0, idle_by_hex7[variable].quantile(0.9)),
    palette="reds",

)

fig.show()