In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
from folium.plugins import HeatMap
import h3
from shapely.geometry import Polygon
import geopandas
from geojson import Feature, Point, FeatureCollection, Polygon
from shapely.geometry import Polygon
import plotly.express as px

In [None]:
# Load the taxi trips table from the parquet file (path is relative to the notebook's working directory).
trips_df = pd.read_parquet('../../data/rides/Taxi_Trips_Sampled_Cleaned.parquet')

In [None]:
# Overview of the columns: dtypes and non-null counts.
trips_df.info()

In [None]:
# Preview the first rows.
trips_df.head()

In [None]:
# Summary statistics of the table.
trips_df.describe()

In [None]:
# List the column names.
trips_df.columns

In [None]:
# Give the centroid and census-tract columns snake_case names (modifies trips_df in place).
trips_df.rename(
    columns={
        "Pickup Centroid": "pickup_centroid",
        "Dropoff Centroid": "dropoff_centroid",
        "Pickup Census Tract": "pickup_census_tract",
        "Dropoff Census Tract": "dropoff_census_tract",
    },
    inplace=True,
)

In [None]:
from shapely import wkt

In [None]:
# Parse the pickup centroid column with GeoSeries.from_wkt and store the geometries back in the same column.
# NOTE(review): the column is overwritten, so re-running this cell without reloading trips_df
# would presumably fail because the values are no longer WKT text - confirm.
trips_df["pickup_centroid"] = geopandas.GeoSeries.from_wkt(trips_df["pickup_centroid"])

In [None]:
#geo_df_test = geopandas.GeoDataFrame(trips_df)
#geo_df_test

In [None]:
# trips_df was created by pd.read_parquet, i.e. it is a plain pandas DataFrame and has no
# set_geometry() method, so the previous call raised AttributeError. Wrap the table in a
# GeoDataFrame with pickup_centroid as the active geometry instead. The result is only
# displayed; the following cells keep working with trips_df.
geopandas.GeoDataFrame(trips_df, geometry='pickup_centroid').head()

In [None]:
# Build [lat, lon] pairs (y first, then x) in the order folium expects.
# Approach from https://geopandas.org/en/stable/gallery/plotting_with_folium.html
geo_df_list = [[pt.y, pt.x] for pt in trips_df["pickup_centroid"]]


In [None]:
# One row per trip: the [lat, lon] pairs become columns 0 and 1,
# indexed by the census tract the trip started in.
geo_df_pickup = geopandas.GeoDataFrame(
    geo_df_list,
    index=trips_df["pickup_census_tract"],
)
geo_df_pickup

In [None]:
# Move the census-tract index into a regular "pickup_census_tract" column.
# The coordinate columns keep their default integer labels: 0 holds the
# latitude and 1 the longitude (they are not renamed).
geo_df_pickup = geo_df_pickup.reset_index()
geo_df_pickup

# Heatmap - Taxi demand

In [None]:
# Plotting a heatmap of the pickup locations; its intensity reflects how many trips start at each point

trips_heatmap = folium.Map(
    location=(41.881832, -87.623177),  # Chicago coordinates from https://www.latlong.net/place/chicago-il-usa-1855.html
    zoom_start=13,
    control_scale=True,
    max_zoom=20,
)

# The heat layer uses every trip, so busy locations show up hotter.
trips_heatmap.add_child(plugins.HeatMap(geo_df_list, radius=30))

# geo_df_pickup holds one row per trip, so the same (tract, lat, lon) combination
# occurs many times. Draw every distinct location once instead of stacking
# identical markers on top of each other, which only bloats the rendered map.
pickup_locations = geo_df_pickup.drop_duplicates(subset=["pickup_census_tract", 0, 1])

for _, row in pickup_locations.iterrows():
    folium.CircleMarker(
        radius=5,
        location=[row[0], row[1]],  # columns 0 / 1 hold latitude / longitude
        popup=row["pickup_census_tract"],
        color="crimson",
        fill_color="crimson",
    ).add_to(trips_heatmap)


trips_heatmap

Census tract 17031839100.0 has the most pickups, which makes it the pickup hotspot.

In [None]:
# Parse the dropoff centroid column with GeoSeries.from_wkt and store the geometries back in the same column.
# NOTE(review): the column is overwritten, so re-running this cell without reloading trips_df
# would presumably fail because the values are no longer WKT text - confirm.
trips_df["dropoff_centroid"] = geopandas.GeoSeries.from_wkt(trips_df["dropoff_centroid"])

In [None]:
# trips_df was created by pd.read_parquet, i.e. it is a plain pandas DataFrame and has no
# set_geometry() method, so the previous call raised AttributeError. Wrap the table in a
# GeoDataFrame with dropoff_centroid as the active geometry instead. The result is only
# displayed; the following cells keep working with trips_df.
geopandas.GeoDataFrame(trips_df, geometry='dropoff_centroid').head()

In [None]:
# Build [lat, lon] pairs (y first, then x) for every dropoff centroid.
geo_df_list_dropoff = [[pt.y, pt.x] for pt in trips_df["dropoff_centroid"]]

In [None]:
# One row per trip: the [lat, lon] pairs become columns 0 and 1,
# indexed by the census tract the trip ended in.
geo_df_dropoff = geopandas.GeoDataFrame(
    geo_df_list_dropoff,
    index=trips_df["dropoff_census_tract"],
)

In [None]:
# Move the census-tract index into a regular "dropoff_census_tract" column;
# columns 0 and 1 keep holding latitude and longitude.
geo_df_dropoff = geo_df_dropoff.reset_index()
geo_df_dropoff

In [None]:
# Same map as for the pickups, this time for the dropoff locations.
trips_heatmap = folium.Map(
    location=(41.881832, -87.623177),  # Chicago coordinates from https://www.latlong.net/place/chicago-il-usa-1855.html
    zoom_start=13,
    control_scale=True,
    max_zoom=20,
)

# The heat layer uses every trip, so busy locations show up hotter.
trips_heatmap.add_child(plugins.HeatMap(geo_df_list_dropoff, radius=30))

# geo_df_dropoff holds one row per trip, so the same (tract, lat, lon) combination
# occurs many times. Draw every distinct location once instead of stacking
# identical markers on top of each other, which only bloats the rendered map.
dropoff_locations = geo_df_dropoff.drop_duplicates(subset=["dropoff_census_tract", 0, 1])

for _, row in dropoff_locations.iterrows():
    folium.CircleMarker(
        radius=5,
        location=[row[0], row[1]],  # columns 0 / 1 hold latitude / longitude
        popup=row["dropoff_census_tract"],
        color="crimson",
        fill_color="crimson",
    ).add_to(trips_heatmap)


trips_heatmap

Census tract 17031839100.0 is also the dropoff hotspot, just as it is for pickups.

# Incoming and outgoing trips

In [None]:
# For each H3 resolution (7, 8, 9) collect every hexagon id that appears as a
# pickup or a dropoff cell, i.e. every hexagon where at least one trip started or ended.

hexagons7_df = pd.DataFrame(
    {"hex": pd.concat([trips_df["h3_07_Pickup"], trips_df["h3_07_Dropoff"]]).unique()}
)
hexagons8_df = pd.DataFrame(
    {"hex": pd.concat([trips_df["h3_08_Pickup"], trips_df["h3_08_Dropoff"]]).unique()}
)
hexagons9_df = pd.DataFrame(
    {"hex": pd.concat([trips_df["h3_09_Pickup"], trips_df["h3_09_Dropoff"]]).unique()}
)

# Only the last bare expression of a cell is rendered, so just the
# resolution-9 preview shows up in the output.
hexagons7_df.head(3)
hexagons8_df.head(3)
hexagons9_df.head(3)

In [None]:
# Defining a function that generates the hexagon geometry for each hexagon
# taken from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def add_geometry(row):
    """Return the boundary polygon of the H3 cell stored in ``row['hex']``.

    Args:
        row: a row (or any mapping) with a ``'hex'`` entry holding an H3 cell id.

    Returns:
        A ``Polygon`` built from the cell's boundary points.
    """
    # geo_json=True requests the boundary in GeoJSON conventions
    # (presumably lng/lat order with a closed ring - see the h3 docs).
    boundary = h3.h3_to_geo_boundary(row['hex'], geo_json=True)
    return Polygon(boundary)

In [None]:
# Attach the boundary polygon of every hexagon as a 'geometry' column, one frame per resolution.

hexagons7_df['geometry'] = hexagons7_df.apply(add_geometry, axis=1)
hexagons8_df['geometry'] = hexagons8_df.apply(add_geometry, axis=1)
hexagons9_df['geometry'] = hexagons9_df.apply(add_geometry, axis=1)

hexagons7_df.head(3)


In [None]:
# Defining a function that counts trips for a given group-by column

def calculate_hexagon_trips(hexagons_df, label, group_by, trips=None):
    """Add a trip-count column to ``hexagons_df`` (modified in place).

    The trips are grouped by ``group_by`` and the group sizes are aligned with
    the index of ``hexagons_df``, so that index must hold the same ids as the
    ``group_by`` column.

    Args:
        hexagons_df: DataFrame indexed by hexagon id; receives the new column.
        label: name of the column to create (or overwrite).
        group_by: column of the trips table that holds the hexagon id.
        trips: trips table to count over. Defaults to the notebook-level
            ``trips_df`` so existing three-argument calls keep working.

    Returns:
        None.
    """
    if trips is None:
        # Backward-compatible default: fall back to the global trips table.
        trips = trips_df

    # Assignment aligns the counts on the hexagon index ...
    hexagons_df[label] = trips.groupby(group_by).size()
    # ... and hexagons without any matching trip get 0 instead of NaN.
    hexagons_df[label] = hexagons_df[label].fillna(value=0)

In [None]:
# Count starting and ending trips per hexagon. The counts are aligned on the
# hexagon id, so each frame is indexed by 'hex' while counting and gets its
# regular 'hex' column back afterwards.

# Resolution 7
hexagons7_df = hexagons7_df.set_index('hex')
calculate_hexagon_trips(hexagons7_df, label="starting_trips_07", group_by="h3_07_Pickup")
calculate_hexagon_trips(hexagons7_df, label="ending_trips_07", group_by="h3_07_Dropoff")
hexagons7_df = hexagons7_df.reset_index()

# Resolution 8
hexagons8_df = hexagons8_df.set_index('hex')
calculate_hexagon_trips(hexagons8_df, label="starting_trips_08", group_by="h3_08_Pickup")
calculate_hexagon_trips(hexagons8_df, label="ending_trips_08", group_by="h3_08_Dropoff")
hexagons8_df = hexagons8_df.reset_index()

# Resolution 9
hexagons9_df = hexagons9_df.set_index('hex')
calculate_hexagon_trips(hexagons9_df, label="starting_trips_09", group_by="h3_09_Pickup")
calculate_hexagon_trips(hexagons9_df, label="ending_trips_09", group_by="h3_09_Dropoff")
hexagons9_df = hexagons9_df.reset_index()

hexagons9_df.head(3)

In [None]:
# Computing the hex id for each station and merging the results with the hexagon data afterwards

#stations_df["hex"] = stations_df.apply(
#    lambda station: convert_to_hex(station["lat"], station["lon"]), axis=1
#)

#stations_df = pd.merge(stations_df, hexagons_df, left_on="hex", right_on="hex")
#stations_df.head(3)

In [None]:
# Our approach uses the choropleth_mapbox function of Plotly Express to build a map.
# To do this, this function creates a GeoJSON feature collection that can be passed to Plotly Express.

# taken from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def hexagons_dataframe_to_geojson(df_hex, value_field, file_output = None):
    """Convert a hexagon DataFrame into a GeoJSON feature collection.

    Each row becomes one feature whose geometry is ``row['geometry']``, whose
    id is ``row['hex']`` and whose ``"value"`` property is ``row[value_field]``.

    Args:
        df_hex: DataFrame with at least the columns ``'geometry'``, ``'hex'``
            and ``value_field``.
        value_field: name of the column stored as the ``"value"`` property.
        file_output: optional path. When given, the collection is written to
            that file as JSON instead of being returned.

    Returns:
        The feature collection when ``file_output`` is None, otherwise None.
    """
    # Local import: the notebook's import cell never brings in json, so the
    # file_output branch used to fail with a NameError.
    import json

    list_features = [
        Feature(geometry = row['geometry'],
                id = row['hex'],
                properties = {"value": row[value_field]})
        for _, row in df_hex.iterrows()
    ]

    feat_collection = FeatureCollection(list_features)

    if file_output is not None:
        with open(file_output, "w") as f:
            json.dump(feat_collection, f)
        return None

    return feat_collection

In [None]:
# Function that visualizes a value per H3 hexagon on a map

# Adapted from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def plot_frequency(dataset, variable, labels, range_color, palette="RdBu"):
    """Build a Plotly choropleth map that colours each hexagon by ``variable``.

    Args:
        dataset: hexagon DataFrame with the columns ``'hex'``, ``'geometry'``
            and ``variable``.
        variable: name of the column that drives the colour.
        labels: mapping passed to Plotly Express as ``labels``.
        range_color: (min, max) of the colour scale.
        palette: name of the continuous colour scale.

    Returns:
        The Plotly figure; the caller decides when to show it.
    """
    hex_geojson = hexagons_dataframe_to_geojson(dataset, value_field=variable)

    map_options = dict(
        width=700,
        height=500,
        geojson=hex_geojson,
        locations='hex',  # the features created above carry the hex id as their id
        color=variable,
        color_continuous_scale=palette,
        range_color=range_color,
        mapbox_style='carto-positron',
        zoom=10.5,
        center={"lat": 41.881832, "lon": -87.623177},
        opacity=0.7,
        labels=labels,
    )
    fig = px.choropleth_mapbox(dataset, **map_options)

    # Remove the margins so the map fills the whole figure.
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    return fig

# Hex 07

In [None]:
variable = "starting_trips_07"

# The colour scale ends at the 0.9 quantile of the counts, so the busiest
# hexagons do not dominate the scale.
fig = plot_frequency(
    hexagons7_df,
    variable,
    {variable: "Starting trips 07"},
    (0, hexagons7_df[variable].quantile(0.9)),
    "reds",
)

fig.show()

In [None]:
variable = "ending_trips_07"

# The colour scale ends at the 0.9 quantile of the counts, so the busiest
# hexagons do not dominate the scale.
fig = plot_frequency(
    hexagons7_df,
    variable,
    {variable: "Ending trips 07"},
    (0, hexagons7_df[variable].quantile(0.9)),
    "reds",
)

fig.show()

# Hex8

In [None]:
variable = "starting_trips_08"

# The colour scale ends at the 0.9 quantile of the counts, so the busiest
# hexagons do not dominate the scale.
fig = plot_frequency(
    hexagons8_df,
    variable,
    {variable: "Starting trips 08"},
    (0, hexagons8_df[variable].quantile(0.9)),
    "reds",
)

fig.show()

In [None]:
variable = "ending_trips_08"

# The colour scale ends at the 0.9 quantile of the counts, so the busiest
# hexagons do not dominate the scale.
fig = plot_frequency(
    dataset=hexagons8_df,
    variable=variable,
    # Fixed copy-paste slip: this map shows resolution 08, but was labelled "07".
    labels={variable: "Ending trips 08"},
    range_color=(0, hexagons8_df[variable].quantile(0.9)),
    palette="reds"
)

fig.show()

# Hex9

In [None]:
variable = "starting_trips_09"

# The colour scale ends at the 0.9 quantile of the counts, so the busiest
# hexagons do not dominate the scale.
fig = plot_frequency(
    hexagons9_df,
    variable,
    {variable: "Starting trips 09"},
    (0, hexagons9_df[variable].quantile(0.9)),
    "reds",
)

fig.show()

In [None]:
variable = "ending_trips_09"

# The colour scale ends at the 0.9 quantile of the counts, so the busiest
# hexagons do not dominate the scale.
fig = plot_frequency(
    hexagons9_df,
    variable,
    {variable: "Ending trips 09"},
    (0, hexagons9_df[variable].quantile(0.9)),
    "reds",
)

fig.show()

# Census tract dict


In [None]:
csv_file = '../../data/census_tract/chicago_census_tract.csv'

import csv

# Lookup table: value of the first CSV column -> value of the third CSV column.
# NOTE(review): the first column is assumed to hold the tract GEOID (GEOID20) and
# the third the community name (COMMUNIT_1); the earlier comment said "second
# column" while the code has always read index 2 - confirm against the CSV header.
data_dict = {}

# newline='' is what the csv module documentation asks for, so that line breaks
# inside quoted fields are handled by the reader itself.
with open(csv_file, 'r', newline='') as file:
    reader = csv.reader(file)

    # Skip the header row; the default keeps an empty file from raising StopIteration.
    next(reader, None)

    for row in reader:
        geoid = row[0]      # first column
        community = row[2]  # third column (index 2)

        data_dict[geoid] = community

# Print the resulting dictionary
print(data_dict)



In [None]:
# The CSV reader yields the keys as text; convert them to int so they can be
# matched against the census tract numbers in trips_df.
new_data_dict = dict((int(key), value) for key, value in data_dict.items())

# Show the converted dictionary
print(new_data_dict)

In [None]:
# Look up each pickup census tract in new_data_dict; tracts that are not
# in the dictionary end up as NaN in 'pickup_name'.
trips_df['pickup_name'] = trips_df['pickup_census_tract'].map(new_data_dict)
trips_df

In [None]:
# Sanity check of the key types. Collect the distinct types once instead of
# printing one line per dictionary key, which flooded the cell output.
key_types = {type(key) for key in new_data_dict}

# Print the data types
print(key_types)

In [None]:
# Number of trips whose pickup census tract has no entry in the lookup dictionary.
trips_df['pickup_name'].isna().sum()

# Average idle time between trips

# Trip length

# POI