In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
from folium.plugins import HeatMap
import h3
from shapely.geometry import Polygon
import geopandas
from geojson import Feature, Point, FeatureCollection, Polygon
from shapely.geometry import Polygon
import plotly.express as px
from datetime import datetime

In [None]:
# Load the cleaned, sampled taxi-trip dataset from parquet and show its column names.

trips_df = pd.read_parquet('../../data/rides/Taxi_Trips_Sampled_Cleaned.parquet')
trips_df.columns

In [None]:
# Load the point-of-interest table, then print a preview and the distinct
# values of its `category` and `amenity` columns.
poi_df = pd.read_parquet('../../data/poi/poi_data.parquet')
print(poi_df.head())
print(poi_df.category.unique())
print(poi_df.amenity.unique())

In [None]:
# Load the per-hexagon POI counts. Later cells read its `h3`, `h3_res` and
# `*_poi` columns.
poi_count_df = pd.read_parquet('../../data/poi/poi_hexagon_data.parquet')
poi_count_df.head()

In [None]:
 # Import necessary packages
import csv

# Build a {community-area number: community name} lookup from the census-tract
# CSV and use it to label each trip's pickup and dropoff community area.
csv_file = '../../data/census_tract/chicago_census_tract.csv'

# newline='' is what the csv module documents for files handed to csv.reader,
# so that quoted fields containing line breaks are parsed correctly.
with open(csv_file, 'r', newline='') as file:
    reader = csv.reader(file)

    # Skip the header row; the None default keeps an empty file from raising.
    next(reader, None)

    # Column 1 holds the community-area number (CA), column 2 its name
    # (COMMUNIT_1). Blank lines come back from csv.reader as [] and are skipped.
    # A later row with the same CA overwrites an earlier one.
    data_dict = {row[1]: row[2] for row in reader if row}

# Order the lookup by numeric community-area number (keys are still strings).
data_dict = dict(sorted(data_dict.items(), key=lambda item: int(item[0])))
print(data_dict)

# Integer-keyed copy used for the mapping below.
# NOTE(review): presumably the *_community_area columns are numeric - confirm.
new_data_dict = {int(key): value for key, value in data_dict.items()}

# Areas without an entry in the lookup map to NaN.
trips_df['pickup_name'] = trips_df['pickup_community_area'].map(new_data_dict)
trips_df['dropoff_name'] = trips_df['dropoff_community_area'].map(new_data_dict)
trips_df

In [None]:
# Sanity check: total of `sustenance_poi` over every row of poi_count_df.
# NOTE(review): poi_count_df is filtered by `h3_res` (6-9) further down, so this
# total presumably counts the same POIs once per resolution - confirm.
print(poi_count_df['sustenance_poi'].sum())

In [None]:
# Number of trips starting and ending in each neighbourhood, one row per
# neighbourhood. Building the frame from both Series at once takes the union of
# their indexes, so a neighbourhood that only appears as a dropoff (or only as
# a pickup) keeps its row instead of being dropped during alignment.
pickup_poi = pd.DataFrame({
    'pickup_count': trips_df['pickup_name'].value_counts(),
    'dropoff_count': trips_df['dropoff_name'].value_counts(),
})

# A neighbourhood missing on one side had no such trips: NaN -> 0, and the
# float columns produced by the NaNs go back to integers.
pickup_poi = pickup_poi.fillna(0).astype(int)
pickup_poi = pickup_poi.sort_index()
pickup_poi


In [None]:
# Sanity check: total number of trips that have a named pickup neighbourhood.
print(pickup_poi['pickup_count'].sum())

In [None]:
# Spot check: number of trips whose pickup neighbourhood is 'Dunning'
# (0 when that name does not occur at all).
pickup_name_counts = trips_df['pickup_name'].value_counts()
count_for_loop = pickup_name_counts.get('Dunning', 0)
print(count_for_loop)


In [None]:
# custom legend name from: https://stackoverflow.com/questions/64371174/how-to-change-variable-label-names-for-the-legend-in-a-plotly-express-line-chart
def custom_legend_name(new_names, figure=None):
    """Rename the traces of a figure, in trace order.

    Parameters
    ----------
    new_names : sequence of str
        ``new_names[i]`` becomes the ``name`` of ``figure.data[i]``.
    figure : object with a ``data`` sequence, optional
        Figure whose traces are renamed. When omitted, the notebook-global
        ``fig`` is used, which is how existing calls behave.

    Raises
    ------
    IndexError
        If ``new_names`` has more entries than the figure has traces.
    """
    # Passing the figure explicitly avoids depending on whichever figure the
    # global `fig` happens to reference when the cell is run.
    target = fig if figure is None else figure
    for i, new_name in enumerate(new_names):
        target.data[i].name = new_name


# Grouped bar chart of starting vs. ending trips per neighbourhood on a log axis.
fig = px.bar(
    x=pickup_poi.index,
    y=[pickup_poi['pickup_count'], pickup_poi['dropoff_count']],
    log_y=True,
    # Passing a list as `y` is wide-form input: Plotly Express then keys the
    # value axis as "value" and the legend as "variable", so a 'y' entry alone
    # never reaches the axis title. The 'y' key is kept for compatibility.
    labels={
        'x': 'Neighborhood',
        'y': 'Trip count',
        'value': 'Trip count',
        'variable': 'Trip type',
    },
    title='Starting and Ending Trips for each Neighborhood',
    color_discrete_sequence=["#4B2991","#F6A97A"],
    barmode='group'
)

# Replace the auto-generated trace names with readable legend entries.
custom_legend_name(['Starting trips','Ending trips'])
fig.update_yaxes(type='log', dtick='D3') # blend out intermediate (log) values
fig.show()

In [None]:
# Attach the POI counts of the pickup hexagon (and, separately, of the dropoff
# hexagon) to every trip. Left joins keep trips without a matching hexagon.
pois_starts = trips_df.merge(
    poi_count_df,
    how='left',
    left_on="h3_07_pickup",
    right_on="h3",
)
pois_ends = trips_df.merge(
    poi_count_df,
    how='left',
    left_on="h3_07_dropoff",
    right_on="h3",
)
pois_starts

In [None]:
# Sum of `sustenance_poi` over all trip rows, per (hexagon, pickup neighbourhood).
grouped_df = (
    pois_starts
    .groupby(['h3', 'pickup_name'])['sustenance_poi']
    .sum()
    .reset_index()
)
grouped_df

In [None]:
# Sanity check: grand total of the per-(hexagon, neighbourhood) sums.
print(grouped_df['sustenance_poi'].sum())

In [None]:
# Keep the first trip row seen for each pickup census tract.
# NOTE(review): the variable is named "unique_hex" but the de-duplication key
# is `pickup_census_tract`, not the hexagon id - confirm which was intended.
unique_hex_df = pois_starts.drop_duplicates(subset=['pickup_census_tract'])

# Group by hexagon and sum the sustenance POI counts.
grouped_sum = (
    unique_hex_df
    .groupby('h3')['sustenance_poi']
    .sum()
    .reset_index()
)
grouped_sum

In [None]:
# Sanity check: grand total of the per-hexagon sums after de-duplication.
print(grouped_sum['sustenance_poi'].sum())

In [None]:
# Sum of `sustenance_poi` over all trip rows, per pickup neighbourhood, kept as
# a one-column frame named `count` and indexed by neighbourhood.
start_poi = (
    pois_starts
    .groupby('pickup_name')['sustenance_poi']
    .sum()
    .to_frame('count')
)
start_poi

In [None]:
# Sanity check: total of `sustenance_poi` over every row of the trip/POI merge.
# NOTE(review): pois_starts comes from a left merge of the trips, so a hexagon's
# count is presumably added once per trip starting there - confirm.
print(pois_starts['sustenance_poi'].sum())

In [None]:
# Keep one row per distinct (pickup neighbourhood, sustenance POI count) pair.
unique_df = pois_starts.drop_duplicates(subset=['pickup_name', 'sustenance_poi'])

# Group by pickup neighbourhood and sum the remaining sustenance POI counts.
sustenance_counts = (
    unique_df
    .groupby('pickup_name')['sustenance_poi']
    .sum()
    .reset_index()
)

print(sustenance_counts)


In [None]:
# Sanity check: grand total of the de-duplicated per-neighbourhood sums.
print(sustenance_counts['sustenance_poi'].sum())

In [None]:
# Define a function that generates the hexagon geometry for each H3 cell.
# taken from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def add_geometry(row):
    """Return a ``Polygon`` for the H3 cell id stored in ``row['hex']``.

    The cell boundary is fetched with ``h3.h3_to_geo_boundary`` and wrapped in
    ``Polygon``. Intended for ``DataFrame.apply(..., axis=1)``.
    """
    # NOTE(review): the second positional argument is presumably the `geo_json`
    # flag of h3-py v3 (coordinates as a closed (lng, lat) ring) - confirm
    # against the installed h3 version.
    boundary = h3.h3_to_geo_boundary(row['hex'], True)
    # NOTE(review): `Polygon` is imported from both shapely.geometry and geojson
    # in the import cell; the later shapely import is the one in effect.
    hexagon = Polygon(boundary)
    return hexagon

In [None]:
# Function that visualizes the H3 map

# Adapted from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def plot_frequency(dataset, variable, labels, range_color, palette="RdBu",
                   hover_name=None, zoom=10.5, center=None, width=700, height=500):
    """Draw a Mapbox choropleth of one column over H3 hexagons.

    Parameters
    ----------
    dataset : DataFrame
        Must contain the columns ``hex`` and ``geometry`` (both read while
        building the GeoJSON) and the column named by ``variable``.
    variable : str
        Column whose values colour the hexagons.
    labels : dict
        Column-name to display-name mapping forwarded to Plotly Express.
    range_color : tuple
        ``(min, max)`` of the colour scale.
    palette : str, default "RdBu"
        Continuous colour scale.
    hover_name : str, optional
        Column shown as the hover title; ``None`` keeps Plotly's default.
    zoom : float, default 10.5
        Initial map zoom.
    center : dict, optional
        ``{"lat": ..., "lon": ...}`` of the initial view. Defaults to the
        coordinates that were previously hard-coded.
    width, height : int, default 700 and 500
        Figure size in pixels.

    Returns
    -------
    The Plotly figure, with all margins set to zero.
    """
    if center is None:
        # Built per call so the default dict is never shared between calls.
        center = {"lat": 41.881832, "lon": -87.623177}

    # Each feature carries the hexagon id as its `id`, which `locations='hex'`
    # is matched against.
    geojson_obj = hexagons_dataframe_to_geojson(dataset, value_field=variable)

    fig = px.choropleth_mapbox(
        dataset,
        width=width,
        height=height,
        geojson=geojson_obj,
        locations='hex',
        hover_name=hover_name,
        color=variable,
        color_continuous_scale=palette,
        range_color=range_color,
        mapbox_style='carto-positron',
        zoom=zoom,
        center=center,
        opacity=0.7,
        labels=labels,
    )
    fig.update_layout(
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
    )
    return fig

In [None]:
# Our approach uses the choropleth_mapbox function of Plotly Express to build a map.
# To do this, this function creates a GeoJSON-formatted feature collection that can be passed to Plotly Express.

# taken from https://medium.com/analytics-vidhya/how-to-create-a-choropleth-map-using-uber-h3-plotly-python-458f51593548

def hexagons_dataframe_to_geojson(df_hex, value_field, file_output = None):
    """Convert a hexagon DataFrame into a GeoJSON ``FeatureCollection``.

    Parameters
    ----------
    df_hex : DataFrame
        One row per hexagon with the columns ``geometry`` and ``hex``.
    value_field : str
        Column stored under the ``"value"`` property of each feature.
    file_output : str, optional
        When given, the collection is also written to this path as JSON.

    Returns
    -------
    FeatureCollection
        One feature per row, with ``id`` set to the row's ``hex`` value. It is
        returned whether or not ``file_output`` is given (previously the
        file-writing branch returned ``None``).
    """
    # Local import: the notebook's import cell never imports `json`, so the
    # file-writing branch used to fail with a NameError.
    import json

    list_features = [
        Feature(geometry = row['geometry'],
                id = row['hex'],
                properties = {"value": row[value_field]})
        for _, row in df_hex.iterrows()
    ]

    feat_collection = FeatureCollection(list_features)

    if file_output is not None:
        with open(file_output, "w") as f:
            json.dump(feat_collection, f)

    return feat_collection

In [None]:
# Prepare poi_count_df for the choropleth helpers, which read the columns
# `hex` and `geometry`.
poi_count_df = poi_count_df.rename(columns={'h3':'hex'})

poi_count_df["geometry"] = poi_count_df.apply(add_geometry, axis=1)

# Turn the frame into a GeoDataFrame with `geometry` as its active geometry
# column. The result has to be assigned: the earlier bare `set_geometry(...)`
# call returned a new object and left poi_count_df a plain DataFrame.
poi_count_df = geopandas.GeoDataFrame(poi_count_df, geometry='geometry')

# Display the GeoDataFrame
poi_count_df.head()


### Sustenance

In [None]:
# Sustenance POI count per resolution-6 hexagon; the colour scale is capped at
# the 90th percentile of the counts.
variable = "sustenance_poi"
sustenance6_df = poi_count_df.loc[poi_count_df["h3_res"] == 6]

fig = plot_frequency(
    sustenance6_df,
    variable=variable,
    labels={variable: "Sustenance POI in Res6"},
    palette="reds",
    range_color=(0, sustenance6_df[variable].quantile(0.9)),
)
fig.show()


In [None]:
# Sustenance POI count per resolution-7 hexagon; the colour scale is capped at
# the 90th percentile of the counts.
variable = "sustenance_poi"
sustenance7_df = poi_count_df.loc[poi_count_df["h3_res"] == 7]

fig = plot_frequency(
    sustenance7_df,
    variable=variable,
    labels={variable: "Sustenance POI in Res7"},
    palette="reds",
    range_color=(0, sustenance7_df[variable].quantile(0.9)),
)
fig.show()


In [None]:
# Per-resolution slices of the POI table (resolutions 7, 8 and 9).
# sustenance7_df is re-created here with the same filter an earlier cell uses.
sustenance7_df = poi_count_df.loc[poi_count_df["h3_res"] == 7]
sustenance8_df = poi_count_df.loc[poi_count_df["h3_res"] == 8]
sustenance9_df = poi_count_df.loc[poi_count_df["h3_res"] == 9]

In [None]:
# Sustenance POI count per resolution-8 hexagon; unlike the coarser maps the
# colour scale runs up to the maximum count.
variable = "sustenance_poi"
fig = plot_frequency(
    sustenance8_df,
    variable=variable,
    labels={variable: "sustenance hex8"},
    palette="greens",
    range_color=(0, sustenance8_df[variable].max()),
)
fig.show()

In [None]:
# Sustenance POI count per resolution-9 hexagon; the colour scale runs up to
# the maximum count.
variable = "sustenance_poi"
fig = plot_frequency(
    sustenance9_df,
    variable=variable,
    labels={variable: "sustenance hex9"},
    palette="greens",
    range_color=(0, sustenance9_df[variable].max()),
)
fig.show()

### Public transport

In [None]:
# Public-transport POI count per resolution-6 hexagon; the colour scale is
# capped at the 90th percentile of the counts.
variable = "public_transport_poi"
public_transport6_df = poi_count_df.loc[poi_count_df["h3_res"] == 6]

fig = plot_frequency(
    public_transport6_df,
    variable=variable,
    labels={variable: "Public transport POI in Res6"},
    palette="reds",
    range_color=(0, public_transport6_df[variable].quantile(0.9)),
)
fig.show()


In [None]:
# Public-transport POI count per resolution-7 hexagon; the colour scale is
# capped at the 90th percentile of the counts.
variable = "public_transport_poi"
public_transport7_df = poi_count_df.loc[poi_count_df["h3_res"] == 7]

fig = plot_frequency(
    public_transport7_df,
    variable=variable,
    labels={variable: "Public transport POI in Res7"},
    palette="reds",
    range_color=(0, public_transport7_df[variable].quantile(0.9)),
)
fig.show()


In [None]:
# Public-transport POI count per resolution-8 hexagon; the colour scale is
# capped at the 90th percentile of the counts.
variable = "public_transport_poi"
public_transport8_df = poi_count_df.loc[poi_count_df["h3_res"] == 8]

fig = plot_frequency(
    public_transport8_df,
    variable=variable,
    labels={variable: "Public transport POI in Res8"},
    palette="reds",
    range_color=(0, public_transport8_df[variable].quantile(0.9)),
)
fig.show()


In [None]:
# Public-transport POI count per resolution-9 hexagon; the colour scale is
# capped at the 90th percentile of the counts.
variable = "public_transport_poi"
public_transport9_df = poi_count_df.loc[poi_count_df["h3_res"] == 9]

fig = plot_frequency(
    public_transport9_df,
    variable=variable,
    labels={variable: "Public transport POI in Res9"},
    palette="reds",
    range_color=(0, public_transport9_df[variable].quantile(0.9)),
)
fig.show()


### Education

In [None]:
# Education POI count per resolution-6 hexagon; the colour scale is capped at
# the 90th percentile of the counts.
variable = "education_poi"
education6_df = poi_count_df.loc[poi_count_df["h3_res"] == 6]

fig = plot_frequency(
    education6_df,
    variable=variable,
    labels={variable: "Education POI in Res6"},
    palette="reds",
    range_color=(0, education6_df[variable].quantile(0.9)),
)
fig.show()


In [None]:
# Education POI count per resolution-7 hexagon; the colour scale is capped at
# the 90th percentile of the counts.
variable = "education_poi"
education7_df = poi_count_df.loc[poi_count_df["h3_res"] == 7]

fig = plot_frequency(
    education7_df,
    variable=variable,
    labels={variable: "Education POI in Res7"},
    palette="reds",
    range_color=(0, education7_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Education POI count per resolution-8 hexagon; the colour scale is capped at
# the 90th percentile of the counts.
variable = "education_poi"
education8_df = poi_count_df.loc[poi_count_df["h3_res"] == 8]

fig = plot_frequency(
    education8_df,
    variable=variable,
    labels={variable: "Education POI in Res8"},
    palette="reds",
    range_color=(0, education8_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Education POI count per resolution-9 hexagon; the colour scale is capped at
# the 90th percentile of the counts.
variable = "education_poi"
education9_df = poi_count_df.loc[poi_count_df["h3_res"] == 9]

fig = plot_frequency(
    education9_df,
    variable=variable,
    labels={variable: "Education POI in Res9"},
    palette="reds",
    range_color=(0, education9_df[variable].quantile(0.9)),
)
fig.show()

### Arts and culture

In [None]:
# Arts-and-culture POI count per resolution-6 hexagon; the colour scale is
# capped at the 90th percentile of the counts.
variable = "arts_and_culture_poi"
aac6_df = poi_count_df.loc[poi_count_df["h3_res"] == 6]

fig = plot_frequency(
    aac6_df,
    variable=variable,
    labels={variable: "Arts and culture in Res6"},
    palette="reds",
    range_color=(0, aac6_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Arts-and-culture POI count per resolution-7 hexagon; the colour scale is
# capped at the 90th percentile of the counts.
variable = "arts_and_culture_poi"
aac7_df = poi_count_df.loc[poi_count_df["h3_res"] == 7]

fig = plot_frequency(
    aac7_df,
    variable=variable,
    labels={variable: "Arts and culture in Res7"},
    palette="reds",
    range_color=(0, aac7_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Arts-and-culture POI count per resolution-8 hexagon; the colour scale is
# capped at the 90th percentile of the counts.
variable = "arts_and_culture_poi"
aac8_df = poi_count_df.loc[poi_count_df["h3_res"] == 8]

fig = plot_frequency(
    aac8_df,
    variable=variable,
    labels={variable: "Arts and culture in Res8"},
    palette="reds",
    range_color=(0, aac8_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Arts-and-culture POI count per resolution-9 hexagon; the colour scale is
# capped at the 90th percentile of the counts.
variable = "arts_and_culture_poi"
aac9_df = poi_count_df[poi_count_df["h3_res"] == 9]

fig = plot_frequency(
    dataset=aac9_df,
    variable=variable,
    # This map shows resolution 9; the label previously read "Res6"
    # (copy-paste slip from the resolution-6 cell).
    labels={variable: "Arts and culture in Res9"},
    range_color=(0, aac9_df[variable].quantile(0.9)),
    palette="reds"
)

fig.show()

### Sports

In [None]:
# Sports POI count per resolution-6 hexagon; the colour scale is capped at the
# 90th percentile of the counts.
variable = "sports_poi"
sports6_df = poi_count_df.loc[poi_count_df["h3_res"] == 6]

fig = plot_frequency(
    sports6_df,
    variable=variable,
    labels={variable: "Sports in Res6"},
    palette="reds",
    range_color=(0, sports6_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Sports POI count per resolution-7 hexagon; the colour scale is capped at the
# 90th percentile of the counts.
variable = "sports_poi"
sports7_df = poi_count_df.loc[poi_count_df["h3_res"] == 7]

fig = plot_frequency(
    sports7_df,
    variable=variable,
    labels={variable: "Sports in Res7"},
    palette="reds",
    range_color=(0, sports7_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Sports POI count per resolution-8 hexagon; the colour scale is capped at the
# 90th percentile of the counts.
variable = "sports_poi"
sports8_df = poi_count_df.loc[poi_count_df["h3_res"] == 8]

fig = plot_frequency(
    sports8_df,
    variable=variable,
    labels={variable: "Sports in Res8"},
    palette="reds",
    range_color=(0, sports8_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Sports POI count per resolution-9 hexagon; the colour scale is capped at the
# 90th percentile of the counts.
variable = "sports_poi"
sports9_df = poi_count_df.loc[poi_count_df["h3_res"] == 9]

fig = plot_frequency(
    sports9_df,
    variable=variable,
    labels={variable: "Sports in Res9"},
    palette="reds",
    range_color=(0, sports9_df[variable].quantile(0.9)),
)
fig.show()

In [None]:
# Share of each POI category among all POIs counted at H3 resolution 9.
poicategories = ['sustenance_poi',
       'public_transport_poi', 'education_poi', 'arts_and_culture_poi',
       'sports_poi']

# .copy() gives an independent frame, so adding `all_pois` below is a real
# write instead of an assignment to a filtered view of poi_count_df.
pois9 = poi_count_df[poi_count_df["h3_res"] == 9].copy()

# Presence/absence variant: every category count is clipped into [0, 1].
naive_pois = pois9.copy()
naive_pois[poicategories] = naive_pois[poicategories].clip(0, 1)

# NOTE(review): same filter as pois9 and not used in this cell; kept in case a
# later cell reads it - the name suggests trip data was intended, confirm.
trips_9 = poi_count_df[poi_count_df["h3_res"] == 9]

# Row-wise total over all categories. skipna=False matches the previous
# explicit column addition: a missing category count makes the row total NaN.
pois9["all_pois"] = pois9[poicategories].sum(axis=1, skipna=False)

# One vectorised division replaces the chained assignment
# `poiratios["ratio"][mask] = ...`, which writes to a temporary object and has
# no effect under pandas Copy-on-Write. The result is indexed by category name.
poiratios = (pois9[poicategories].sum() / pois9["all_pois"].sum()).to_frame("ratio")

poiratios

In [None]:
# Pie chart of each POI category's share of all POIs.
fig = px.pie(
    names=poicategories,
    values=poiratios["ratio"],
    title='POI ratio',
    color_discrete_sequence=["#4B2991","#F6A97A"],
)
fig.show()