In [6]:
import geopandas as gpd

# Load GeoJSON file
gdf_neighborhoods = gpd.read_file('philadelphia.geojson')

# Check the first few entries to confirm the structure
print(gdf_neighborhoods[['name']].head())




                   name
0        Pennypack Park
1             Overbrook
2  Southwest Germantown
3         East Parkside
4          Germany Hill


In [8]:
import pandas as pd

data = pd.read_csv('yelp_academic_dataset_with_price_rating.csv')
# Convert DataFrame to GeoDataFrame
gdf_data = gpd.GeoDataFrame(
    data, 
    geometry=gpd.points_from_xy(data.longitude, data.latitude),
    crs="EPSG:4326"  # Make sure this matches your GeoJSON CRS
)

In [9]:
# Spatial join - points to polygons
data_with_neighborhoods = gpd.sjoin(gdf_data, gdf_neighborhoods, how="inner", op='intersects')

# Check the results
data_with_neighborhoods.head()


  if await self.run_code(code, result, async_=asy):


Unnamed: 0,business_id,name_left,address,city,state,postal_code,latitude,longitude,stars,review_count,attributes,Categories,price_rating,geometry,index_right,name_right,created_at,updated_at,cartodb_id
0,MUTTqe8uqyMdBl186RmNeA,Tuna Bar,205 Race St,Philadelphia,PA,19106,39.953949,-75.143226,4.0,245,"{'RestaurantsReservations': 'True', 'Restauran...",Japanese,2.0,POINT (-75.14323 39.95395),109,Old City,2013-03-19 16:41:50.508000+00:00,2013-03-19 16:41:50.743000+00:00,110
29,rVfe1LHo55A2fworkm_jVw,Capofitto,233 Chestnut St,Philadelphia,PA,19106,39.948578,-75.145023,4.5,205,"{'RestaurantsPriceRange2': '2', 'BikeParking':...",Italian,2.0,POINT (-75.14502 39.94858),109,Old City,2013-03-19 16:41:50.508000+00:00,2013-03-19 16:41:50.743000+00:00,110
38,8LonS_bxNmCDk3FWiG9lzA,Tomo Sushi & Ramen,228 Arch St,Philadelphia,PA,19106,39.951923,-75.144335,4.5,193,"{'Alcohol': ""u'none'"", 'RestaurantsTakeOut': '...",Japanese,2.0,POINT (-75.14433 39.95192),109,Old City,2013-03-19 16:41:50.508000+00:00,2013-03-19 16:41:50.743000+00:00,110
44,SIoCIxjn4jLt2O-4DajWJw,Mac's Tavern,226 Market St,Philadelphia,PA,19106,39.949794,-75.144739,3.5,446,"{'OutdoorSeating': 'True', 'BusinessParking': ...",American,2.0,POINT (-75.14474 39.94979),109,Old City,2013-03-19 16:41:50.508000+00:00,2013-03-19 16:41:50.743000+00:00,110
67,AWJyivVwe3t9BO-k8D0ynQ,Cafe Square One,50 S 3rd St,Philadelphia,PA,19106,39.948907,-75.14608,4.5,245,"{'BikeParking': 'False', 'RestaurantsPriceRang...",American,1.0,POINT (-75.14608 39.94891),109,Old City,2013-03-19 16:41:50.508000+00:00,2013-03-19 16:41:50.743000+00:00,110


In [10]:
# Assuming 'review_count' needs to be summed up
# Assuming 'data_with_neighborhoods' is your GeoDataFrame after the spatial join
data_grouped = data_with_neighborhoods.groupby('name_right')['review_count'].mean().reset_index()

# Rename the column to reflect that it contains averages
data_grouped.rename(columns={'review_count': 'average_review_count'}, inplace=True)

# Check the results
print(data_grouped)

            name_right  average_review_count
0              Airport            162.250000
1       Allegheny West            219.500000
2              Andorra            152.666667
3          Bella Vista            290.613636
4          Brewerytown            138.000000
..                 ...                   ...
87    Wissahickon Park            191.000000
88         Wissinoming            247.500000
89    Woodland Terrace            241.000000
90          Wynnefield            332.000000
91  Wynnefield Heights            117.000000

[92 rows x 2 columns]


In [12]:
# Assuming 'created_at' and 'updated_at' are the datetime fields
gdf_neighborhoods['created_at'] = gdf_neighborhoods['created_at'].astype(str)
gdf_neighborhoods['updated_at'] = gdf_neighborhoods['updated_at'].astype(str)

# Now proceed to merge the DataFrame with average review counts
gdf_neighborhoods = gdf_neighborhoods.merge(data_grouped, how='left', left_on='name', right_on='name_right')

# Convert the merged GeoDataFrame back to GeoJSON for use in Folium
geo_json_merged = json.loads(gdf_neighborhoods.to_json())


In [16]:
import folium
import geopandas as gpd
import pandas as pd
import json

# Load the GeoJSON file as a GeoDataFrame to perform operations like merge
gdf_neighborhoods = gpd.read_file('philadelphia.geojson')

# Assuming you have your data_grouped DataFrame prepared with 'name_right' and 'average_review_count'
# Example data_grouped creation (make sure to use your actual data processing)
# data_grouped = pd.DataFrame({
#     'name_right': ['Pennypack Park', 'Another Park'],
#     'average_review_count': [120, 150]
# })

# Merge your data into the GeoDataFrame based on the GeoJSON 'name' property
gdf_neighborhoods = gdf_neighborhoods.merge(data_grouped, left_on='name', right_on='name_right', how='left')

# Convert GeoDataFrame back to GeoJSON for Folium usage
geo_json_data = json.loads(gdf_neighborhoods.to_json())

# Create the Folium map centered around Philadelphia
m = folium.Map(location=[39.9526, -75.1652], zoom_start=12)

# Add the choropleth layer using the merged GeoJSON data
choropleth = folium.Choropleth(
    geo_data=geo_json_data,
    name='choropleth',
    data=data_grouped,
    columns=['name_right', 'average_review_count'],
    key_on='feature.properties.name',  # This matches the GeoJSON 'name' property
    fill_color='YlOrRd',  # Color scale can be adjusted
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Review Count per Neighborhood'
).add_to(m)

# Add tooltips using the GeoJson layer for more control over styling and interactivity
folium.GeoJson(
    geo_json_data,
    style_function=lambda feature: {
        'fillColor': '#ffff00',  # Fixed color for demonstration, consider dynamic coloring based on data
        'color': 'black',
        'weight': 0.5,
        'fillOpacity': 0.7
    },
    tooltip=folium.features.GeoJsonTooltip(
        fields=['name', 'average_review_count'],
        aliases=['Neighborhood:', 'Average Reviews:'],
        localize=True,
        sticky=False,
        labels=True,
        style="""
            background-color: #F0EFEF;
            border: 2px solid black;
            border-radius: 3px;
            box-shadow: 3px;
        """,
        max_width=250,
    )
).add_to(m)

# Add layer control to toggle on/off layers
folium.LayerControl().add_to(m)

# Save the map to an HTML file
map_file = 'neighborhood_review_count_map_tool.html'
m.save(map_file)
print(f"Map has been saved to: {map_file}")



TypeError: Object of type Timestamp is not JSON serializable

In [15]:
import geopandas as gpd

# Load the GeoJSON file
gdf_neighborhoods = gpd.read_file('philadelphia.geojson')

# Print the properties of the first feature to check the names
print(gdf_neighborhoods.iloc[0])

name                                             Pennypack Park
created_at                     2013-03-19 16:41:50.508000+00:00
updated_at                     2013-03-19 16:41:50.743000+00:00
cartodb_id                                                    1
geometry      MULTIPOLYGON (((-75.056455 40.087429, -75.0566...
Name: 0, dtype: object


In [None]:
import pandas as pd
import geopandas as gpd
import folium
import json

# Load the GeoJSON file as a GeoDataFrame
gdf_neighborhoods = gpd.read_file('philadelphia.geojson')

# Load the data
data = pd.read_csv('yelp_academic_dataset_with_price_rating.csv')

# Convert DataFrame to GeoDataFrame
gdf_data = gpd.GeoDataFrame(
    data, 
    geometry=gpd.points_from_xy(data.longitude, data.latitude),
    crs="EPSG:4326"  # Make sure this matches your GeoJSON CRS
)

# Perform spatial join - points to polygons
data_with_neighborhoods = gpd.sjoin(gdf_data, gdf_neighborhoods, how="inner", op='intersects')

# Assuming 'review_count' needs to be summed up
# Assuming 'data_with_neighborhoods' is your GeoDataFrame after the spatial join
data_grouped = data_with_neighborhoods.groupby('name_right')['review_count'].mean().reset_index()

# Rename the column to reflect that it contains averages
data_grouped.rename(columns={'review_count': 'average_review_count'}, inplace=True)

# Convert datetime columns to string for JSON serialization
for col in ['created_at', 'updated_at']:
    gdf_neighborhoods[col] = gdf_neighborhoods[col].astype(str)

# Merge GeoDataFrame with aggregated data
gdf_neighborhoods = gdf_neighborhoods.merge(data_grouped, left_on='name', right_on='name_right', how='left')

# Define a color scale
color_scale = cm.linear.YlOrRd_09.scale(
    gdf_neighborhoods['average_review_count'].min(),
    gdf_neighborhoods['average_review_count'].max()
)

# Create the Folium map
m = folium.Map(location=[39.9526, -75.1652], zoom_start=12)


# Add a GeoJson layer using the merged GeoDataFrame
folium.GeoJson(
    gdf_neighborhoods,
    style_function=lambda x: {'fillColor': '#ffff00', 'color': 'black', 'weight': 0.5, 'fillOpacity': 0.7},
    tooltip=folium.features.GeoJsonTooltip(
        fields=['name_right', 'average_review_count'],  # fields from the merged GeoDataFrame
        aliases=['Neighborhood:', 'Average Reviews:'],  # how you want the fields to be labeled in the tooltip
        style="""
            background-color: #F0EFEF;
            border: 2px solid black;
            border-radius: 3px;
            box-shadow: 3px;
        """,
        max_width=250,
    )
).add_to(m)

# Add a layer control to toggle on/off
folium.LayerControl().add_to(m)

# Save the map to an HTML file
map_file = 'trial10.html'
m.save(map_file)
print(f"Map has been saved to: {map_file}")
