In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from datetime import datetime
import numpy as np

In [2]:
# --- 1) Parameters ---
# Buffer distance drawn around every grid point when collecting nearby
# restaurants. The name implies metres; the value is handed to `.buffer()`
# unchanged, so it is interpreted in the units of whatever CRS the geometries
# are in at that point.
radius_m = 200

In [3]:
# --- 2) Load the grid (interpolated traffic + zip/pop columns) ---
# Read the grid table, turn its longitude/latitude columns into WGS84 points,
# then project the whole frame to EPSG:3857.
grid = pd.read_csv("pedestrian_with_zip_stats.csv")
grid_gdf = gpd.GeoDataFrame(
    grid,
    geometry=gpd.points_from_xy(grid["longitude"], grid["latitude"]),
    crs="EPSG:4326",
)
grid_gdf = grid_gdf.to_crs(epsg=3857)


In [4]:
# --- 3) Load the restaurants dataset ---
# Dates are parsed day-first while reading; two columns that are not used
# afterwards are dropped and the remaining date/price columns are renamed to
# the lower-case names the rest of the notebook refers to.
rest = (
    pd.read_csv(
        "mergedFinal.csv",
        parse_dates=["Opening_Date", "Closing_Date"],
        dayfirst=True,
    )
    .drop(columns=["Duration_in_Days", "Status"])
    .rename(
        columns={
            "Opening_Date": "startdate",
            "Closing_Date": "enddate",
            "Price_Level": "price_level",
        }
    )
)

# Point geometry from Longitude/Latitude (WGS84), projected to EPSG:3857.
rest_gdf = gpd.GeoDataFrame(
    rest,
    geometry=gpd.points_from_xy(rest["Longitude"], rest["Latitude"]),
    crs="EPSG:4326",
)
rest_gdf = rest_gdf.to_crs(epsg=3857)

In [5]:
# --- 4) Survival time of every restaurant, in whole days ---
# `today` is reused further down as the reference point for the 3-year window.
today = pd.Timestamp(datetime.today())

# A missing end date is replaced by `today` before subtracting the start date.
rest_gdf["survival_days"] = (
    rest_gdf["enddate"]
    .fillna(today)
    .sub(rest_gdf["startdate"])
    .dt.days
)


In [6]:
# --- 5) Build a buffer polygon around each grid point ---
# grid_gdf is stored in EPSG:3857 (Web Mercator). Distances in that CRS are
# stretched by roughly 1/cos(latitude), so buffering by `radius_m` directly
# there produces circles that are too small on the ground (about 113 m
# instead of 200 m at the ~55.7 deg N shown in the preview output).
# Buffer in a locally accurate metric CRS instead, then reproject the
# polygons back to the grid's own CRS so the spatial join against the
# restaurant points keeps working unchanged.
metric_crs = grid_gdf.estimate_utm_crs()

buffers = grid_gdf.copy()
buffers["geometry"] = (
    grid_gdf.geometry
    .to_crs(metric_crs)      # true metres around the data's location
    .buffer(radius_m)        # radius is now a real ground distance
    .to_crs(grid_gdf.crs)    # back to the CRS the rest of the notebook uses
)

In [7]:
# --- 6) Spatial-join restaurants INTO buffers ---
# Each restaurant row is paired with every buffer polygon it lies within;
# only the buffer geometry is passed on the right-hand side, so the sole
# column added is the matching buffer's index.
joined = gpd.sjoin(rest_gdf, buffers[["geometry"]], how="inner", predicate="within")

# That index identifies the grid point the buffer was built from.
joined = joined.rename(columns={"index_right": "grid_index"})

In [8]:
# --- 7) Aggregate stats per grid_index ---
def agg_stats(df, reference_date=None):
    """Summarise the restaurants belonging to one group of rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single group. Must contain the columns ``startdate``,
        ``enddate``, ``survival_days``, ``Rating``, ``Number_of_Reviews``
        and ``price_level``.
    reference_date : timestamp-like, optional
        The "now" used for the 3-year look-back window. When omitted, the
        module-level ``today`` is used, which is the previous behaviour, so
        ``groupby(...).apply(agg_stats)`` keeps working unchanged.

    Returns
    -------
    pandas.Series
        Counts (total / open / closed), survival statistics in days, months
        and years, openings and closures inside the look-back window, and
        the mean rating, review count and price level.
    """
    # Resolve the reference point explicitly so the function can be called
    # (and tested) without relying on notebook-global state.
    reference = today if reference_date is None else pd.Timestamp(reference_date)
    window_start = reference - pd.DateOffset(years=3)

    # A restaurant counts as closed when it has an end date.
    total = len(df)
    closed = df["enddate"].notna().sum()
    open_ = total - closed

    survival = df["survival_days"]
    mean_days = survival.mean()  # computed once, reused for months/years

    return pd.Series({
        "num_restaurants_total": total,
        "num_restaurants_open": open_,
        "num_restaurants_closed": closed,
        "avg_survival_days": mean_days,
        "median_survival_days": survival.median(),
        "avg_survival_months": mean_days / 30.44,
        "avg_survival_years": mean_days / 365.25,
        # Comparisons against a missing date are False, so open restaurants
        # never count as recent closures.
        "openings_last_3_years": (df["startdate"] >= window_start).sum(),
        "closures_last_3_years": (df["enddate"] >= window_start).sum(),
        "mean_rating": df["Rating"].mean(),
        "mean_reviews": df["Number_of_Reviews"].mean(),
        "mean_price_level": df["price_level"].mean(),
    })

# One row of aggregated statistics per grid point (indexed by grid_index).
stats = joined.groupby(by="grid_index").apply(agg_stats)

In [9]:
# --- 8) Compute code-counts pivot table and join to stats ---
# Number of joined restaurants per (grid point, Branchekod), pivoted so each
# code becomes its own `code_<value>` column; missing combinations are 0.
code_counts = (
    joined.groupby(["grid_index", "Branchekod"])
          .size()
          .unstack(fill_value=0)
          .add_prefix("code_")
)

# `stats` is rebound to itself here, so re-running this cell would try to
# join code_* columns onto a frame that already has them and fail on the
# overlapping names. Dropping any existing code_* columns first makes the
# cell safe to re-run; on a first run nothing is dropped.
# NOTE: the trailing fillna(0) applies to the whole frame, so NaN means in
# `stats` (e.g. a group with no ratings) also become 0.
existing_code_cols = [col for col in stats.columns if col.startswith("code_")]
stats = (
    stats.drop(columns=existing_code_cols)
         .join(code_counts, how="left")
         .fillna(0)
)

In [10]:
# --- 9) Attach the per-grid-point statistics to the grid ---
# Index-based left join: every grid point is kept, and points that have no
# row in `stats` come back with NaN in the stats columns.
result = grid_gdf.join(stats, how="left")

# Replace those NaNs with 0, but only in the columns that came from `stats`.
# NOTE: this zeroes averages as well as counts for such grid points.
result = result.fillna({col: 0 for col in stats.columns})

In [11]:
# --- 10) Back to geographic coordinates ---
# Reproject to EPSG:4326 and refresh the coordinate columns from the
# reprojected point geometry (y is latitude, x is longitude).
out = result.to_crs(epsg=4326)
out["latitude"], out["longitude"] = out.geometry.y, out.geometry.x

In [12]:
# --- 11) Keep only the columns wanted in the export ---
final_cols = [
    # coordinates
    "latitude", "longitude",
    # columns carried over from the grid file
    "aadt_fod_7_19", "hvdt_fod_7_19",
    "postal_code", "Total", "Men", "Women", "population_density_km2",
    # restaurant counts
    "num_restaurants_total", "num_restaurants_open", "num_restaurants_closed",
    # survival statistics
    "avg_survival_days", "median_survival_days",
    "avg_survival_months", "avg_survival_years",
    # activity within the 3-year window
    "openings_last_3_years", "closures_last_3_years",
    # mean rating / reviews / price level
    "mean_rating", "mean_reviews", "mean_price_level",
    # every per-code count column, whatever codes occur in the data
    *[col for col in stats.columns if col.startswith("code_")],
]

# Independent copy so later edits do not touch `out`.
final_df = out[final_cols].copy()


In [13]:
# Preview the first five rows of the export table.
final_df.head(n=5)

Unnamed: 0,latitude,longitude,aadt_fod_7_19,hvdt_fod_7_19,postal_code,Total,Men,Women,population_density_km2,num_restaurants_total,...,avg_survival_months,avg_survival_years,openings_last_3_years,closures_last_3_years,mean_rating,mean_reviews,mean_price_level,code_561110,code_561190,code_563010
0,55.70734,12.454837,1305.544721,1333.896656,2700,45950.0,22892.0,23058.0,5603.658537,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,55.708353,12.454837,1207.56447,1232.454639,2700,45950.0,22892.0,23058.0,5603.658537,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,55.709365,12.454837,1095.344012,1116.32109,2700,45950.0,22892.0,23058.0,5603.658537,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,55.710377,12.454837,994.907609,1012.386543,2700,45950.0,22892.0,23058.0,5603.658537,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,55.711389,12.454837,936.447968,951.898583,2700,45950.0,22892.0,23058.0,5603.658537,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# --- 12) Save ---
# The path is defined once and reused in the message, so the confirmation
# always names the file that was actually written (it previously reported a
# different file name from the one passed to to_csv).
output_path = "df_survival_analysis.csv"
final_df.to_csv(output_path, index=False)
print(f"Saved {output_path}")

Saved grid_with_all_restaurant_stats.csv
