In [None]:
Convert a KML file to a CSV file

In [None]:
from google.colab import files, output
up = files.upload()           # choose your .kml / .kmz
in_path = next(iter(up.keys()))          # first uploaded file
print(f"Uploaded: {in_path}")

!pip -q install geopandas pyproj fiona shapely

import geopandas as gpd, pathlib, pandas as pd

gdf = gpd.read_file(in_path, driver="KML")      # EPSG:4326
if gdf.geom_type.isin(["MultiPoint","MultiLineString","MultiPolygon"]).any():
    gdf = gdf.explode(index_parts=False).reset_index(drop=True)

gdf["lon"] = gdf.geometry.x
gdf["lat"] = gdf.geometry.y
gdf = gdf.drop(columns="geometry")

out_path = pathlib.Path(in_path).with_suffix(".csv").name
gdf.to_csv(out_path, index=False)
print(f"✅  Saved {len(gdf):,} rows → {out_path}")

files.download(out_path)

Join the two tables on the name column to attach lon and lat

In [None]:
import pandas as pd, re, sys
from google.colab import files

def prompt_choice(options, prompt):
    """Print a numbered menu of ``options`` and return the one the user picks.

    The menu is numbered from 1. The user is re-prompted until they enter a
    whole number between 1 and ``len(options)``.

    Parameters
    ----------
    options : sequence
        Candidate values (here: column names) to choose from.
    prompt : str
        Text passed to ``input()`` on every attempt.

    Returns
    -------
    The selected element of ``options``.

    Raises
    ------
    ValueError
        If ``options`` is empty (no answer could ever be valid).
    """
    if len(options) == 0:
        raise ValueError("prompt_choice() needs at least one option")

    for i, col in enumerate(options, start=1):
        print(f"{i}. {col}")

    while True:
        try:
            number = int(input(prompt))
        except ValueError:
            number = 0  # not a number: falls through to the range check below
        # Explicit range check: 0 or a negative number would otherwise become
        # a negative index and silently select an option from the end.
        if 1 <= number <= len(options):
            return options[number - 1]
        print("❌  Invalid choice, try again.")

def clean_key(s):
    """Normalise a value into a join key: its text form, upper-cased and trimmed."""
    text = str(s)
    upper = text.upper()
    return upper.strip()

# Join an attribute table to a coordinate table on a normalised name key and
# download the result as restaurants_joined.csv (columns: __key, lon, lat,
# then the attribute columns).

# --- attribute table --------------------------------------------------------
print("⬆️  Upload your ATTRIBUTE CSV (pattern, mark, dates …)")
attr_path = next(iter(files.upload()))
attr_df = pd.read_csv(attr_path)
print("\nAttribute columns:")
attr_cols = attr_df.columns.tolist()
attr_name_col = prompt_choice(attr_cols, "Select restaurant-name column # → ")

# --- coordinate table -------------------------------------------------------
print("\n⬆️  Upload your COORDINATE CSV (Name + lon + lat) …")
coord_path = next(iter(files.upload()))
coord_df = pd.read_csv(coord_path)
print("\nCoordinate columns:")
coord_cols = coord_df.columns.tolist()
coord_name_col = prompt_choice(coord_cols, "Select name column # → ")
coord_lon_col = prompt_choice(coord_cols, "Select longitude column # → ")
coord_lat_col = prompt_choice(coord_cols, "Select latitude  column # → ")

# --- build join keys --------------------------------------------------------
# Rows without a name are dropped first: str(NaN) would become the key "NAN"
# and every nameless row of one table would match every nameless row of the
# other.
attr_keyed = attr_df.dropna(subset=[attr_name_col]).copy()
coord_keyed = coord_df.dropna(subset=[coord_name_col]).copy()
attr_keyed['__key'] = attr_keyed[attr_name_col].apply(clean_key)
coord_keyed['__key'] = coord_keyed[coord_name_col].apply(clean_key)

n_nameless = (len(attr_df) - len(attr_keyed)) + (len(coord_df) - len(coord_keyed))
if n_nameless:
    print(f"\n⚠️  Ignored {n_nameless:,} rows with a missing name")

# A name that occurs several times on the coordinate side is matched to every
# attribute row with that name, so the output fans out. Warn, do not guess.
n_dup = int(coord_keyed['__key'].duplicated().sum())
if n_dup:
    print(f"⚠️  {n_dup:,} duplicate names in the coordinate table – "
          "matching attribute rows will be repeated")

# --- join -------------------------------------------------------------------
merged = (
    coord_keyed[['__key', coord_lon_col, coord_lat_col]]
    .rename(columns={coord_lon_col: 'lon', coord_lat_col: 'lat'})
    # suffixes=('', '_attr'): if the attribute table already has a 'lon' or
    # 'lat' column, the coordinate columns keep their plain names and the
    # attribute ones are renamed, so downstream code can rely on 'lon'/'lat'.
    .merge(attr_keyed.drop(columns=[attr_name_col]),
           on='__key', how='inner', suffixes=('', '_attr'))
)

n_unmatched = int((~attr_keyed['__key'].isin(coord_keyed['__key'])).sum())

print(f"\n✅  Matched rows: {len(merged):,}")
if n_unmatched:
    print(f"⚠️  Attribute rows without coordinates: {n_unmatched:,}")
out_csv = 'restaurants_joined.csv'
merged.to_csv(out_csv, index=False)
files.download(out_csv)
print("💾 Download triggered:", out_csv)

Clustering algorithms (DBSCAN and Getis-Ord hot-spot analysis)

In [None]:
```python
from google.colab import files
print("⬆️  Choose restaurants_joined.csv …")
csv_path = next(iter(files.upload()))
print("✔️  Uploaded:", csv_path)

!pip -q install geopandas pyproj shapely fiona scikit-learn esda libpysal numba

import pandas as pd, geopandas as gpd
from shapely.geometry import Point

LON_COL, LAT_COL = 'lon', 'lat'

df = pd.read_csv(csv_path)
gdf = gpd.GeoDataFrame(
    df,
    geometry=[Point(xy) for xy in zip(df[LON_COL], df[LAT_COL])],
    crs='EPSG:4326'
).to_crs('EPSG:5070')

coords = gdf.geometry.apply(lambda p: (p.x, p.y)).tolist()
print(f"Points loaded: {len(gdf):,}")

from sklearn.cluster import DBSCAN
import numpy as np
eps_m, minpts = 50000, 5

db = DBSCAN(eps=eps_m, min_samples=minpts, metric='euclidean')
gdf['cluster_id'] = db.fit_predict(np.array(coords))

db_file = 'dbscan_clusters.gpkg'
gdf[['cluster_id','geometry']].to_file(db_file, driver='GPKG')
files.download(db_file)
print(f"DBSCAN clusters saved → {db_file}")

from libpysal.weights import DistanceBand
from esda.getisord import G_Local

w = DistanceBand.from_dataframe(gdf, threshold=eps_m, silence_warnings=True)

gdf['ones'] = 1.0
gi = G_Local(gdf['ones'], w, transform='B', n_jobs=1)

gdf['GiZ'] = gi.Zs
gdf['GiP'] = gi.p_sim

gi_file = 'gi_hotspots.gpkg'
gdf[['GiZ','GiP','geometry']].to_file(gi_file, driver='GPKG')
files.download(gi_file)
print(f"Gi* hotspots saved → {gi_file}")
```

Clustering results (summary statistics)

In [None]:
# Summarise the clustering outputs: DBSCAN cluster/noise counts, hot/cold
# spot counts from the GiZ scores, and a nearest-neighbour index (R).
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
from sklearn.neighbors import BallTree

# Load both result files and attach the hot-spot columns to the cluster table.
# The join is by row index, so this assumes both files list the points in the
# same order.
gdf_clu = gpd.read_file('dbscan_clusters.gpkg')
gdf_gi = gpd.read_file('gi_hotspots.gpkg')
gi_columns = gdf_gi[['GiZ', 'GiP']]
gdf = gdf_clu.join(gi_columns)

# DBSCAN labels noise as -1; it is counted separately and not as a cluster.
n_total = len(gdf)
n_noise = gdf['cluster_id'].eq(-1).sum()
n_clusters = gdf['cluster_id'].nunique()
if -1 in gdf['cluster_id'].values:
    n_clusters = n_clusters - 1

# |z| >= 1.96 is used as the hot/cold cut-off.
hot = gdf.loc[gdf['GiZ'] >= 1.96]
cold = gdf.loc[gdf['GiZ'] <= -1.96]

# Nearest-neighbour index: observed mean nearest-neighbour distance divided by
# the distance expected for the same point density over the bounding box.
coords = np.column_stack((gdf.geometry.x, gdf.geometry.y))
tree = BallTree(coords, metric='euclidean')
# k=2 because each point's closest hit is itself; column 1 is the true neighbour.
dist, _ = tree.query(coords, k=2)
mean_obs = dist[:, 1].mean()

xmin, ymin, xmax, ymax = gdf.total_bounds
width, height = xmax - xmin, ymax - ymin
area_m2 = width * height
mean_exp = 0.5 / np.sqrt(len(gdf) / area_m2)
R = mean_obs / mean_exp

report = f"""----- SUMMARY -----
Total points           : {n_total}
DBSCAN noise points    : {n_noise}
DBSCAN clusters found  : {n_clusters}

Hot-spot points (GiZ ≥ 1.96)  : {len(hot)}
Cold-spot points (GiZ ≤ –1.96): {len(cold)}
Max hot-spot z-score          : {hot.GiZ.max():.2f}
Min cold-spot z-score         : {cold.GiZ.min():.2f}

Nearest-Neighbour R           : {R:.2f}
------------------------------"""
print(report)

# One-row table with the same figures, saved and offered for download.
summary = {
    name: [value]
    for name, value in (
        ('total_points', n_total),
        ('noise_points', n_noise),
        ('clusters', n_clusters),
        ('hot_points', len(hot)),
        ('cold_points', len(cold)),
        ('max_hot_z', hot.GiZ.max()),
        ('min_cold_z', cold.GiZ.min()),
        ('NNI_R', R),
    )
}
pd.DataFrame(summary).to_csv('stats_summary.csv', index=False)

from google.colab import files
files.download('stats_summary.csv')