In [None]:
import os
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
from geopy.geocoders import Nominatim
from loguru import logger
from shapely.geometry import Point
from sklearn.cluster import DBSCAN

from mclights.coordinate import convert_to_points, get_city
from mclights.get_data import get_data_osmx_or_local

# The project root is the parent of the current working directory
# (the directory the notebook kernel was started in).
project_dir = Path.cwd().parent

# Data folders, all direct children of the project root.
data_dir = project_dir.joinpath("data")
temp_data_dir = project_dir.joinpath("temp_data")
raw_data_dir = project_dir.joinpath("raw_data")

# GeoPackage that the cleaning cell writes and later cells read back.
mcdonalds_path = data_dir.joinpath("mcdonalds.gpkg")

In [None]:
# Spellings of the brand name that differ only in the apostrophe character.
names_combination = ["McDonald's", "McDonald´s", "McDonald’s"]

# Judging by the helper's name, this queries OSM or reuses a local copy — confirm in mclights.get_data.
mcdonalds = get_data_osmx_or_local(
    raw_data_dir / "mcdonalds.gpkg",
    {"amenity": "fast_food", "name": names_combination},
)

# Keep only exact brand-name matches that actually carry a geometry.
mcdonalds = mcdonalds[mcdonalds["name"].isin(names_combination)]
mcdonalds = mcdonalds[mcdonalds["geometry"].notna()]

# Only the geometry and the city are needed downstream.
mcdonalds = mcdonalds[["geometry", "addr:city"]].rename(columns={"addr:city": "city_name"})
mcdonalds["geometry"] = mcdonalds["geometry"].apply(convert_to_points)

# Report how many locations have no city before trying to fill them in.
mask_missing_cities = mcdonalds["city_name"].isna()
logger.info(
    f"There are {mask_missing_cities.sum()} missing cities out of {len(mcdonalds)} locations, "
    f"which is {mask_missing_cities.sum()/len(mcdonalds):.1%}."
)

# Look up the missing cities while the frame is in EPSG:4326, then switch to EPSG:2180.
geolocator = Nominatim(user_agent="mcdonalds_locator")
mcdonalds = mcdonalds.to_crs(epsg=4326)
geocoded_cities = mcdonalds.loc[mask_missing_cities, "geometry"].apply(
    lambda geom: get_city(geom, geolocator)
)
mcdonalds.loc[mask_missing_cities, "city_name"] = geocoded_cities
mcdonalds = mcdonalds.to_crs(epsg=2180)

# The McDonald's in Lubieszyn is not detected for some reason, so any location
# still without a city within 500 CRS units of the point below is labelled by hand.
lubieszyn_location = Point(194277.344, 630278.733)
near_lubieszyn = mcdonalds.geometry.distance(lubieszyn_location) < 500
lubieszyn_missing_mask = near_lubieszyn & mcdonalds["city_name"].isna()
mcdonalds.loc[lubieszyn_missing_mask, "city_name"] = "Lubieszyn"

# Persist the cleaned layer; the layer name is the file name without its extension.
mcdonalds.to_file(mcdonalds_path, layer=mcdonalds_path.stem, driver="GPKG")

In [None]:
def _most_common_city(names):
    """Pick one representative city name for a cluster of merged locations.

    Parameters
    ----------
    names : pandas.Series
        The ``city_name`` values of all rows that fell into one cluster.

    Returns
    -------
    str or None
        The most frequent non-null name, or ``None`` when every name in the
        cluster is missing. On a tie, the first value returned by
        ``Series.mode`` is used.
    """
    known_names = names.dropna()
    if known_names.empty:
        return None
    # Unlike list(set(...))[0], this does not depend on set iteration order
    # and never returns a missing value when a real name is available.
    return known_names.mode().iloc[0]


# Despite its name, this frame ends up holding the de-duplicated locations.
mcdonalds_duplicates = gpd.read_file(mcdonalds_path, layer=mcdonalds_path.stem)

# Cluster on raw x/y coordinates. The layer was written in EPSG:2180,
# so eps=300 is a distance in that CRS's units.
coords = list(zip(mcdonalds_duplicates.geometry.x, mcdonalds_duplicates.geometry.y))
# min_samples=1 makes every point a core point, so no row is labelled as noise.
db = DBSCAN(eps=300, min_samples=1)
labels = db.fit_predict(coords)
mcdonalds_duplicates["cluster_id"] = labels

# Merge each cluster into a single row with one city name.
mcdonalds_duplicates = mcdonalds_duplicates.dissolve(
    by="cluster_id",
    aggfunc={"city_name": _most_common_city},
)

# A dissolved cluster holds all of its member points; collapse it to their centroid.
mcdonalds_duplicates["geometry"] = mcdonalds_duplicates.geometry.centroid
# cluster_id became the index during dissolve and is not needed any more.
mcdonalds_duplicates = mcdonalds_duplicates.reset_index(drop=True)

In [None]:
# TODO: load motorways (not implemented yet)

In [None]:
# Work on a copy of the de-duplicated locations.
mcdonalds_motorways = mcdonalds_duplicates.copy()
# TODO (not implemented yet):
# 1. Use DBSCAN to detect McDonald's locations closer than 500 meters to each other.
# 2. Calculate the distance between McDonald's locations and motorways.
# 3. If locations are close enough to each other and close to a motorway
#    (e.g. within 300 meters), treat them as one.
# 4. Add a flag column such as "highway_proximity".

In [None]:
# Voivodeship boundaries; the layer name is the file name without its extension.
voivodenships_file_path = data_dir.joinpath("voivodeships.gpkg")
voivodenships = gpd.read_file(
    voivodenships_file_path,
    layer=voivodenships_file_path.stem,
)

In [None]:
# Scratch copy, so that plotting experiments cannot modify the de-duplicated frame.
temp = mcdonalds_duplicates.copy()

# Both layers must share a CRS to line up on one axes. Reproject the
# boundaries only when both CRSs are known and actually differ.
boundaries = voivodenships
if temp.crs is not None and boundaries.crs is not None and boundaries.crs != temp.crs:
    boundaries = boundaries.to_crs(temp.crs)

fig, ax = plt.subplots(figsize=(10, 10))

temp.plot(ax=ax)
# Draw outlines only, so the points stay visible.
boundaries.plot(ax=ax, facecolor="None")

# The locations layer was written in EPSG:2180 by the cleaning cell,
# so the axes are projected coordinates.
ax.set_title("McDonald's locations over voivodeship boundaries")
ax.set_xlabel("Easting [m]")
ax.set_ylabel("Northing [m]")
plt.show()

# Show a preview rather than dumping the whole frame into the output.
temp.head()