In [12]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

import osmnx as ox
import networkx as nx
import geopandas as gpd
import overpy
from geopy.distance import distance
from geopy.geocoders import Nominatim
from shapely.geometry import Point

import folium

import warnings
warnings.filterwarnings("ignore")

PATH_DATA = "..\..\data"
pd.set_option('display.max_columns', 60)

api = overpy.Overpass()
geolocator = Nominatim(user_agent="MyApp")

In [13]:
vkusvill_data = pd.read_csv(os.path.join(PATH_DATA, "vkusvill_data_without_target.csv"))
vkusvill = vkusvill_data.copy()

# vkusvill_target = pd.read_csv(os.path.join(PATH_DATA, "vkusvill_target.csv"))
# vkusvill = pd.merge(vkusvill_data, vkusvill_target, on="Номерточки", how="left")

In [14]:
# Список объектов
# https://wiki.openstreetmap.org/wiki/RU:%D0%9E%D0%B1%D1%8A%D0%B5%D0%BA%D1%82%D1%8B_%D0%BA%D0%B0%D1%80%D1%82%D1%8B

tags = [
    {'highway': 'bus_stop'},

    {'building': 'apartments'},
    {'building': 'detached'},
    {'building': 'dormitory'},
    {'building': 'house'}, 
    {'building': 'residential'},
    {'building': 'semidetached_house'}, 

    {'building': 'commercial'},
    {'building': 'kiosk'}, 
    # {'building': 'office'},
    {'building': 'retail'},
    {"building": "supermarket"},
    {'building': 'train_station'},

    {'railway': 'station'},  
    {'railway': 'subway_entrance'},  
    {'railway': 'tram_stop'}, 

    {'amenity': 'bar'}, 
    {'amenity': 'cafe'}, 
    {'amenity': 'fast_food'}, 
    {'amenity': 'food_court'}, 
    {'amenity': 'pub'}, 
    {'amenity': 'restaurant'}, 

    {'amenity': 'college'},
    {'amenity': 'driving_school'},  
    {'amenity': 'language_school'},  
    {'amenity': 'school'},  
    {'amenity': 'kindergarten'}, 
    {'amenity': 'university'}, 

    {'amenity': 'car_wash'},
    {"amenity": "fuel"},

    {'amenity': 'atm'},  
    {'amenity': 'bank'},  

    {'amenity': 'clinic'}, 
    {'amenity': 'dentist'}, 
    {'amenity': 'doctors'}, 
    {'amenity': 'hospital'},  
    {'amenity': 'pharmacy'},
    {'amenity': 'veterinary'}, 
    
    {'amenity': 'theatre'},  
    {'amenity': 'cinema'},  
            
    {'tourism': 'hostel'},
    {'tourism': 'hotel'},
    ]

len(tags)

41

# Расстояние от объекта до центра города

In [15]:
cities = vkusvill["Город"].unique().tolist()
coordinates_cities = {}

In [16]:
for city in cities:
    if city == "Солнечногорский р-он, д. Подолино":
        latitude, longitude = (55.9320362, 37.2478592)
        coordinates_cities[city] = (latitude, longitude)
        print(f"{city}: ({latitude}, {longitude})")

    elif city == "Павловская Слобода":
        latitude, longitude = (55.813991, 37.075951)
        coordinates_cities[city] = (latitude, longitude)
        print(f"{city}: ({latitude}, {longitude})")

    elif city == "Сабурово":
        latitude, longitude = (55.882151, 37.264067)
        coordinates_cities[city] = (latitude, longitude)
        print(f"{city}: ({latitude}, {longitude})")

    elif city == "Новинки":
        latitude, longitude = (55.810157, 37.137701)
        coordinates_cities[city] = (latitude, longitude)
        print(f"{city}: ({latitude}, {longitude})") 

    else:
        location = geolocator.geocode(city)
        coordinates_cities[city] = (location.latitude, location.longitude)
        print(f"{location.address}: ({location.latitude}, {location.longitude})")

Нахабино, городской округ Красногорск, Московская область, Центральный федеральный округ, Россия: (55.8394789, 37.1770987)
Некрасовский, Дмитровский городской округ, Московская область, Центральный федеральный округ, 141865, Россия: (56.0952846, 37.4993722)
Москва, Центральный федеральный округ, Россия: (55.7504461, 37.6174943)
Мытищи, городской округ Мытищи, Московская область, Центральный федеральный округ, Россия: (55.9094928, 37.7339358)
Химки, городской округ Химки, Московская область, Центральный федеральный округ, Россия: (55.8917293, 37.4396994)
Красногорск, городской округ Красногорск, Московская область, Центральный федеральный округ, 143405, Россия: (55.8217545, 37.3400897)
Зеленоград, Москва, Центральный федеральный округ, Россия: (55.9964233, 37.198534)
Долгопрудный, городской округ Долгопрудный, Московская область, Центральный федеральный округ, Россия: (55.9341491, 37.5142417)
Пушкино, Пушкинский городской округ, Московская область, Центральный федеральный округ, Россия:

In [17]:
vkusvill["dist_to_city_center"] = 0
for idx, row in vkusvill.iterrows():
    vkusvill.loc[idx, "dist_to_city_center"] = distance(coordinates_cities[row["Город"]], (row["Широта"], row["Долгота"])).km

In [18]:
vkusvill["dist_to_city_center"].describe()

count    298.000000
mean       7.532783
std        5.859974
min        0.107769
25%        2.113113
50%        6.682216
75%       12.341333
max       22.462504
Name: dist_to_city_center, dtype: float64

In [19]:
# vkusvill.to_csv(os.path.join(PATH_DATA, "vkusvill_city.csv"), index=False)

# Выгрузка данных по изохронам

In [20]:
def get_isochrone(lon, lat, walk_times=[15, 30], speed=4.5, name=None, point_index=None):
    loc = (lat, lon)
    G = ox.graph_from_point(loc, simplify=True, network_type="walk")
    gdf_nodes = ox.graph_to_gdfs(G, edges=False)
    center_node = ox.distance.nearest_nodes(G, lon, lat)

    meters_per_minute = speed * 1000 / 60 
    for u, v, k, data in G.edges(data=True, keys=True):
        data["time"] = data["length"] / meters_per_minute

    polys = []
    for walk_time in walk_times:
        subgraph = nx.ego_graph(G, center_node, radius=walk_time, distance="time")
        node_points = [Point(data["x"], data["y"]) for node, data in subgraph.nodes(data=True)]
        polys.append(gpd.GeoSeries(node_points).unary_union.convex_hull)

    info = {}
    if name:
        info["name"] = [name for _ in walk_times]
    if point_index:
        info["point_index"] = [point_index for _ in walk_times]
    return {**{"geometry": polys, "time": walk_times}, **info}

In [25]:
m = folium.Map(location=[vkusvill["Широта"].mean(), vkusvill["Долгота"].mean()], zoom_start=10)

vkusvill["isochrone"] = None
for idx, row in tqdm(vkusvill.iterrows(), leave=vkusvill.shape[0], total=vkusvill.shape[0]):
    isochrone = get_isochrone(
        row["Долгота"],
        row["Широта"],
        name=row["Номерточки"],
        point_index=idx,
        walk_times=[15],
    )
    vkusvill.loc[idx, "isochrone"] = isochrone["geometry"][0]

    folium.Marker(location=[row["Широта"], row["Долгота"]]).add_to(m)
    folium.GeoJson(isochrone["geometry"][0]).add_to(m)


100%|██████████| 298/298 [14:12<00:00,  2.86s/it]


In [26]:
m

In [50]:
for tag in tqdm(tags, leave=len(tags)):
    key = list(tag.keys())[0]
    value = list(tag.values())[0]
    vkusvill[value] = None
    
    for idx, row in vkusvill.iterrows():
        xx, yy = row["isochrone"].exterior.coords.xy 
        polygon = " ".join([f"{y} {x}" for y, x in zip(yy, xx)])
        
        data = api.query(f"""
        <query type="node">
            <polygon-query bounds="{polygon}" into="_"/>
            <has-kv k="{key}" v="{value}"/>
        </query>
        <print/>
        """)
        vkusvill.loc[idx, value] = len(data.nodes)

100%|██████████| 41/41 [45:24<00:00, 66.46s/it] 


In [51]:
vkusvill['office'] = None
vkusvill['shop'] = None

for idx, row in tqdm(vkusvill.iterrows(), leave=vkusvill.shape[0], total=vkusvill.shape[0]):
    xx, yy = row["isochrone"].exterior.coords.xy 
    polygon = " ".join([f"{y} {x}" for y, x in zip(yy, xx)])

    office = api.query(f"""
    <query type="node">
        <polygon-query bounds="{polygon}" into="_"/>
        <has-kv k="office"/>
    </query>
    <print/>
    """)
    vkusvill.loc[idx, 'office'] = len(office.nodes)

    shop = api.query(f"""
    <query type="node">
        <polygon-query bounds="{polygon}" into="_"/>
        <has-kv k="shop"/>
    </query>
    <print/>
    """)
    vkusvill.loc[idx, 'shop'] = len(shop.nodes)

100%|██████████| 298/298 [03:19<00:00,  1.50it/s]


In [52]:
vkusvill.shape

(298, 56)

In [55]:
for col in vkusvill.columns:
    if vkusvill[col].nunique() == 1:
        vkusvill.drop(col, axis=1, inplace=True)

In [56]:
vkusvill.shape 

(298, 48)

In [58]:
vkusvill.head(5)

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,dist_to_city_center,isochrone,bus_stop,house,kiosk,retail,station,subway_entrance,tram_stop,bar,cafe,fast_food,food_court,pub,restaurant,college,driving_school,language_school,school,kindergarten,university,car_wash,fuel,atm,bank,clinic,dentist,doctors,hospital,pharmacy,veterinary,theatre,cinema,hostel,hotel,office,shop
0,4548М_Нах_Белобородова6,4548,44491,52.6,Московская область,Нахабино,"ул. Белобородова, д. 6",55.850328,37.192615,с 9-00 до 22-00,Нет,1.55043,"POLYGON ((37.1866462 55.8432884, 37.1865743 55...",5,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,3,0,0,2,0,1,0,0,3,0,0,0,0,0,0,46
1,3297М_Некрас_мкрСтроителей42,3297,43799,78.0,Московская область,Некрасовский,"мкр. Строителей, д. 42",56.131445,37.504663,с 9-00 до 22-00,Нет,4.039671,"POLYGON ((37.5144748 56.1251478, 37.4933069 56...",2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,7
2,1013М_МалГрузинская12,1013,42600,73.0,Москва,Москва,Малая Грузинская 12,55.763797,37.572064,с 8-00 до 22-00,Нет,3.216366,"POLYGON ((37.5747034 55.7558251, 37.5664803 55...",52,0,10,0,3,7,0,7,60,29,0,5,25,0,1,1,1,3,1,2,1,9,25,18,13,5,1,18,2,2,0,3,5,36,293
3,2530М_Мыт_Юбилейная4,2530,43661,85.5,Московская область,Мытищи,"ул. Юбилейная, д. 4",55.915306,37.720106,с 8-00 до 22-00,Нет,1.080211,"POLYGON ((37.713027 55.908962, 37.7117648 55.9...",19,0,0,0,0,0,0,2,3,4,1,1,3,0,0,1,0,3,0,0,0,2,3,0,1,2,1,7,0,0,0,1,0,4,60
4,1473М_Хим_Ленинский1,1473,43201,100.0,Московская область,Химки,"Ленинский проспект, 1к2",55.895762,37.45266,с 8-00 до 22-00,Нет,0.926878,"POLYGON ((37.4519693 55.8871594, 37.4447598 55...",34,0,10,1,1,0,0,3,16,20,0,2,6,0,3,0,0,0,0,2,0,6,6,2,7,6,0,23,2,0,0,0,1,17,184


In [59]:
# vkusvill.drop("isochrone", axis=1).to_csv(os.path.join(PATH_DATA, "vkusvill_new_features_v2.csv"), index=False)

# Выгрузка данных в радиусе объекта

In [224]:
R = 1000

for tag in tqdm(tags, leave=len(tags)):
    key = list(tag.keys())[0]
    value = list(tag.values())[0]
    vkusvill[value] = None
    
    for idx, row in vkusvill.iterrows():
        data = api.query(f"""
        <query type="node">
            <around lat="{row["Широта"]}" lon="{row["Долгота"]}" radius="{R}"/>
            <has-kv k="{key}" v="{value}"/>
        </query>
        <print/>
        """)
        vkusvill.loc[idx, value] = len(data.nodes)

100%|██████████| 41/41 [40:11<00:00, 58.82s/it]


In [225]:
vkusvill['office'] = None
vkusvill['shop'] = None

for idx, row in tqdm(vkusvill.iterrows(), leave=vkusvill.shape[0], total=vkusvill.shape[0]):
    office = api.query(f"""
    <query type="node">
        <around lat="{row["Широта"]}" lon="{row["Долгота"]}" radius="{R}"/>
        <has-kv k="office"/>
    </query>
    <print/>
    """)
    vkusvill.loc[idx, 'office'] = len(office.nodes)

    shop = api.query(f"""
    <query type="node">
        <around lat="{row["Широта"]}" lon="{row["Долгота"]}" radius="{R}"/>
        <has-kv k="shop"/>
    </query>
    <print/>
    """)
    vkusvill.loc[idx, 'shop'] = len(shop.nodes)

298it [02:21,  2.11it/s]


In [250]:
vkusvill.shape

(298, 54)

In [251]:
for col in vkusvill.columns:
    if vkusvill[col].nunique() == 1:
        vkusvill.drop(col, axis=1, inplace=True)

In [252]:
vkusvill.shape

(298, 46)

In [253]:
vkusvill

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,fuel,bus_stop,house,kiosk,retail,station,subway_entrance,tram_stop,bar,cafe,fast_food,food_court,pub,restaurant,college,driving_school,language_school,school,kindergarten,university,car_wash,atm,bank,clinic,dentist,doctors,hospital,pharmacy,veterinary,theatre,cinema,hostel,hotel,office,shop
0,4548М_Нах_Белобородова6,4548,44491,52.6,Московская область,Нахабино,"ул. Белобородова, д. 6",55.850328,37.192615,с 9-00 до 22-00,Нет,0,8,0,1,0,0,0,0,0,2,5,0,0,1,0,0,0,0,1,0,4,0,2,2,2,1,0,5,0,0,0,0,0,0,91
1,3297М_Некрас_мкрСтроителей42,3297,43799,78.0,Московская область,Некрасовский,"мкр. Строителей, д. 42",56.131445,37.504663,с 9-00 до 22-00,Нет,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,9
2,1013М_МалГрузинская12,1013,42600,73.0,Москва,Москва,Малая Грузинская 12,55.763797,37.572064,с 8-00 до 22-00,Нет,1,61,0,11,0,3,7,0,16,75,37,1,5,40,1,1,2,1,4,2,5,16,26,20,17,6,2,25,3,2,0,5,7,46,390
3,2530М_Мыт_Юбилейная4,2530,43661,85.5,Московская область,Мытищи,"ул. Юбилейная, д. 4",55.915306,37.720106,с 8-00 до 22-00,Нет,1,45,0,0,0,0,0,0,2,7,8,1,1,6,0,0,1,0,5,0,2,5,9,5,3,5,2,14,0,0,1,1,1,17,184
4,1473М_Хим_Ленинский1,1473,43201,100.0,Московская область,Химки,"Ленинский проспект, 1к2",55.895762,37.452660,с 8-00 до 22-00,Нет,0,36,0,11,1,1,0,0,3,18,22,0,2,6,0,3,0,0,0,0,3,8,9,2,9,6,0,30,2,0,0,0,1,22,229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,1308М_Лоб_Ленина67,1308,44494,370.0,Московская область,Лобня,"г. Лобня, ул. Ленина д.67",56.008319,37.449659,с 9-00 до 22-00,Нет,0,21,0,0,0,0,0,0,0,3,1,0,0,3,0,2,0,0,0,0,2,3,2,5,2,3,0,5,0,1,0,0,0,10,78
294,5348М_Ист_Ленина65,5348,44446,310.0,Московская область,Истра,"ул. Ленина, дом № 65",55.912244,36.859008,с 7-00 до 23-00,Нет,0,20,0,0,0,0,0,0,1,16,19,0,2,4,0,2,1,0,0,0,0,13,7,3,2,7,0,15,1,1,1,0,0,31,205
295,3072М_БольшаяСпасская8,3072,43901,367.7,Москва,Москва,"ул. Большая Спасская , д. 8, стр. 1А",55.775839,37.644171,с 7:00 до 23:45,Да,1,43,0,13,0,9,23,19,13,86,72,3,4,49,0,1,2,1,2,1,4,45,38,18,11,5,0,28,1,6,0,10,16,143,411
296,3903М_Красн_Подмосковный2,3903,43976,371.9,Московская область,Красногорск,"б-р Подмосковный, д. 2 + ""ВкусВиллАйс""",55.819236,37.365523,с 7:00 до 23:45,Да,0,27,0,0,0,0,0,0,4,24,10,0,3,6,0,0,5,0,2,0,6,8,5,11,9,6,0,29,1,0,0,3,2,10,278


In [3]:
data = api.query("""
<query type="node">
    <around lat="55.763797" lon="37.572064" radius="1000"/>
    <has-kv k="amenity" v="pharmacy"/>
</query>
<print/>
""")
	
print(len(data.nodes))
data.nodes

# https://www.openstreetmap.org/node/id_object

25


[<overpy.Node id=708691902 lat=55.7635012 lon=37.5571529>,
 <overpy.Node id=3212575362 lat=55.7642062 lon=37.5603926>,
 <overpy.Node id=3214920431 lat=55.7653716 lon=37.5644551>,
 <overpy.Node id=3214921041 lat=55.7650991 lon=37.5638257>,
 <overpy.Node id=3224714882 lat=55.7619286 lon=37.5634722>,
 <overpy.Node id=3231542845 lat=55.7612178 lon=37.5711785>,
 <overpy.Node id=3661818210 lat=55.7718137 lon=37.5787271>,
 <overpy.Node id=4076692489 lat=55.7637502 lon=37.5639871>,
 <overpy.Node id=4082482989 lat=55.7604141 lon=37.5677605>,
 <overpy.Node id=4121408789 lat=55.7636830 lon=37.5600832>,
 <overpy.Node id=4123545091 lat=55.7693400 lon=37.5828251>,
 <overpy.Node id=4514947389 lat=55.7618186 lon=37.5625323>,
 <overpy.Node id=4620215805 lat=55.7635354 lon=37.5718864>,
 <overpy.Node id=4620215806 lat=55.7633607 lon=37.5713126>,
 <overpy.Node id=5997704384 lat=55.7622911 lon=37.5680359>,
 <overpy.Node id=6103668182 lat=55.7689122 lon=37.5729190>,
 <overpy.Node id=6177029420 lat=55.756859

In [254]:
# vkusvill.to_csv(os.path.join(PATH_DATA, "vkusvill_new_features.csv"), index=False)