In [1]:
import os
import time
import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

import osmnx as ox
import networkx as nx
import geopandas as gpd
import overpy
from geopy.distance import distance
from geopy.geocoders import Nominatim
from shapely.geometry import Point

import folium

import warnings
warnings.filterwarnings("ignore")

PATH_DATA = "..\..\data"
pd.set_option('display.max_columns', 60)

api = overpy.Overpass()
geolocator = Nominatim(user_agent="MyApp")

In [2]:
vkusvill_data = pd.read_csv(os.path.join(PATH_DATA, "vkusvill_data_without_target.csv"))
vkusvill = vkusvill_data.copy()

In [3]:
# Список объектов
# https://wiki.openstreetmap.org/wiki/RU:%D0%9E%D0%B1%D1%8A%D0%B5%D0%BA%D1%82%D1%8B_%D0%BA%D0%B0%D1%80%D1%82%D1%8B

tags = [
    {'highway': 'bus_stop'},

    {'building': 'apartments'},
    {'building': 'detached'},
    {'building': 'dormitory'},
    {'building': 'house'}, 
    {'building': 'residential'},
    {'building': 'semidetached_house'}, 

    {'building': 'commercial'},
    {'building': 'kiosk'}, 
    # {'building': 'office'},
    {'building': 'retail'},
    {"building": "supermarket"},
    {'building': 'train_station'},

    {'railway': 'station'},  
    {'railway': 'subway_entrance'},  
    {'railway': 'tram_stop'}, 

    {'amenity': 'bar'}, 
    {'amenity': 'cafe'}, 
    {'amenity': 'fast_food'}, 
    {'amenity': 'food_court'}, 
    {'amenity': 'pub'}, 
    {'amenity': 'restaurant'}, 

    {'amenity': 'college'},
    {'amenity': 'driving_school'},  
    {'amenity': 'language_school'},  
    {'amenity': 'school'},  
    {'amenity': 'kindergarten'}, 
    {'amenity': 'university'}, 

    {'amenity': 'car_wash'},
    {"amenity": "fuel"},

    {'amenity': 'atm'},  
    {'amenity': 'bank'},  

    {'amenity': 'clinic'}, 
    {'amenity': 'dentist'}, 
    {'amenity': 'doctors'}, 
    {'amenity': 'hospital'},  
    {'amenity': 'pharmacy'},
    {'amenity': 'veterinary'}, 
    
    {'amenity': 'theatre'},  
    {'amenity': 'cinema'},  
            
    {'tourism': 'hostel'},
    {'tourism': 'hotel'},
    ]

len(tags)

41

# Расстояние до шоссе

In [5]:
R = 10000

for idx, row in tqdm(vkusvill.iterrows(), leave=vkusvill.shape[0], total=vkusvill.shape[0]):
    ways = api.query(f"""
        <query type="way">
            <around lat="{row["Широта"]}" lon="{row["Долгота"]}" radius="{R}"/>
            <has-kv k="highway" regv="motorway|trunk|primary"/>
        </query>
        <print geometry="center"/>
        """)
    
    highways = []
    for way in ways.ways:
        id_way = way.id
        info_way= vars(ways.get_way(id_way))
        if "name" in info_way["tags"]:
            if "шоссе" in info_way["tags"]["name"].lower():
                highways.append([info_way["tags"]["name"], info_way["center_lat"], info_way["center_lon"]])

    name_way, dist_way, time_way = [], [], []
    if highways:
        for highway in highways:
            url = f"https://router.project-osrm.org/route/v1/driving/{highway[2]},{highway[1]};{row['Долгота']},{row['Широта']}"
            response = requests.get(url)
            time.sleep(1)

            name_way.append(highway[0])
            time_way.append(response.json()['routes'][0]['duration'])
            dist_way.append(response.json()['routes'][0]['distance']) 

        vkusvill.loc[idx, "name_highway"] = name_way[np.argmin(dist_way)]
        vkusvill.loc[idx, "time_to_highway"] = time_way[np.argmin(dist_way)]
        vkusvill.loc[idx, "dist_to_highway"] = np.min(dist_way)

    else:
        vkusvill.loc[idx, "name_highway"] = None
        vkusvill.loc[idx, "time_to_highway"] = None
        vkusvill.loc[idx, "dist_to_highway"] = None

100%|██████████| 298/298 [37:33:10<00:00, 453.66s/it]   


In [9]:
vkusvill[vkusvill["name_highway"].isna()]

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,name_highway,time_to_highway,dist_to_highway
40,1210М_Пуш_Вокзальная1,1210,42746,112.0,Московская область,Пушкино,"ул. Вокзальная, Павильон (Московский проспект 1)",56.011424,37.840624,с 7-00 до 22-00,Нет,,,
181,1496М_Пуш_Чехова1,1496,43170,139.5,Московская область,Пушкино,"ул. Чехова, д. 1к2",56.009154,37.855141,с 8-00 до 22-00,Нет,,,


In [10]:
R = 15000

for idx, row in tqdm(
    vkusvill[vkusvill["name_highway"].isna()].iterrows(), 
    leave=vkusvill[vkusvill["name_highway"].isna()].shape[0], 
    total=vkusvill[vkusvill["name_highway"].isna()].shape[0]
    ):

    ways = api.query(f"""
        <query type="way">
            <around lat="{row["Широта"]}" lon="{row["Долгота"]}" radius="{R}"/>
            <has-kv k="highway" regv="motorway|trunk|primary"/>
        </query>
        <print geometry="center"/>
        """)
    
    highways = []
    for way in ways.ways:
        id_way = way.id
        info_way= vars(ways.get_way(id_way))
        if "name" in info_way["tags"]:
            if "шоссе" in info_way["tags"]["name"].lower():
                highways.append([info_way["tags"]["name"], info_way["center_lat"], info_way["center_lon"]])

    name_way, dist_way, time_way = [], [], []
    if highways:
        for highway in highways:
            url = f"https://router.project-osrm.org/route/v1/driving/{highway[2]},{highway[1]};{row['Долгота']},{row['Широта']}"
            response = requests.get(url)
            time.sleep(1)

            name_way.append(highway[0])
            time_way.append(response.json()['routes'][0]['duration'])
            dist_way.append(response.json()['routes'][0]['distance']) 

        vkusvill.loc[idx, "name_highway"] = name_way[np.argmin(dist_way)]
        vkusvill.loc[idx, "time_to_highway"] = time_way[np.argmin(dist_way)]
        vkusvill.loc[idx, "dist_to_highway"] = np.min(dist_way)

    else:
        vkusvill.loc[idx, "name_highway"] = None
        vkusvill.loc[idx, "time_to_highway"] = None
        vkusvill.loc[idx, "dist_to_highway"] = None

100%|██████████| 2/2 [01:06<00:00, 33.21s/it]


In [16]:
vkusvill.head(5)

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,name_highway,time_to_highway,dist_to_highway
0,4548М_Нах_Белобородова6,4548,44491,52.6,Московская область,Нахабино,"ул. Белобородова, д. 6",55.850328,37.192615,с 9-00 до 22-00,Нет,Волоколамское шоссе,236.6,2091.5
1,3297М_Некрас_мкрСтроителей42,3297,43799,78.0,Московская область,Некрасовский,"мкр. Строителей, д. 42",56.131445,37.504663,с 9-00 до 22-00,Нет,Дмитровское шоссе,123.0,805.9
2,1013М_МалГрузинская12,1013,42600,73.0,Москва,Москва,Малая Грузинская 12,55.763797,37.572064,с 8-00 до 22-00,Нет,Звенигородское шоссе,105.7,1040.1
3,2530М_Мыт_Юбилейная4,2530,43661,85.5,Московская область,Мытищи,"ул. Юбилейная, д. 4",55.915306,37.720106,с 8-00 до 22-00,Нет,Ярославское шоссе,403.1,4666.2
4,1473М_Хим_Ленинский1,1473,43201,100.0,Московская область,Химки,"Ленинский проспект, 1к2",55.895762,37.45266,с 8-00 до 22-00,Нет,Ленинградское шоссе,245.0,2671.3


In [14]:
result = vkusvill[["Номерточки", "name_highway", "time_to_highway", "dist_to_highway"]]
result["dist_to_highway"] /= 1000
result["time_to_highway_m"] = result["time_to_highway"] / 60

In [15]:
# result.to_csv(os.path.join(PATH_DATA, "vkusvill_highway.csv"), index=False)

# Признак на основе модели Хаффа

[Статья про модель Хаффа](https://www.cfin.ru/press/practical/2007-10/01.shtml)  
[Получение признака времени пути](https://matrunich.com/blog/2014/06/18/calculate_distance_between_addresses/)

In [56]:
cities = vkusvill["Город"].unique().tolist()
coordinates_cities = {}

for city in cities:
    if city == "Солнечногорский р-он, д. Подолино":
        latitude, longitude = (55.9320362, 37.2478592)
        coordinates_cities[city] = (latitude, longitude)

    elif city == "Павловская Слобода":
        latitude, longitude = (55.813991, 37.075951)
        coordinates_cities[city] = (latitude, longitude)

    elif city == "Сабурово":
        latitude, longitude = (55.882151, 37.264067)
        coordinates_cities[city] = (latitude, longitude)

    elif city == "Новинки":
        latitude, longitude = (55.810157, 37.137701)
        coordinates_cities[city] = (latitude, longitude) 

    else:
        location = geolocator.geocode(city)
        coordinates_cities[city] = (location.latitude, location.longitude)

In [57]:
coordinates_cities

{'Нахабино': (55.8394789, 37.1770987),
 'Некрасовский': (56.0952846, 37.4993722),
 'Москва': (55.7504461, 37.6174943),
 'Мытищи': (55.9094928, 37.7339358),
 'Химки': (55.8917293, 37.4396994),
 'Красногорск': (55.8217545, 37.3400897),
 'Зеленоград': (55.9964233, 37.198534),
 'Долгопрудный': (55.9341491, 37.5142417),
 'Пушкино': (56.0104274, 37.8461892),
 'Солнечногорский р-он, д. Подолино': (55.9320362, 37.2478592),
 'Королев': (55.9204898, 37.8326289),
 'Истра': (55.9145993, 36.8595482),
 'Юбилейный': (55.9368065, 37.8435779),
 'Путилково': (55.8625, 37.390278),
 'Ивантеевка': (55.9714098, 37.9200739),
 'Сабурово': (55.882151, 37.264067),
 'Клин': (56.3355601, 36.7351318),
 'Лобня': (56.0104473, 37.4670831),
 'Новинки': (55.810157, 37.137701),
 'Сергиев Посад': (56.3153529, 38.1358208),
 'Яхрома': (56.289936, 37.4838441),
 'Дедовск': (55.8667275, 37.1257097),
 'Волоколамск': (56.0360129, 35.9583358),
 'Дмитров': (56.3375775, 37.5104118),
 'Солнечногорск': (56.1853501, 36.9781536),
 'Хо

In [95]:
vkusvill["dist_to_city_center_car"] = 0
vkusvill["time_to_city_center_car"] = 0

for idx, row in vkusvill.iterrows():
    coords = coordinates_cities[row["Город"]]

    url = f"https://router.project-osrm.org/route/v1/driving/{coords[1]},{coords[0]};{row['Долгота']},{row['Широта']}"
    response = requests.get(url)
    time.sleep(1)

    vkusvill.loc[idx, "time_to_city_center_car"] = response.json()['routes'][0]['duration'] 
    vkusvill.loc[idx, "dist_to_city_center_car"] = response.json()['routes'][0]['distance']

vkusvill["dist_to_city_center_car"] /= 1000
vkusvill["time_to_city_center_car_m"] = vkusvill["time_to_city_center_car"] / 60

param_lambda = 2
vkusvill["p_huff"] = vkusvill["Торговая площадь, м2"] / (vkusvill["time_to_city_center_car_m"] ** param_lambda)

In [107]:
vkusvill.head(5)

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,dist_to_city_center,dist_to_city_center_car,time_to_city_center_car,time_to_city_center_car_m,p_huff
0,4548М_Нах_Белобородова6,4548,44491,52.6,Московская область,Нахабино,"ул. Белобородова, д. 6",55.850328,37.192615,с 9-00 до 22-00,Нет,1.55043,3330.8,363.0,6.05,1.43706
1,3297М_Некрас_мкрСтроителей42,3297,43799,78.0,Московская область,Некрасовский,"мкр. Строителей, д. 42",56.131445,37.504663,с 9-00 до 22-00,Нет,4.039671,12317.4,958.3,15.971667,0.305769
2,1013М_МалГрузинская12,1013,42600,73.0,Москва,Москва,Малая Грузинская 12,55.763797,37.572064,с 8-00 до 22-00,Нет,3.216366,5326.1,566.2,9.436667,0.819758
3,2530М_Мыт_Юбилейная4,2530,43661,85.5,Московская область,Мытищи,"ул. Юбилейная, д. 4",55.915306,37.720106,с 8-00 до 22-00,Нет,1.080211,1865.7,175.6,2.926667,9.982047
4,1473М_Хим_Ленинский1,1473,43201,100.0,Московская область,Химки,"Ленинский проспект, 1к2",55.895762,37.45266,с 8-00 до 22-00,Нет,0.926878,2181.0,222.5,3.708333,7.271809


In [113]:
result = vkusvill[[
    "Номерточки", 
    "dist_to_city_center", 
    "dist_to_city_center_car", 
    "time_to_city_center_car", 
    "time_to_city_center_car_m", 
    "p_huff"
    ]]

In [114]:
result.describe()

Unnamed: 0,Номерточки,dist_to_city_center,dist_to_city_center_car,time_to_city_center_car,time_to_city_center_car_m,p_huff
count,298.0,298.0,298.0,298.0,298.0,298.0
mean,2372.614094,7.532783,9.545774,852.300336,14.205006,22.616857
std,1470.987587,5.859974,6.625868,533.772078,8.896201,155.144861
min,325.0,0.107769,0.1079,14.8,0.246667,0.038674
25%,1320.25,2.113113,3.54885,376.05,6.2675,0.306033
50%,1701.0,6.682216,9.4304,875.15,14.585833,0.780018
75%,3387.75,12.341333,15.014525,1286.1,21.435,3.781455
max,5853.0,22.462504,25.8796,2176.7,36.278333,2317.384953


In [119]:
vkusvill[vkusvill["Номерточки"] == 1649]

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,dist_to_city_center,dist_to_city_center_car,time_to_city_center_car,time_to_city_center_car_m,p_huff
267,1649М_РимскогоКорсакова18,1649,43764,271.0,Москва,Москва,"ул. Римского-Корсакова, д. 18",55.873642,37.609209,с 9-00 до 22-00,Нет,13.726268,16.9925,1591.2,26.52,0.385321


In [115]:
# result.to_csv(os.path.join(PATH_DATA, "vkusvill_dist.csv"), index=False)

# Изучение тегов для магазинов

In [58]:
shop_tags = [
    {'shop': 'department_store'},
    {'shop': 'general'},
    {'shop': 'mall'},
    {'shop': 'supermarket'},
    {'shop': 'wholesale'},

    {"shop":"alcohol"},	
    {"shop":"bakery"},		
    {"shop":"beverages"},		
    {"shop":"brewing_supplies"},	
    {"shop":"butcher"},		
    {"shop":"cheese"},		
    {"shop":"chocolate"},		
    {"shop":"coffee"},	
    {"shop":"confectionery"},	
    {"shop":"convenience"},		
    {"shop":"deli"},	
    {"shop":"dairy"},	
    {"shop":"farm"},	
    {"shop":"frozen_food"},		
    {"shop":"greengrocer"},	
    {"shop":"health_food"},		
    {"shop":"ice_cream"},	
    {"shop":"pasta"},	
    {"shop":"pastry"},		
    {"shop":"seafood"},		
    {"shop":"spices"},		
    {"shop":"tea"},		
    {"shop":"wine"},	
    {"shop":"water"},	
]


In [59]:
data = []

for idx, row in tqdm(vkusvill.iterrows(), leave=vkusvill.shape[0], total=vkusvill.shape[0]):
    for tag in shop_tags:
        key = list(tag.keys())[0]
        value = list(tag.values())[0]

        xx, yy = row["isochrone"].exterior.coords.xy 
        polygon = " ".join([f"{y} {x}" for y, x in zip(yy, xx)])

        shop = api.query(f"""
        <query type="node">
            <polygon-query bounds="{polygon}" into="_"/>
            <has-kv k="{key}" v="{value}"/>
        </query>
        <print/>
        """)

        for node in shop.nodes:
            id_node = node.id
            info_node = vars(shop.get_node(id_node))
            if "name" in info_node["tags"]:
                data.append([row["Номерточки"], info_node["tags"]["name"]])


100%|██████████| 298/298 [31:26<00:00,  6.33s/it]


In [69]:
result = pd.DataFrame(data, columns=["Номерточки", "shop_name"])
result

Unnamed: 0,Номерточки,shop_name
0,4548,Магнит
1,4548,Пятёрочка
2,4548,Чижик
3,4548,Винный маркет
4,4548,Красное&Белое
...,...,...
11009,5063,КуулКлевер
11010,5063,ВкусВилл
11011,5063,ВкусВилл
11012,5063,Малина


In [70]:
result["shop_name"].value_counts().head(50)

shop_name
Пятёрочка                              958
ВкусВилл                               737
Магнит                                 432
Ароматный мир                          399
Дикси                                  394
Винлаб                                 350
Красное&Белое                          343
Магнолия                               246
Продукты                               213
Перекрёсток                            200
Мясницкий ряд                          145
Фасоль                                 129
Лента                                  124
Верный                                 119
У Палыча                               114
Перекресток                            102
АВ Daily                                97
Азбука Вкуса                            86
Отдохни                                 82
Минимаркет                              81
Кулинарная лавка братьев Караваевых     80
Икорный                                 70
КуулКлевер                              70
Г

In [73]:
vkusvill[~vkusvill["Номерточки"].isin(result["Номерточки"].unique().tolist())]

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,isochrone


In [72]:
# result.to_csv(os.path.join(PATH_DATA, "vkusvill_shop_v2.csv"), index=False)

# Расстояние от объекта до центра города

In [15]:
cities = vkusvill["Город"].unique().tolist()
coordinates_cities = {}

In [16]:
for city in cities:
    if city == "Солнечногорский р-он, д. Подолино":
        latitude, longitude = (55.9320362, 37.2478592)
        coordinates_cities[city] = (latitude, longitude)
        print(f"{city}: ({latitude}, {longitude})")

    elif city == "Павловская Слобода":
        latitude, longitude = (55.813991, 37.075951)
        coordinates_cities[city] = (latitude, longitude)
        print(f"{city}: ({latitude}, {longitude})")

    elif city == "Сабурово":
        latitude, longitude = (55.882151, 37.264067)
        coordinates_cities[city] = (latitude, longitude)
        print(f"{city}: ({latitude}, {longitude})")

    elif city == "Новинки":
        latitude, longitude = (55.810157, 37.137701)
        coordinates_cities[city] = (latitude, longitude)
        print(f"{city}: ({latitude}, {longitude})") 

    else:
        location = geolocator.geocode(city)
        coordinates_cities[city] = (location.latitude, location.longitude)
        print(f"{location.address}: ({location.latitude}, {location.longitude})")

Нахабино, городской округ Красногорск, Московская область, Центральный федеральный округ, Россия: (55.8394789, 37.1770987)
Некрасовский, Дмитровский городской округ, Московская область, Центральный федеральный округ, 141865, Россия: (56.0952846, 37.4993722)
Москва, Центральный федеральный округ, Россия: (55.7504461, 37.6174943)
Мытищи, городской округ Мытищи, Московская область, Центральный федеральный округ, Россия: (55.9094928, 37.7339358)
Химки, городской округ Химки, Московская область, Центральный федеральный округ, Россия: (55.8917293, 37.4396994)
Красногорск, городской округ Красногорск, Московская область, Центральный федеральный округ, 143405, Россия: (55.8217545, 37.3400897)
Зеленоград, Москва, Центральный федеральный округ, Россия: (55.9964233, 37.198534)
Долгопрудный, городской округ Долгопрудный, Московская область, Центральный федеральный округ, Россия: (55.9341491, 37.5142417)
Пушкино, Пушкинский городской округ, Московская область, Центральный федеральный округ, Россия:

In [91]:
vkusvill["dist_to_city_center"] = 0
for idx, row in vkusvill.iterrows():
    vkusvill.loc[idx, "dist_to_city_center"] = distance(coordinates_cities[row["Город"]], (row["Широта"], row["Долгота"])).km

In [92]:
vkusvill["dist_to_city_center"].describe()

count    298.000000
mean       7.532783
std        5.859974
min        0.107769
25%        2.113113
50%        6.682216
75%       12.341333
max       22.462504
Name: dist_to_city_center, dtype: float64

In [19]:
# vkusvill.to_csv(os.path.join(PATH_DATA, "vkusvill_city.csv"), index=False)

# Выгрузка данных по изохронам

In [4]:
def get_isochrone(lon, lat, walk_times=[15, 30], speed=4.5, name=None, point_index=None):
    loc = (lat, lon)
    G = ox.graph_from_point(loc, simplify=True, network_type="walk")
    gdf_nodes = ox.graph_to_gdfs(G, edges=False)
    center_node = ox.distance.nearest_nodes(G, lon, lat)

    meters_per_minute = speed * 1000 / 60 
    for u, v, k, data in G.edges(data=True, keys=True):
        data["time"] = data["length"] / meters_per_minute

    polys = []
    for walk_time in walk_times:
        subgraph = nx.ego_graph(G, center_node, radius=walk_time, distance="time")
        node_points = [Point(data["x"], data["y"]) for node, data in subgraph.nodes(data=True)]
        polys.append(gpd.GeoSeries(node_points).unary_union.convex_hull)

    info = {}
    if name:
        info["name"] = [name for _ in walk_times]
    if point_index:
        info["point_index"] = [point_index for _ in walk_times]
    return {**{"geometry": polys, "time": walk_times}, **info}

In [32]:
m = folium.Map(location=[vkusvill["Широта"].mean(), vkusvill["Долгота"].mean()], zoom_start=10)

vkusvill["isochrone"] = None
for idx, row in tqdm(vkusvill.iterrows(), leave=vkusvill.shape[0], total=vkusvill.shape[0]):
    isochrone = get_isochrone(
        row["Долгота"],
        row["Широта"],
        name=row["Номерточки"],
        point_index=idx,
        walk_times=[25] # [10], [15], [20], [25], [30], 
    )
    vkusvill.loc[idx, "isochrone"] = isochrone["geometry"][0]

    folium.Marker(location=[row["Широта"], row["Долгота"]]).add_to(m)
    folium.GeoJson(isochrone["geometry"][0]).add_to(m)


100%|██████████| 298/298 [08:14<00:00,  1.66s/it]


In [33]:
m

In [35]:
for tag in tqdm(tags, leave=len(tags)):
    key = list(tag.keys())[0]
    value = list(tag.values())[0]
    vkusvill[value] = None
    
    for idx, row in vkusvill.iterrows():
        xx, yy = row["isochrone"].exterior.coords.xy 
        polygon = " ".join([f"{y} {x}" for y, x in zip(yy, xx)])
        
        data = api.query(f"""
        <query type="node">
            <polygon-query bounds="{polygon}" into="_"/>
            <has-kv k="{key}" v="{value}"/>
        </query>
        <print/>
        """)
        vkusvill.loc[idx, value] = len(data.nodes)

100%|██████████| 41/41 [44:15<00:00, 64.78s/it]  


In [36]:
vkusvill['office'] = None
vkusvill['shop'] = None

for idx, row in tqdm(vkusvill.iterrows(), leave=vkusvill.shape[0], total=vkusvill.shape[0]):
    xx, yy = row["isochrone"].exterior.coords.xy 
    polygon = " ".join([f"{y} {x}" for y, x in zip(yy, xx)])

    office = api.query(f"""
    <query type="node">
        <polygon-query bounds="{polygon}" into="_"/>
        <has-kv k="office"/>
    </query>
    <print/>
    """)
    vkusvill.loc[idx, 'office'] = len(office.nodes)

    shop = api.query(f"""
    <query type="node">
        <polygon-query bounds="{polygon}" into="_"/>
        <has-kv k="shop"/>
    </query>
    <print/>
    """)
    vkusvill.loc[idx, 'shop'] = len(shop.nodes)

100%|██████████| 298/298 [03:24<00:00,  1.46it/s]


In [37]:
vkusvill.shape

(298, 55)

In [38]:
for col in vkusvill.columns:
    if vkusvill[col].nunique() == 1:
        vkusvill.drop(col, axis=1, inplace=True)

In [39]:
vkusvill.shape 

(298, 47)

In [40]:
vkusvill.head(5)

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,isochrone,bus_stop,house,kiosk,retail,station,subway_entrance,tram_stop,bar,cafe,fast_food,food_court,pub,restaurant,college,driving_school,language_school,school,kindergarten,university,car_wash,fuel,atm,bank,clinic,dentist,doctors,hospital,pharmacy,veterinary,theatre,cinema,hostel,hotel,office,shop
0,4548М_Нах_Белобородова6,4548,44491,52.6,Московская область,Нахабино,"ул. Белобородова, д. 6",55.850328,37.192615,с 9-00 до 22-00,Нет,"POLYGON ((37.187138 55.84134, 37.1840988 55.84...",10,0,1,0,1,0,0,0,2,8,0,0,2,0,0,0,0,1,0,5,0,0,2,2,3,1,0,6,0,0,0,0,1,1,114
1,3297М_Некрас_мкрСтроителей42,3297,43799,78.0,Московская область,Некрасовский,"мкр. Строителей, д. 42",56.131445,37.504663,с 9-00 до 22-00,Нет,"POLYGON ((37.5134797 56.1229287, 37.5078506 56...",2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,1,9
2,1013М_МалГрузинская12,1013,42600,73.0,Москва,Москва,Малая Грузинская 12,55.763797,37.572064,с 8-00 до 22-00,Нет,"POLYGON ((37.5869551 55.7548112, 37.5685647 55...",79,0,13,0,3,7,0,19,88,39,1,5,63,2,1,2,1,8,2,5,2,19,33,23,20,7,2,33,3,3,0,6,13,56,479
3,2530М_Мыт_Юбилейная4,2530,43661,85.5,Московская область,Мытищи,"ул. Юбилейная, д. 4",55.915306,37.720106,с 8-00 до 22-00,Нет,"POLYGON ((37.7092891 55.9063211, 37.7072855 55...",58,0,0,0,0,0,0,2,8,9,1,1,6,0,0,1,0,5,0,2,2,6,10,5,3,5,2,16,1,1,1,1,1,21,215
4,1473М_Хим_Ленинский1,1473,43201,100.0,Московская область,Химки,"Ленинский проспект, 1к2",55.895762,37.45266,с 8-00 до 22-00,Нет,"POLYGON ((37.4535742 55.8867949, 37.4511392 55...",40,0,11,1,1,0,0,3,18,23,0,2,7,0,3,0,0,0,0,3,0,10,10,2,9,6,0,34,2,1,0,0,1,23,249


In [41]:
vkusvill.drop("isochrone", axis=1).to_csv(os.path.join(PATH_DATA, "vkusvill_isochrone_25.csv"), index=False)

# Выгрузка данных в радиусе объекта

In [224]:
R = 1000

for tag in tqdm(tags, leave=len(tags)):
    key = list(tag.keys())[0]
    value = list(tag.values())[0]
    vkusvill[value] = None
    
    for idx, row in vkusvill.iterrows():
        data = api.query(f"""
        <query type="node">
            <around lat="{row["Широта"]}" lon="{row["Долгота"]}" radius="{R}"/>
            <has-kv k="{key}" v="{value}"/>
        </query>
        <print/>
        """)
        vkusvill.loc[idx, value] = len(data.nodes)

100%|██████████| 41/41 [40:11<00:00, 58.82s/it]


In [225]:
vkusvill['office'] = None
vkusvill['shop'] = None

for idx, row in tqdm(vkusvill.iterrows(), leave=vkusvill.shape[0], total=vkusvill.shape[0]):
    office = api.query(f"""
    <query type="node">
        <around lat="{row["Широта"]}" lon="{row["Долгота"]}" radius="{R}"/>
        <has-kv k="office"/>
    </query>
    <print/>
    """)
    vkusvill.loc[idx, 'office'] = len(office.nodes)

    shop = api.query(f"""
    <query type="node">
        <around lat="{row["Широта"]}" lon="{row["Долгота"]}" radius="{R}"/>
        <has-kv k="shop"/>
    </query>
    <print/>
    """)
    vkusvill.loc[idx, 'shop'] = len(shop.nodes)

298it [02:21,  2.11it/s]


In [250]:
vkusvill.shape

(298, 54)

In [251]:
for col in vkusvill.columns:
    if vkusvill[col].nunique() == 1:
        vkusvill.drop(col, axis=1, inplace=True)

In [252]:
vkusvill.shape

(298, 46)

In [253]:
vkusvill

Unnamed: 0,Наименование,Номерточки,Дата открытия,"Торговая площадь, м2",Регион,Город,Адрес,Широта,Долгота,График,Ночной магазин,fuel,bus_stop,house,kiosk,retail,station,subway_entrance,tram_stop,bar,cafe,fast_food,food_court,pub,restaurant,college,driving_school,language_school,school,kindergarten,university,car_wash,atm,bank,clinic,dentist,doctors,hospital,pharmacy,veterinary,theatre,cinema,hostel,hotel,office,shop
0,4548М_Нах_Белобородова6,4548,44491,52.6,Московская область,Нахабино,"ул. Белобородова, д. 6",55.850328,37.192615,с 9-00 до 22-00,Нет,0,8,0,1,0,0,0,0,0,2,5,0,0,1,0,0,0,0,1,0,4,0,2,2,2,1,0,5,0,0,0,0,0,0,91
1,3297М_Некрас_мкрСтроителей42,3297,43799,78.0,Московская область,Некрасовский,"мкр. Строителей, д. 42",56.131445,37.504663,с 9-00 до 22-00,Нет,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,9
2,1013М_МалГрузинская12,1013,42600,73.0,Москва,Москва,Малая Грузинская 12,55.763797,37.572064,с 8-00 до 22-00,Нет,1,61,0,11,0,3,7,0,16,75,37,1,5,40,1,1,2,1,4,2,5,16,26,20,17,6,2,25,3,2,0,5,7,46,390
3,2530М_Мыт_Юбилейная4,2530,43661,85.5,Московская область,Мытищи,"ул. Юбилейная, д. 4",55.915306,37.720106,с 8-00 до 22-00,Нет,1,45,0,0,0,0,0,0,2,7,8,1,1,6,0,0,1,0,5,0,2,5,9,5,3,5,2,14,0,0,1,1,1,17,184
4,1473М_Хим_Ленинский1,1473,43201,100.0,Московская область,Химки,"Ленинский проспект, 1к2",55.895762,37.452660,с 8-00 до 22-00,Нет,0,36,0,11,1,1,0,0,3,18,22,0,2,6,0,3,0,0,0,0,3,8,9,2,9,6,0,30,2,0,0,0,1,22,229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,1308М_Лоб_Ленина67,1308,44494,370.0,Московская область,Лобня,"г. Лобня, ул. Ленина д.67",56.008319,37.449659,с 9-00 до 22-00,Нет,0,21,0,0,0,0,0,0,0,3,1,0,0,3,0,2,0,0,0,0,2,3,2,5,2,3,0,5,0,1,0,0,0,10,78
294,5348М_Ист_Ленина65,5348,44446,310.0,Московская область,Истра,"ул. Ленина, дом № 65",55.912244,36.859008,с 7-00 до 23-00,Нет,0,20,0,0,0,0,0,0,1,16,19,0,2,4,0,2,1,0,0,0,0,13,7,3,2,7,0,15,1,1,1,0,0,31,205
295,3072М_БольшаяСпасская8,3072,43901,367.7,Москва,Москва,"ул. Большая Спасская , д. 8, стр. 1А",55.775839,37.644171,с 7:00 до 23:45,Да,1,43,0,13,0,9,23,19,13,86,72,3,4,49,0,1,2,1,2,1,4,45,38,18,11,5,0,28,1,6,0,10,16,143,411
296,3903М_Красн_Подмосковный2,3903,43976,371.9,Московская область,Красногорск,"б-р Подмосковный, д. 2 + ""ВкусВиллАйс""",55.819236,37.365523,с 7:00 до 23:45,Да,0,27,0,0,0,0,0,0,4,24,10,0,3,6,0,0,5,0,2,0,6,8,5,11,9,6,0,29,1,0,0,3,2,10,278


In [3]:
data = api.query("""
<query type="node">
    <around lat="55.763797" lon="37.572064" radius="1000"/>
    <has-kv k="amenity" v="pharmacy"/>
</query>
<print/>
""")
	
print(len(data.nodes))
data.nodes

# https://www.openstreetmap.org/node/id_object

25


[<overpy.Node id=708691902 lat=55.7635012 lon=37.5571529>,
 <overpy.Node id=3212575362 lat=55.7642062 lon=37.5603926>,
 <overpy.Node id=3214920431 lat=55.7653716 lon=37.5644551>,
 <overpy.Node id=3214921041 lat=55.7650991 lon=37.5638257>,
 <overpy.Node id=3224714882 lat=55.7619286 lon=37.5634722>,
 <overpy.Node id=3231542845 lat=55.7612178 lon=37.5711785>,
 <overpy.Node id=3661818210 lat=55.7718137 lon=37.5787271>,
 <overpy.Node id=4076692489 lat=55.7637502 lon=37.5639871>,
 <overpy.Node id=4082482989 lat=55.7604141 lon=37.5677605>,
 <overpy.Node id=4121408789 lat=55.7636830 lon=37.5600832>,
 <overpy.Node id=4123545091 lat=55.7693400 lon=37.5828251>,
 <overpy.Node id=4514947389 lat=55.7618186 lon=37.5625323>,
 <overpy.Node id=4620215805 lat=55.7635354 lon=37.5718864>,
 <overpy.Node id=4620215806 lat=55.7633607 lon=37.5713126>,
 <overpy.Node id=5997704384 lat=55.7622911 lon=37.5680359>,
 <overpy.Node id=6103668182 lat=55.7689122 lon=37.5729190>,
 <overpy.Node id=6177029420 lat=55.756859

In [254]:
# vkusvill.to_csv(os.path.join(PATH_DATA, "vkusvill_new_features.csv"), index=False)