In [1]:
import geopandas as gpd
import pandas as pd
import json
import requests
import numpy as np
from geopy.geocoders import Nominatim, Here
from geopy.exc import GeocoderTimedOut
from ipykernel import kernelapp as app
from geopy import distance
import osmnx as ox
from shapely import wkt
import matplotlib
from folium.plugins import MarkerCluster
import random
import json
from shapely.geometry import Polygon
import time
from scipy.spatial import KDTree



In [902]:
import warnings
warnings.filterwarnings("ignore")

In [884]:
warnings.resetwarnings()

## Сегодня в программе:
0. Введение в геоаналитику и геомаркетинг 
1. Узнаем что такое гексагоны и чем они могут быть полезны для визуализации и анализа гео данных
2. Познакомимся с API Яндекс организаций 
3. Научимся работать с гео данными OSM 
4. Краем глаза посмотрим на модель спроса Huff
5. На основе наших стараний найдем лучшие места в Москве для открытия фитнес центров, салонов красоты, продуктовых магазинов, шаурмичных (да чего угодно) и немного подискутируем на эту тему 

## 1. Гексагоны (H3: Uber’s Hexagonal Hierarchical Spatial Index)**

Если коротко, то Uber опубликовал open source проект, с помощью которого можно легко и просто нанести на карту красивые многоугольники:) Подробнее: https://eng.uber.com/h3/
![alt text](https://1fykyq3mdn5r21tpna3wkdyi-wpengine.netdna-ssl.com/wp-content/uploads/2018/06/Twitter-H3-768x384.png)



In [166]:
# сгенерим словарь цветов, который нам пригодится для визуализации объектов карты
rgb_colors = {}
for name, hex in matplotlib.colors.cnames.items():
    rgb_colors[name] = matplotlib.colors.to_rgb(hex)

colors = list(rgb_colors.keys())

In [4]:
from h3 import h3
import folium

def visualize_hexagons(hexagons, color="red", folium_map=None):

    polylines = []
    lat = []
    lng = []
    for hex in hexagons:
        polygons = h3.h3_set_to_multi_polygon([hex], geo_json=False)
        # flatten polygons into loops.
        outlines = [loop for polygon in polygons for loop in polygon]
        polyline = [outline + [outline[0]] for outline in outlines][0]
        lat.extend(map(lambda v:v[0],polyline))
        lng.extend(map(lambda v:v[1],polyline))
        polylines.append(polyline)
    
    if folium_map is None:
        m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=20, tiles='cartodbpositron')
    else:
        m = folium_map
        
    for polyline in polylines:
        my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
        m.add_child(my_PolyLine)
    return m
    

def visualize_polygon(polyline, color):
    
    polyline.append(polyline[0])
    lat = [p[0] for p in polyline]
    lng = [p[1] for p in polyline]
    m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
    m.add_child(my_PolyLine)
    
    return m


def visualize_points(lats, lons, pops, cuts):

    
    m = folium.Map(location=[sum(lats)/len(lats), sum(lons)/len(lons)], zoom_start=13, tiles='cartodbpositron')
        
#     dict_colors = dict.fromkeys(types)
#     color_ = random.sample(colors, len(dict_colors))
#     for pop, color in zip(types, color_):
#         dict_colors[pop] = color

    if len(pops) == 0:
        pops = list(np.full(len(lats), False))
        
    if len(cuts) == 0:
        cuts = list(np.full(len(lats), 'blue'))
    
    
    for lat, lon, pop, cut in zip(lats, lons, pops, cuts):
        folium.CircleMarker(location=[lat, lon], radius = 9, popup=str(pop), 
                            fill_color=color_change(cut), fill_opacity = 1.5).add_to(m)
    
    return m

def visualize_polygons(geometry):
    
    
    lats, lons = get_lat_lon(geometry)
    
    m = folium.Map(location=[sum(lats)/len(lats), sum(lons)/len(lons)], zoom_start=13, tiles='cartodbpositron')
    
    overlay = gpd.GeoSeries(geometry).to_json()
    folium.GeoJson(overlay, name = 'boundary').add_to(m)
    
    return m

def create_hexagons(geoJson):
    
    polyline = geoJson['coordinates'][0]

    polyline.append(polyline[0])
    lat = [p[0] for p in polyline]
    lng = [p[1] for p in polyline]
    m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color="green")
    m.add_child(my_PolyLine)

    hexagons = list(h3.polyfill(geoJson, 8))
    polylines = []
    lat = []
    lng = []
    for hex in hexagons:
        polygons = h3.h3_set_to_multi_polygon([hex], geo_json=False)
        # flatten polygons into loops.
        outlines = [loop for polygon in polygons for loop in polygon]
        polyline = [outline + [outline[0]] for outline in outlines][0]
        lat.extend(map(lambda v:v[0],polyline))
        lng.extend(map(lambda v:v[1],polyline))
        polylines.append(polyline)
    for polyline in polylines:
        my_PolyLine=folium.PolyLine(locations=polyline,weight=3,color='red')
        m.add_child(my_PolyLine)
        
    polylines_x = []
    for j in range(len(polylines)):
        a = np.column_stack((np.array(polylines[j])[:,1],np.array(polylines[j])[:,0])).tolist()
        polylines_x.append([(a[i][0], a[i][1]) for i in range(len(a))])
        
    polygons_hex = pd.Series(polylines_x).apply(lambda x: Polygon(x))
        
    return m, polygons_hex, polylines

def create_choropleth(data, json, columns, legend_name, feature, bins):
    
    lat, lon = get_lat_lon(data['geometry'])

    m = folium.Map(location=[sum(lat)/len(lat), sum(lon)/len(lon)], zoom_start=13, tiles='cartodbpositron')
    
#     if bins_ == True:
#         bins = list(data[feature].quantile([0.25, 0.5, 0.75, 1]))
#     else:
#         bins = 5

    folium.Choropleth(
        geo_data=json,
        name="choropleth",
        data=data,
        columns=columns,
        key_on="feature.id",
        fill_color="YlGn",
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_name,
        nan_fill_color = 'black',
        popup=label,
        bins = bins

    ).add_to(m)

    folium.LayerControl().add_to(m)

    return m


def create_heatmap(data, lat_lon_feature):
    
    m = folium.Map(location=[sum(data['lat'])/len(data['lat']), sum(data['lon'])/len(data['lon'])], zoom_start=13, tiles='cartodbpositron')
    
    plugins.HeatMap(data[lat_lon_feature].groupby(lat_lon_feature[0:2]).sum().reset_index().values.tolist(), 
                    radius = 70, min_opacity = 0.05, max_val = int((data[lat_lon_feature[2]]).quantile([0.75])), blur=30).add_to(m)
    return m

# geometry в зависимости от объекта может как полигоном (Polygon), так и координатами (Points)
# создадим функцию, которая будет для полигонов создавать центроиды lat, lon, для Points - lat, lon

def get_lat_lon(geometry):
        
    lon = geometry.apply(lambda x: x.x if x.type == 'Point' else x.centroid.x)
    lat = geometry.apply(lambda x: x.y if x.type == 'Point' else x.centroid.y)
    return lat, lon
    

In [21]:
# пример визуализации 1 гексагона
h3_address = h3.geo_to_h3(45.035470, 38.975313,  9) # 9 - индекс, определяющий размер гексагона                                                                                                     
m = visualize_hexagons([h3_address])
display(m)

## 2. Гексагоны для г. Краснодар

In [2]:
# выгрузим границы Краснодара из OSM
cities = ['Краснодар']
polygon_krd = ox.geometries_from_place(cities, {'boundary':'administrative'}).reset_index()
polygon_krd = polygon_krd[(polygon_krd['name'] == 'городской округ Краснодар')]

In [5]:
# посмотрим что получилось
visualize_polygons(polygon_krd['geometry'])

In [6]:
# сгенерим гексагоны внутри полигона г. Краснодар
geoJson = json.loads(gpd.GeoSeries(polygon_krd['geometry']).to_json())
geoJson = geoJson['features'][0]['geometry']
geoJson = {'type':'Polygon','coordinates': [np.column_stack((np.array(geoJson['coordinates'][0])[:, 1],
                                                      np.array(geoJson['coordinates'][0])[:, 0])).tolist()]}

m, polygons, polylines = create_hexagons(geoJson)

In [7]:
m

In [1222]:
polylines[0]

'[(45.07775256155123, 38.89681124198797), (45.073775887124256, 38.89355177649113), (45.069565965480315, 38.897039566301835), (45.06933266364124, 38.90378623590471), (45.07330913992323, 38.907045816809536), (45.07751911618514, 38.903558612780635), (45.07775256155123, 38.89681124198797)]'

In [93]:
# а вот и наши гексагоны, которые мы будем пересекать с объектами и считать разные фичи в разрезе гексагонов =)
polygons.head(5)

0    POLYGON ((38.82252102057763 45.19883362498742,...
1    POLYGON ((39.12726258394022 45.02049834193902,...
2    POLYGON ((38.95508050953221 45.1656324591664, ...
3    POLYGON ((38.92457902603243 45.17079008393334,...
4    POLYGON ((38.92525599921218 45.14625299286397,...
dtype: object

## 2. Выгрузка объектов из OSM, Yandex

Для дальнейшей геоаналитики нам понадобятся объекты (координаты) из OSM: 
- жилые, административные здания
- остановки
- продуктовые магазины
- торговые, бизнес центры
- школы, университеты
- рестораны
- и др. объекты 

*у api яндекса есть ограничения, но вы можете зарегестрировать свой ключ в кабинете разработчика и поэкспериментировать = ) (преимущество Яндекса в более актуальных данных)

### OSM 
https://wiki.openstreetmap.org/wiki/RU:Объекты_карты

In [168]:
def osm_query(tag, city):
    gdf = ox.geometries_from_place(city, tag).reset_index()
    gdf['city'] = np.full(len(gdf), city.split(',')[0])
    gdf['object'] = np.full(len(gdf), list(tag.keys())[0])
    gdf['type'] = np.full(len(gdf), tag[list(tag.keys())[0]])
    gdf = gdf[['city', 'object', 'type', 'geometry']]
    print(gdf.shape)
    return gdf

In [170]:
# Выгрузим интересующие нас категории объектов 
tags = [{'building' : 'apartments'}, {'building' : 'detached'}, 
        {'building' : 'dormitory'}, {'building' : 'hotel'}, {'building' : 'house'}, 
       {'building' : 'semidetached_house'}, {'building' : 'terrace'},  {'building' : 'commercial'},
        {'building' : 'office'},  {'building' : 'terrace'},  {'building' : 'terrace'}, {'building':'retail'}, 
        {'building':'train_station'},
        
        {'highway' : 'bus_stop'}, {'footway':'crossing'},
        
       {'amenity':'cafe'}, {'amenity':'fast_food'}, {'amenity':'restaurant'}, {'amenity':'college'}, 
        {'amenity':'language_school'},  {'amenity':'school'},  {'amenity':'university'},  {'amenity':'atm'},  
        {'amenity':'bank'},  {'amenity':'clinic'},  {'amenity':'hospital'},  {'amenity':'pharmacy'},  
        {'amenity':'theatre'},  {'amenity':'townhall'},  {'amenity':'bench'}, 
       ]
cities = ['Краснодар, Россия']

gdfs = []
for city in cities:
    for tag in tags:
        gdfs.append(osm_query(tag, city))

(2246, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(782, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(43, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(11, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(17818, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(403, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(175, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(108, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(170, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(175, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(175, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values
  result[:] = values


(447, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(2, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(556, 4)


  aout[:] = out
  aout[:] = out


(418, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values
  result[:] = values


(379, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(346, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(147, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(24, 4)


  aout[:] = out
  aout[:] = out


(1, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(117, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(50, 4)


  aout[:] = out
  aout[:] = out


(294, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(210, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(47, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(54, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(315, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(20, 4)


  aout[:] = out
  aout[:] = out
  result[:] = values


(13, 4)


  aout[:] = out
  aout[:] = out


(471, 4)


In [171]:
# посмотрим что получилось
data_poi = pd.concat(gdfs)
data_poi.groupby(['city','object','type'], as_index = False).agg({'geometry':'count'})

Unnamed: 0,city,object,type,geometry
0,Краснодар,amenity,atm,294
1,Краснодар,amenity,bank,210
2,Краснодар,amenity,bench,471
3,Краснодар,amenity,cafe,379
4,Краснодар,amenity,clinic,47
5,Краснодар,amenity,college,24
6,Краснодар,amenity,fast_food,346
7,Краснодар,amenity,hospital,54
8,Краснодар,amenity,language_school,1
9,Краснодар,amenity,pharmacy,315


In [172]:
# добавим координаты/центроиды
lat, lon = get_lat_lon(data_poi['geometry'])
data_poi['lat'] = lat
data_poi['lon'] = lon
print(data_poi.shape)
data_poi.head()

(26017, 6)


Unnamed: 0,city,object,type,geometry,lat,lon
0,Краснодар,building,apartments,"POLYGON ((39.09116 45.02774, 39.09209 45.02785...",45.027743,39.091635
1,Краснодар,building,apartments,"POLYGON ((39.10237 45.03824, 39.10230 45.03856...",45.038388,39.102232
2,Краснодар,building,apartments,"POLYGON ((39.03289 45.01527, 39.03188 45.01528...",45.015222,39.032386
3,Краснодар,building,apartments,"POLYGON ((39.03302 45.01550, 39.03204 45.01550...",45.015567,39.032531
4,Краснодар,building,apartments,"POLYGON ((39.03238 45.01644, 39.03212 45.01583...",45.016118,39.032335


In [254]:
# посмотрим что получилось на карте (сохраним карту в формате html страницы, дабы не перегружать ноутбучек:))
visualize_points(lat, lon, itog_data['type']).save("map_1.html")

### Yandex организации
https://developer.tech.yandex.ru/ - кабинет разработчика (создаем свой ключ для использования сериса Яндекс.Организации) \
https://yandex.ru/dev/maps/geosearch/doc/examples/geosearch_examples.html - примеры

In [1181]:
geometry_uniq.head()

Unnamed: 0,geometry,id,lat,lon
0,"POLYGON ((38.83583542416847 45.08802333615556,...",0,45.092002,38.839094
1,"POLYGON ((38.91447184712654 45.04851351481249,...",1,45.052724,38.910986
2,"POLYGON ((39.20149152993542 45.00959024450938,...",2,45.005373,39.204953
3,"POLYGON ((38.9586353299835 45.03470451955536, ...",3,45.03868,38.961895
4,"POLYGON ((38.84420805796285 45.14905962838706,...",4,45.144853,38.847703


In [1182]:
#делаем запрос по категориям для каждой точки 
categories = ['Продукты','Школа','Университет','Колледж','Бизнес-центр', 'Торговый центр','Достопримечательность',
              'Ресторан','Кафе','Кофейня', 'Кондитерская','Быстрое питание','Больница','Банк','Банкомат','Гостиница',
              'Фитнес-клуб','Салон красоты',]

#  нюансы: если выгружать без цикла по центроидам - яндекс выгружает не все объекты (но в этом примере мы будем использовать без цикла т. к. у ключа есть ограничения)

# tabs = []
# for category in categories:
#     for lat, lon in zip(geometry_uniq['lat'], geometry_uniq['lon']):
#         r = requests.get(f"https://search-maps.yandex.ru/v1/?text={'Краснодар' + category}&ll={lon},{lat}&spn=1,1&results=50&lang=ru_RU&apikey=5dcff3d1-3b27-4fca-a2d8-e25ed8d3ee30")
#         a = pd.json_normalize(json.loads(r.text), max_level=0)['features'][0]
#         a = pd.json_normalize(a)
#         a['category'] = list(np.full(len(a), category))
#         tabs.append(a)
        
tabs = []
for category in categories:
    r = requests.get(f"https://search-maps.yandex.ru/v1/?text={'Краснодар' + category}&results=2000&lang=ru_RU&apikey=5dcff3d1-3b27-4fca-a2d8-e25ed8d3ee30")
    a = pd.json_normalize(json.loads(r.text), max_level=0)['features'][0]
    a = pd.json_normalize(a)
    a['category'] = list(np.full(len(a), category))
    tabs.append(a)

KeyError: 'features'

In [1161]:
data_yandex = pd.concat(tabs)
data_yandex = data_yandex.drop_duplicates()
data_yandex.head()

Unnamed: 0,type,geometry.type,geometry.coordinates,properties.name,properties.description,properties.boundedBy,properties.CompanyMetaData.id,properties.CompanyMetaData.name,properties.CompanyMetaData.address,properties.CompanyMetaData.url,properties.CompanyMetaData.Phones,properties.CompanyMetaData.Categories,properties.CompanyMetaData.Hours.text,properties.CompanyMetaData.Hours.Availabilities,category
0,Feature,Point,"[38.995813, 45.097895]",Metro Cash & Carry,"Ростовское ш., 30, Краснодар, Россия","[[38.9917075, 45.09498743], [38.9999185, 45.10...",1201893648,Metro Cash & Carry,"Россия, Краснодар, Ростовское шоссе, 30",https://krd.metro-cc.ru/,"[{'type': 'phone', 'formatted': '+7 (861) 210-...","[{'class': 'supermarket', 'name': 'Супермаркет...","ежедневно, 05:00–01:00","[{'Intervals': [{'from': '05:00:00', 'to': '01...",Продукты
1,Feature,Point,"[38.959577, 45.021893]",Табрис,"Кубанская наб., 25, Центральный округ, микрора...","[[38.955472, 45.01898143], [38.963682, 45.0248...",74870540332,Табрис,"Россия, Краснодар, микрорайон Центральный, Куб...",http://magazine.tabris.ru/,"[{'type': 'phone', 'formatted': '+7 (861) 234-...","[{'class': 'supermarket', 'name': 'Супермаркет'}]","ежедневно, круглосуточно","[{'TwentyFourHours': True, 'Everyday': True}]",Продукты
2,Feature,Point,"[38.981526, 45.069094]",Морковь,"ул. Фёдора Лузана, 23, Краснодар, Россия","[[38.9774205, 45.06618493], [38.9856315, 45.07...",1899047211,Морковь,"Россия, Краснодар, улица Фёдора Лузана, 23",,"[{'type': 'phone', 'formatted': '+7 (918) 195-...","[{'class': 'supermarket', 'name': 'Магазин ово...","ежедневно, 09:00–22:00","[{'Intervals': [{'from': '09:00:00', 'to': '22...",Продукты
3,Feature,Point,"[39.010491, 45.049456]",Продукты,"ул. Красных Зорь, 18/1, Краснодар, Россия","[[39.006386, 45.04654593], [39.014596, 45.0523...",161259110625,Продукты,"Россия, Краснодар, улица Красных Зорь, 18/1",,,"[{'class': 'supermarket', 'name': 'Магазин про...","ежедневно, 10:00–22:00","[{'Intervals': [{'from': '10:00:00', 'to': '22...",Продукты
4,Feature,Point,"[38.962157, 45.060505]",Табрис,"ул. Тургенева, 138/6, Фестивальный микрорайон,...","[[38.958052, 45.05759543], [38.966262, 45.0634...",1334431582,Табрис,"Россия, Краснодар, Фестивальный микрорайон, ул...",https://tabris.ru/,"[{'type': 'phone', 'formatted': '+7 (861) 234-...","[{'class': 'supermarket', 'name': 'Супермаркет'}]","ежедневно, круглосуточно","[{'TwentyFourHours': True, 'Everyday': True}]",Продукты


In [1163]:
data_yandex.groupby(['category']).agg({'properties.CompanyMetaData.id':'count'})

Unnamed: 0_level_0,properties.CompanyMetaData.id
category,Unnamed: 1_level_1
Банк,9
Банкомат,500
Бизнес-центр,103
Больница,206
Быстрое питание,500
Гостиница,479
Достопримечательность,350
Кафе,500
Колледж,39
Кондитерская,460


## Spatial Join

In [1146]:
# sjoin - spatial join - пересекаем гексагоны с объектами (определяем какие объекты находятся в разрезе каждого гексагона)

gdf_1 = gpd.GeoDataFrame(data_poi, geometry=gpd.points_from_xy(data_poi.lon, data_poi.lat))

gdf_2 = pd.DataFrame(polygons, columns = ['geometry'])
gdf_2['polylines'] = polylines
gdf_2['geometry'] = gdf_2['geometry'].astype(str)
geometry_uniq = pd.DataFrame(gdf_2['geometry'].drop_duplicates())
geometry_uniq['id'] = np.arange(len(geometry_uniq)).astype(str)
gdf_2 = gdf_2.merge(geometry_uniq, on = 'geometry')
gdf_2['geometry'] = gdf_2['geometry'].apply(wkt.loads)
gdf_2 = gpd.GeoDataFrame(gdf_2, geometry='geometry')

itog_table = gpd.sjoin(gdf_2, gdf_1, how='left', op='intersects')
itog_table = itog_table.dropna()
itog_table.head()

Unnamed: 0,geometry,polylines,id,index_right,city,object,type,lat,lon
1,"POLYGON ((38.91447 45.04851, 38.91773 45.05249...","[(45.07775256155123, 38.89681124198797), (45.0...",1,92.0,Краснодар,amenity,pharmacy,45.055595,38.911879
1,"POLYGON ((38.91447 45.04851, 38.91773 45.05249...","[(45.07775256155123, 38.89681124198797), (45.0...",1,767.0,Краснодар,building,apartments,45.05636,38.911558
1,"POLYGON ((38.91447 45.04851, 38.91773 45.05249...","[(45.07775256155123, 38.89681124198797), (45.0...",1,19.0,Краснодар,amenity,fast_food,45.054411,38.909341
1,"POLYGON ((38.91447 45.04851, 38.91773 45.05249...","[(45.07775256155123, 38.89681124198797), (45.0...",1,7239.0,Краснодар,building,house,45.053202,38.912871
1,"POLYGON ((38.91447 45.04851, 38.91773 45.05249...","[(45.07775256155123, 38.89681124198797), (45.0...",1,7208.0,Краснодар,building,house,45.053223,38.914818


In [92]:
# itog_table.to_csv('hex_and_poi.csv', sep = ';')

In [94]:
# itog_table = pd.read_csv('hex_and_poi.csv', sep = ';')

In [668]:
itog_table = pd.DataFrame(itog_table).dropna()
itog_table['geometry'] = itog_table['geometry'].astype(str) #для groupby
itog_table['id'] = itog_table['id'].astype(str) #для Choropleth
agg_all = itog_table.groupby(['geometry','type','polylines','id'], as_index = False).agg({'lat':'count'}).rename(columns = {'lat':'counts'})
agg_all['geometry'] = agg_all['geometry'].apply(wkt.loads) #возвращаем формат геометрий

In [669]:
# подготовка данных для Choropleth
agg_all_cafe = agg_all.query("type == 'cafe'")[["geometry","counts",'id']]
agg_all_cafe['id'] = agg_all_cafe['id'].astype(str)
data_geo_1 = gpd.GeoSeries(agg_all_cafe.set_index('id')["geometry"]).to_json()

  aout[:] = out


In [670]:
# смотрим где есть потенциальные гексагончики=) (или не совсем потенциальные?)
create_choropleth(agg_all_cafe, data_geo_1, ["id","counts"], 'Cafe counts', 'counts', bins_ = False)

In [306]:
m.save("map_2.html")

In [678]:
list(agg_all['type'].drop_duplicates())

['bus_stop',
 'crossing',
 'house',
 'office',
 'apartments',
 'commercial',
 'atm',
 'bench',
 'retail',
 'school',
 'terrace',
 'cafe',
 'detached',
 'fast_food',
 'pharmacy',
 'restaurant',
 'townhall',
 'bank',
 'clinic',
 'hotel',
 'dormitory',
 'university',
 'hospital',
 'semidetached_house',
 'college',
 'theatre',
 'train_station',
 'language_school']

In [679]:
# посмотрим фичу кол-во poi якорей трафика
poi_i = ['bus_stop',
        'commercial',
        'atm',
        'bench',
        'retail',
        'school',
        'fast_food',
        'pharmacy',
        'restaurant',
        'bank',
        'clinic',
        'hotel',
        'university',
        'hospital',
        'college',
        'theatre',
        'train_station']
agg_all_poi_interest = agg_all.query("type in @poi_i").groupby(['id'], as_index = False).agg({'counts':'sum'}).rename(columns = {'counts':'counts_poi'})

agg_all['geometry'] = agg_all['geometry'].astype(str)
agg_all_2 = agg_all[['id', 'geometry']].drop_duplicates().merge(agg_all_poi_interest[['id', 'counts_poi']], how = 'left',on = 'id').merge(agg_all.query("type == 'cafe'")[['id' , 'counts']], how = 'left', on = 'id')
agg_all_2['geometry'] = agg_all_2['geometry'].apply(wkt.loads)
agg_all_2.head()

create_choropleth(agg_all_2, gpd.GeoSeries(agg_all_2.set_index('id')['geometry']).to_json(), ["id","counts_poi"], 'counts_poi', "counts_poi", bins_ = False)

  aout[:] = out


In [680]:
# посмотрим фичу (кол-во poi якорей трафика)/кол-во кофеен

agg_all_2 = agg_all_2.fillna(1)
agg_all_2['feature_1'] = agg_all_2['counts_poi']/agg_all_2['counts']
create_choropleth(agg_all_2, gpd.GeoSeries(agg_all_2.set_index('id')['geometry']).to_json(), ["id","feature_1"], 'feature_1', "feature_1", bins_ = False)

  aout[:] = out


In [717]:
# выгрузим апартаменты (многоэтажки) отдельно и проверим полноту данных фич: building:levels (этажи), building:flats - кол-во квартир
gdf_aparts = ox.geometries_from_place(city, {'building' : 'apartments'}).reset_index()

  aout[:] = out
  aout[:] = out
  result[:] = values
  result[:] = values


In [729]:
gdf_aparts.head()

Unnamed: 0,element_type,osmid,name,geometry,operator,addr:housenumber,addr:street,phone,website,internet_access,...,rooms,tourism,building:levels:underground,construction:addr:housenumber,ele,construction:addr:street,layer,ways,type,building:parts
0,way,26938901,,"POLYGON ((39.09116 45.02774, 39.09209 45.02785...",,4,Симферопольская улица,,,,...,,,,,,,,,,
1,way,26938903,,"POLYGON ((39.10237 45.03824, 39.10230 45.03856...",,9/2,улица 30-й Иркутской Дивизии,,,,...,,,,,,,,,,
2,way,26938904,,"POLYGON ((39.03289 45.01527, 39.03188 45.01528...",,100,улица Айвазовского,,,,...,,,,,,,,,,
3,way,26938914,,"POLYGON ((39.03302 45.01550, 39.03204 45.01550...",,102,улица Айвазовского,,,,...,,,,,,,,,,
4,way,26938936,,"POLYGON ((39.03238 45.01644, 39.03212 45.01583...",,104,улица Айвазовского,,,,...,,,,,,,,,,


In [727]:
# полнота building:levels (этажи)
np.round(len(gdf_aparts['building:levels'].dropna())/len(gdf_aparts['building:levels']), 2)

0.85

In [728]:
# полнота building:flats (квартиры)
np.round(len(gdf_aparts['building:flats'].dropna())/len(gdf_aparts['building:flats']), 2)

0.04

#### Ok, Гугл: сколько в среднем квартир на этаже?

Гугл вещает, что кол-во квартир напрямую зависит от класса жилья:
- эконом - в среднем 10
- комфорт - в среднем 6-8
- бизнес - в среднем 4-6

Мы можем (как отчаянные дата - сатанисты) углубиться в эту тему:
- парсить объявления авито - https://developers.avito.ru/?_ga=2.8241522.1102112241.1628779747-1391354864.1628779747
- на основе стоимости 1 кв. м  определять класс объекта, да и данных по объектам недвижимости будет больше, чем с OSM

Но я хочу будь девушкой - загадкой и оставлю вкусные статьи на потом, где мы и поизучаем данные API \
Поэтому дальше будем использовать в среднем 10 квартир на этаж (доля бизнес и комфорт класса не так велика)

In [736]:
itog_table['type'].drop_duplicates()

1                 pharmacy
1               apartments
1                fast_food
1                    house
3                 bus_stop
3                     bank
3                     cafe
3                   clinic
8                 detached
8                   retail
8                  college
10                crossing
18              commercial
38                  school
38                     atm
42               dormitory
46              restaurant
56              university
57                  office
96                   bench
109               townhall
121     semidetached_house
123               hospital
125                theatre
203                terrace
226                  hotel
734          train_station
1075       language_school
Name: type, dtype: object

In [834]:
# добавим фичу - население

lat_g, lon_g = get_lat_lon(gdf_aparts['geometry'].apply(wkt.loads))
gdf_aparts['lat'] = lat_g
gdf_aparts['lon'] = lon_g

itog_table_people = itog_table.merge(gdf_aparts[['lat', 'lon', 'building:levels']], on = ['lat', 'lon'], how = 'left')
itog_table_people['building:levels'] = itog_table_people['building:levels'].fillna(1)
itog_table_people = itog_table_people.rename(columns = {'building:levels' : 'levels'})

apartments = ['apartments' , 'dormitory']
houses = ['house', 'semidetached_house', 'detached', 'terrace']
people_ctn = []

# в среднем возьмем 3 чел. на семью

for i in range(len(itog_table_people)):
    
    if itog_table_people['type'].iloc[i] in apartments:
        
        people = int(itog_table_people['levels'].iloc[i])*10*3
        
    elif itog_table_people['type'].iloc[i] in houses:
        
        people = int(itog_table_people['levels'].iloc[i])*3
        
    else:
        people = 'not living area'
        
    people_ctn.append(people)
    
itog_table_people['count_people'] = people_ctn

In [835]:
itog_table_people.head()

Unnamed: 0,geometry,polylines,id,index_right,city,object,type,lat,lon,levels,count_people
0,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,92.0,Краснодар,amenity,pharmacy,45.055595,38.911879,1,not living area
1,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,767.0,Краснодар,building,apartments,45.05636,38.911558,16,480
2,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,19.0,Краснодар,amenity,fast_food,45.054411,38.909341,1,not living area
3,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,7239.0,Краснодар,building,house,45.053202,38.912871,1,3
4,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,7208.0,Краснодар,building,house,45.053223,38.914818,1,3


In [864]:
table_people = itog_table_people.query("count_people != 'not living area'")
table_people['count_people'] = table_people['count_people'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [865]:
# карта плотности населения
create_heatmap(table_people, ['lat', 'lon', 'count_people'])



In [866]:
table_people['geometry'] = table_people['geometry'].astype(str)
table_people_osm_agg = table_people.groupby(['id', 'geometry'], as_index = False).agg({'count_people':'sum'})
table_people_osm_agg['geometry'] = table_people_osm_agg['geometry'].apply(wkt.loads)
table_people_osm_agg.sort_values('count_people', ascending = False).head(20)

Unnamed: 0,id,geometry,count_people
156,582,"POLYGON ((39.00755853982577 45.09430882987476,...",23007
251,944,"POLYGON ((39.02538478832323 45.05688247599272,...",18840
67,177,"POLYGON ((39.01495059420091 45.06951937005982,...",18543
21,1070,"POLYGON ((39.03517329755768 45.06879775752244,...",14058
22,1075,"POLYGON ((38.99428848646149 45.08660520148189,...",13407
27,1085,"POLYGON ((39.02559971204815 45.04869730656608,...",12924
33,1119,"POLYGON ((39.07749584756829 44.99367262989065,...",12090
136,509,"POLYGON ((39.01799763375094 45.08167460484595,...",11364
242,892,"POLYGON ((39.08991898047015 45.03412129869394,...",11310
82,274,"POLYGON ((39.02971990672373 45.01992416432098,...",10581


In [867]:
# посмотрим на 1 гексагон
table_people.query("id == '582'").sort_values('count_people', ascending = False).head(20)

Unnamed: 0,geometry,polylines,id,index_right,city,object,type,lat,lon,levels,count_people
11304,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1419.0,Краснодар,building,apartments,45.093695,39.010937,17,510
11224,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1460.0,Краснодар,building,apartments,45.09341,39.007258,17,510
11301,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1464.0,Краснодар,building,apartments,45.09351,39.010295,17,510
11410,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1531.0,Краснодар,building,apartments,45.091179,39.008559,17,510
11412,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1532.0,Краснодар,building,apartments,45.091206,39.009267,17,510
11227,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1465.0,Краснодар,building,apartments,45.093648,39.008826,17,510
11226,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1466.0,Краснодар,building,apartments,45.093498,39.00954,17,510
11225,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1463.0,Краснодар,building,apartments,45.093438,39.00814,17,510
11287,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1530.0,Краснодар,building,apartments,45.0906,39.011531,17,510
11294,"POLYGON ((39.00755853982577 45.09430882987476,...","[(45.052256416811026, 38.924475168634245), (45...",582,1418.0,Краснодар,building,apartments,45.092333,39.010574,11,330


In [909]:
table_people.head()

Unnamed: 0,geometry,polylines,id,index_right,city,object,type,lat,lon,levels,count_people,cut
1,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,767.0,Краснодар,building,apartments,45.05636,38.911558,16,480,
3,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,7239.0,Краснодар,building,house,45.053202,38.912871,1,3,
4,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,7208.0,Краснодар,building,house,45.053223,38.914818,1,3,
5,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,7241.0,Краснодар,building,house,45.053265,38.913337,1,3,
6,"POLYGON ((38.91447184712654 45.04851351481249,...","[(45.07775256155123, 38.89681124198797), (45.0...",1,7236.0,Краснодар,building,house,45.053297,38.913778,1,3,


In [908]:
# визуализируем 5 гексагонов с максимальными каунтами (раскрасим в зависимости от count_people)

# table_people['geometry'] = table_people['geometry'].apply(wkt.loads)

_5_gexes = list(table_people_osm_agg.sort_values('count_people', ascending = False).head(5).loc[:,'id'])
table_5_gexes = table_people.query("id in @_5_gexes")

table_people['cut'] = pd.cut(table_5_gexes.loc[:,'count_people'], bins = 5, labels = ['l1','l2','l3','l4','l5'])

def color_change(x):
    
    if x == 'l1':
        return('blue')
    elif x == 'l2':
        return('green')
    elif x == 'l3':
        return('orange')
    elif x == 'l4':
        return('red')
    elif x == 'l5':
        return('darkred')
    else:
        return('black')


visualize_points(table_5_gexes.loc[:,'lat'], table_5_gexes.loc[:,'lon'], table_5_gexes['count_people'], table_5_gexes['cut'])

In [946]:
# гексагоны населения 
create_choropleth(table_people_osm_agg, 
                  gpd.GeoSeries(table_people_osm_agg.set_index('id')['geometry']).to_json(), 
                              ["id","count_people"], 'count_people', "count_people", bins = 9)

In [947]:
table_people_osm_agg.head()

Unnamed: 0,id,geometry,count_people
0,1,"POLYGON ((38.91447184712654 45.04851351481249,...",630
1,1001,"POLYGON ((39.01646194105481 45.01221477326503,...",2400
2,1004,"POLYGON ((39.05539300839629 45.06807266543381,...",963
3,1007,"POLYGON ((39.09033528104873 45.01774978801768,...",222
4,1013,"POLYGON ((38.97643748011409 45.12402103275863,...",2004


In [949]:
# добавим фичу население/кол-во кофеен
agg_all['geometry'] = agg_all['geometry'].astype(str)
agg_all_2_2 = agg_all[['id', 'geometry']].drop_duplicates().merge(table_people_osm_agg[['id', 'count_people']], how = 'left',on = 'id').merge(agg_all_cafe[['id' , 'counts']], how = 'left', on = 'id')
agg_all_2_2['geometry'] = agg_all_2_2['geometry'].apply(wkt.loads)

agg_all_2_2 = agg_all_2_2.fillna(1)
agg_all_2_2['feature_peop_cafe'] = agg_all_2_2['count_people']/agg_all_2_2['counts']

create_choropleth(agg_all_2_2, gpd.GeoSeries(agg_all_2_2.set_index('id')['geometry']).to_json(), ["id","feature_peop_cafe"], 'feature_2_2', "feature_2_2", bins = 8)

In [950]:
# выберем ТОП 5 гексагонов для дальнейшего анализа и поиска арендного помещения =)
top_5_osm = agg_all_2_2.sort_values('feature_peop_cafe', ascending = False).head(5)
create_choropleth(top_5_osm, gpd.GeoSeries(top_5_osm.set_index('id')['geometry']).to_json(), ["id","feature_peop_cafe"], 'feature_peop_cafe', "feature_peop_cafe", bins = 8)

In [951]:
top_5

Unnamed: 0,id,geometry,counts_poi,counts,feature_1
178,664,"POLYGON ((39.01886039448301 45.04893728832407,...",744,1.0,744.0
102,1013,"POLYGON ((38.97643748011409 45.12402103275863,...",672,1.0,672.0
155,162,"POLYGON ((39.00690764829994 45.11885260457239,...",506,1.0,506.0
179,654,"POLYGON ((39.01886039448301 45.04893728832407,...",467,1.0,467.0
89,377,"POLYGON ((38.96731065242864 45.08755844755686,...",464,1.0,464.0


### На данных Яндекса

In [1147]:
# spatial join - пересекаем гексагоны с объектами (определяем какие объекты находятся в разрезе каждого гексагона)
data_yandex['lat'] = data_yandex['geometry.coordinates'].apply(lambda x: x[1])
data_yandex['lon'] = data_yandex['geometry.coordinates'].apply(lambda x: x[0])

gdf_1 = gpd.GeoDataFrame(data_yandex, geometry=gpd.points_from_xy(data_yandex.lon, data_yandex.lat))

gdf_2 = pd.DataFrame(polygons, columns = ['geometry'])
gdf_2['polylines'] = polylines
gdf_2['geometry'] = gdf_2['geometry'].astype(str)
geometry_uniq = pd.DataFrame(gdf_2['geometry'].drop_duplicates())
geometry_uniq['id'] = np.arange(len(geometry_uniq)).astype(str)
gdf_2 = gdf_2.merge(geometry_uniq, on = 'geometry')
gdf_2['id'] = gdf_2.index.astype(str)
gdf_2['geometry'] = gdf_2['geometry'].apply(wkt.loads)
gdf_2 = gpd.GeoDataFrame(gdf_2, geometry='geometry')

itog_table_ya = gpd.sjoin(gdf_2, gdf_1, how='left', op='intersects')
itog_table_ya = itog_table_ya.dropna()
itog_table_ya.head()

Unnamed: 0,geometry,polylines,id,index_right,type,geometry.type,geometry.coordinates,properties.name,properties.description,properties.boundedBy,...,properties.CompanyMetaData.name,properties.CompanyMetaData.address,properties.CompanyMetaData.url,properties.CompanyMetaData.Phones,properties.CompanyMetaData.Categories,properties.CompanyMetaData.Hours.text,properties.CompanyMetaData.Hours.Availabilities,category,lat,lon
3,"POLYGON ((38.95864 45.03470, 38.96538 45.03447...","[(45.07775256155123, 38.89681124198797), (45.0...",3,318.0,Feature,Point,"[38.966421, 45.041024]",Транскапиталбанк,"ул. Фрунзе, 169, Центральный округ, микрорайон...","[[38.9623155, 45.03811293], [38.9705265, 45.04...",...,Транскапиталбанк,"Россия, Краснодар, Центральный округ, микрорай...",https://www.tkbbank.ru/,"[{'type': 'phone', 'formatted': '8 (800) 100-3...","[{'class': 'currency exchange', 'name': 'Банко...","ежедневно, 06:00–23:00","[{'Intervals': [{'from': '06:00:00', 'to': '23...",Банкомат,45.041024,38.966421
3,"POLYGON ((38.95864 45.03470, 38.96538 45.03447...","[(45.07775256155123, 38.89681124198797), (45.0...",3,13.0,Feature,Point,"[38.965394, 45.036484]","Три Богатыря, мужская баня","ул. Фрунзе, 135, Центральный округ, микрорайон...","[[38.9612885, 45.03357293], [38.9694995, 45.03...",...,"Три Богатыря, мужская баня","Россия, Краснодар, Центральный микрорайон, ули...",http://bogatyry.ru/,"[{'type': 'phone', 'formatted': '+7 (861) 243-...","[{'class': 'spa', 'name': 'Баня'}, {'class': '...",пн 13:00–00:00; вт-вс 10:00–00:00,"[{'Intervals': [{'from': '13:00:00', 'to': '00...",Достопримечательность,45.036484,38.965394
3,"POLYGON ((38.95864 45.03470, 38.96538 45.03447...","[(45.07775256155123, 38.89681124198797), (45.0...",3,168.0,Feature,Point,"[38.96692, 45.036969]",Альфа-Банк,"ул. Будённого, 117/1, Центральный округ, микро...","[[38.9628145, 45.03405843], [38.9710255, 45.03...",...,Альфа-Банк,"Россия, Краснодар, микрорайон Центральный, ули...",https://alfabank.ru/,"[{'type': 'phone', 'formatted': '8 (800) 200-0...","[{'class': 'currency exchange', 'name': 'Банко...",пн-пт 07:00–19:00,"[{'Intervals': [{'from': '07:00:00', 'to': '19...",Банкомат,45.036969,38.96692
3,"POLYGON ((38.95864 45.03470, 38.96538 45.03447...","[(45.07775256155123, 38.89681124198797), (45.0...",3,181.0,Feature,Point,"[38.964624, 45.037404]",Атмосфера,"ул. Будённого, 105, микрорайон Центральный, Кр...","[[38.960519, 45.03449293], [38.968729, 45.0403...",...,Атмосфера,"Россия, Краснодар, микрорайон Центральный, ули...",https://atmosfera-rest.ru/,"[{'type': 'phone', 'formatted': '+7 (938) 432-...","[{'class': 'restaurants', 'name': 'Ресторан'},...","ежедневно, 07:00–00:00","[{'Intervals': [{'from': '07:00:00', 'to': '00...",Кафе,45.037404,38.964624
3,"POLYGON ((38.95864 45.03470, 38.96538 45.03447...","[(45.07775256155123, 38.89681124198797), (45.0...",3,111.0,Feature,Point,"[38.964624, 45.037404]",Атмосфера,"ул. Будённого, 105, микрорайон Центральный, Кр...","[[38.960519, 45.03449293], [38.968729, 45.0403...",...,Атмосфера,"Россия, Краснодар, микрорайон Центральный, ули...",https://atmosfera-rest.ru/,"[{'type': 'phone', 'formatted': '+7 (938) 432-...","[{'class': 'restaurants', 'name': 'Ресторан'},...","ежедневно, 07:00–00:00","[{'Intervals': [{'from': '07:00:00', 'to': '00...",Ресторан,45.037404,38.964624


In [628]:
itog_table_ya = pd.DataFrame(itog_table_ya).dropna()
itog_table_ya['geometry'] = itog_table_ya['geometry'].astype(str) #для groupby, merge

agg_all_ya = itog_table_ya.groupby(['geometry','category','polylines','id'], as_index = False).agg({'lat':'count'}).rename(columns = {'lat':'counts'})
agg_all_ya['geometry'] = agg_all_ya['geometry'].apply(wkt.loads) #возвращаем формат геометрий

In [583]:
# посмотрим как распределены кофейни 
agg_all_ya_cafe = agg_all_ya.query("category in('Кофейня')")[["geometry","counts"]]
agg_all_ya_cafe['id'] = agg_all_ya_cafe.index.astype(str)
data_geo_1 = gpd.GeoSeries(agg_all_ya_cafe.set_index('id')["geometry"]).to_json()
create_choropleth(agg_all_ya_cafe, data_geo_1, ["id","counts"], 'Cafe counts', 'counts')

  aout[:] = out


In [1103]:
# т. к. в Яндексе мы не можем спарсить здания - возьмем уже готовую фичу населения у ОСМ, а кофейни у Яндекса
agg_all_ya['geometry'] = agg_all_ya['geometry'].astype(str)

##### переделать джойн 
agg_all_2_ya = geometry_uniq[['id', 'geometry']].drop_duplicates().merge(table_people_osm_agg[['id', 'count_people']], how = 'left',on = 'id').merge(agg_all_ya_cafe[['id' , 'counts']], how = 'left', on = 'id')
agg_all_2_ya['geometry'] = agg_all_2_ya['geometry'].apply(wkt.loads)
agg_all_2_ya.head()

agg_all_2_ya = agg_all_2_ya.fillna(1)
agg_all_2_ya['feature_2'] = agg_all_2_ya['count_people']/agg_all_2_ya['counts']
agg_all_2_ya['feature_2'] = agg_all_2_ya['feature_2'].astype(int)
create_choropleth(agg_all_2_ya, gpd.GeoSeries(agg_all_2_ya.set_index('id')['geometry']).to_json(), ["id","feature_2"], 'feature_2', "feature_2", bins = 8)

In [1104]:
# выберем ТОП 5 гексагонов для дальнейшего анализа и поиска арендного помещения =) на основе feature_2
top_5_f2 = agg_all_2_ya.sort_values('feature_2', ascending = False).head(5)
create_choropleth(top_5_f2, gpd.GeoSeries(top_5_f2.set_index('id')['geometry']).to_json(), ["id","feature_2"], 'feature_2', "feature_2", bins = 8)

In [956]:
top_5_f2

Unnamed: 0,id,geometry,count_people,counts,feature_2
122,582,"POLYGON ((39.00755853982577 45.09430882987476,...",23007.0,1.0,23007
146,944,"POLYGON ((39.02538478832323 45.05688247599272,...",18840.0,1.0,18840
131,177,"POLYGON ((39.01495059420091 45.06951937005982,...",18543.0,1.0,18543
159,1070,"POLYGON ((39.03517329755768 45.06879775752244,...",14058.0,1.0,14058
97,1075,"POLYGON ((38.99428848646149 45.08660520148189,...",13407.0,1.0,13407


In [634]:
# посчитаем фичу 3 - poi якоря yandex/кофейни
poi_interest_yandex = agg_all_ya.query("category not in('Кофейня')").groupby(['id'], as_index = False).agg({'counts':'sum'}).rename(columns = {'counts':'counts_poi_yandex'})

agg_all_3_ya = agg_all_ya[['id', 'geometry']].drop_duplicates().merge(poi_interest_yandex[['id', 'counts_poi_yandex']], how = 'left',on = 'id').merge(agg_all_ya_cafe[['id' , 'counts']], how = 'left', on = 'id')
agg_all_3_ya['geometry'] = agg_all_3_ya['geometry'].apply(wkt.loads)
agg_all_3_ya.head()

agg_all_3_ya = agg_all_3_ya.fillna(1)
agg_all_3_ya['feature_3'] = agg_all_3_ya['counts_poi_yandex']/agg_all_3_ya['counts']
agg_all_3_ya['feature_3'] = agg_all_3_ya['feature_3'].astype(int)
create_choropleth(agg_all_3_ya, gpd.GeoSeries(agg_all_3_ya.set_index('id')['geometry']).to_json(), ["id","feature_3"], 'feature_3', "feature_3", bins_ = False)


  aout[:] = out


In [960]:
# выберем ТОП 5 гексагонов для дальнейшего анализа и поиска арендного помещения =) на основе feature_3
top_5_f3 = agg_all_3_ya.sort_values('feature_3', ascending = False).head(5)
create_choropleth(top_5_f3, gpd.GeoSeries(top_5_f3.set_index('id')['geometry']).to_json(), ["id","feature_3"], 'feature_3', "feature_3", bins = 8)

In [963]:
top_5_f3

Unnamed: 0,id,geometry,counts_poi_yandex,counts,feature_3
68,125,"POLYGON ((38.96885692881101 45.0302559543026, ...",104.0,1.0,104
71,992,"POLYGON ((38.97537692181696 45.03820713075687,...",88.0,1.0,88
69,140,"POLYGON ((38.96885692881101 45.0302559543026, ...",80.0,1.0,80
82,1154,"POLYGON ((38.97841752402051 45.05036905888618,...",68.0,1.0,68
64,481,"POLYGON ((38.9679736155211 45.06300417841472, ...",63.0,1.0,63


In [1220]:
str(top_5_f3['geometry'].iloc[0])

'POLYGON ((38.96885692881101 45.0302559543026, 38.96559731617488 45.02627986658541, 38.96907764338795 45.02206725737352, 38.97581699683202 45.02183068208745, 38.97907672272515 45.02580657145486, 38.97559698199402 45.03001923445393, 38.96885692881101 45.0302559543026))'

## Huff (будет дополняться)

In [1165]:
# остановимся на том, что наиболее интересная область поиска для нас: 
# (да, OSM дает выбросы, но можно взять ближайшие гексагоны для анализа)
visualize_polygons(top_5_f3['geometry'].head(1))

In [1168]:
# рассчитаем центроиды, с помощью KDTree выведем 6 ближайших соседей
lat, lon = get_lat_lon(top_5_f3['geometry'])

top_5_f3['lat'] = lat
top_5_f3['lon'] = lon

geometry_uniq['geometry'] = geometry_uniq['geometry'].apply(wkt.loads)

lat, lon = get_lat_lon(geometry_uniq['geometry'])
geometry_uniq['lat'] = lat
geometry_uniq['lon'] = lon

start_time = time.time()


tree = KDTree(np.array(geometry_uniq[['lat', 'lon']].drop_duplicates()))

dist, indexes = tree.query(np.array(top_5_f3[['lat','lon']].head(1)), k=7, workers = -1)

print("--- %s seconds ---" % (time.time() - start_time))

--- 0.004685163497924805 seconds ---


In [1169]:
_7_wkt = np.array(geometry_uniq['geometry'])[indexes].tolist()[0]
_7_wkt = pd.DataFrame(_7_wkt, columns = ['geometry'])
# _7_wkt['geometry'] = _7_wkt['geometry'].apply(wkt.loads)
_7_wkt

Unnamed: 0,geometry
0,"POLYGON ((38.96885692881101 45.0302559543026, ..."
1,"POLYGON ((38.96885692881101 45.0302559543026, ..."
2,"POLYGON ((38.97929636508905 45.01761776347304,..."
3,"POLYGON ((38.97907672272515 45.02580657145486,..."
4,"POLYGON ((38.95885711713477 45.02651593397795,..."
5,"POLYGON ((38.97907672272515 45.02580657145486,..."
6,"POLYGON ((38.96885692881101 45.0302559543026, ..."


In [1170]:
visualize_polygons(_7_wkt['geometry'])

In [1179]:
# посмотрим на кофейни
g = list(_7_wkt['geometry'])
_7_wkt['geometry'] = _7_wkt['geometry'].astype(str)
itog_table_ya['geometry'] = itog_table_ya['geometry'].astype(str)
_7_poi = itog_table_ya.query("geometry in @g and category == 'Кофейня'")
visualize_points(_7_poi.loc[:,'lat'], 
                 _7_poi.loc[:,'lon'], 
                 _7_poi.loc[:,'properties.name'], 
                 []) 

#### Рейтинг
Что для нас важно выбрать как критерий?
- расстояние до ближайшего офиса/университета/жилого дома
- кол-во офисов
- кол-во университетов/колледжей
- население
- кол-во организаций
- кол-во административных/коммерческих зданий)

В качестве тесте возьемем 3 потенциальных локации, где есть аренда под кофейню: 45.028495, 38.972001; 45.027036, 38.971590; 45.025415, 38.970993

In [1187]:
list(agg_all_ya['category'].drop_duplicates())

['Достопримечательность',
 'Фитнес-клуб',
 'Школа',
 'Ресторан',
 'Быстрое питание',
 'Банкомат',
 'Продукты',
 'Салон красоты',
 'Торговый центр',
 'Гостиница',
 'Кофейня',
 'Кафе',
 'Больница',
 'Университет',
 'Банк',
 'Колледж',
 'Бизнес-центр']

In [1199]:
poi = pd.DataFrame([45.028495,45.027036,45.025415], columns = ['lat'])
poi['lon'] = [38.972001,38.971590,38.970993]
poi

Unnamed: 0,lat,lon
0,45.028495,38.972001
1,45.027036,38.97159
2,45.025415,38.970993


In [1213]:
# ближайший офис
# agg_all_ya['geometry'] = agg_all_ya['geometry'].apply(wkt.loads)
lat, lon = get_lat_lon(agg_all_ya['geometry'])
agg_all_ya['lat'] = lat
agg_all_ya['lon'] = lon

office = agg_all_ya.query("category == 'Бизнес-центр'")
tree = KDTree(np.array(office[['lat', 'lon']]))

dist, indexes_b = tree.query(np.array(poi), k=1, workers = -1)

In [1210]:
# ближайший универ
univers = agg_all_ya.query("category in ('Университет', 'Колледж')")
tree = KDTree(np.array(univers[['lat', 'lon']]))

dist, indexes_u = tree.query(np.array(poi), k=1, workers = -1)

In [1216]:
# ближайший многоэтажный дом
aparts = table_people.query("type == 'apartments'")
tree = KDTree(np.array(aparts[['lat', 'lon']]))

dist, indexes_a = tree.query(np.array(poi), k=1, workers = -1)

In [None]:
# на основе рейтинга рассчитаем Huff