In [1]:
import pandas as pd
import numpy as np
from numpy import inf, mean, median
import math
import datetime
import json
import os
import glob
import pickle
import requests
# import requests_cache
import plotly.graph_objects as go
import plotly.express as px
import folium
from folium.plugins import MarkerCluster, HeatMap
import geopandas as gpd
import osm2geojson
import geojson
import shapely
from shapely import wkt
from shapely.geometry import Point, Polygon, LineString

In [2]:
# Let pandas print up to 100 rows before truncating displayed frames/series.
pd.set_option('display.max_rows', 100)

# Cities

## OSM Cities

In [3]:
# Load the OSM city table from the local pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load files produced by a trusted run.
data_osm_cities = pd.read_pickle('data_osm_cities.pickle')

In [4]:
# Size check of the loaded city table: (rows, columns).
data_osm_cities.shape

(2652, 5)

In [5]:
# Preview the first rows to see which city attributes are available.
data_osm_cities.head()

Unnamed: 0,region,city_name,city_lon,city_lat,city_population
0,Адыгея,Адыгейск,39.190593,44.883197,12689.0
1,Адыгея,Гиагинская,40.060501,44.873023,13954.0
2,Адыгея,Кошехабль,40.499439,44.897871,7183.0
3,Адыгея,Красногвардейское,39.581746,45.134793,9419.0
4,Адыгея,Майкоп,40.104261,44.605519,144055.0


## Regions

In [6]:
# Load the region reference table from the local pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load files produced by a trusted run.
data_regions = pd.read_pickle('data_regions.pickle')

In [7]:
# Size check of the region table: (rows, columns).
data_regions.shape

(85, 12)

In [8]:
# Preview the first rows of the region table.
data_regions.head()

Unnamed: 0,region_name,region_type,center_name,region_area,region_population,region_code,region_fo,region_density,region,center_lon,center_lat,center_population
0,Адыгея,республика,Майкоп,7792,449171,1,Южный,57.645149,Адыгея,40.104261,44.605519,144055.0
1,Алтай,республика,Горно-Алтайск,92903,213703,4,Сибирский,2.300281,Республика Алтай,85.968646,51.944865,63845.0
2,Алтайский,край,Барнаул,167996,2384812,22,Сибирский,14.195648,Алтайский край,83.749388,53.340879,632723.0
3,Амурская,область,Благовещенск,361913,809873,28,Дальневосточный,2.237756,Амурская область,127.544173,50.320583,225091.0
4,Архангельская,область,Архангельск,589913,1183323,29,Северо-Западный,2.005928,Архангельская область,40.548922,64.564142,350982.0


# Points

In [9]:
# KLADR codes of the points to analyse. They are kept as strings so that
# leading zeros (e.g. region "02") survive; the first two characters are
# later parsed as the region code.
kladr_codes = [
    '0200000101500', '2700000100000', '5501900001100', '2801800001600',
    '2800000400000', '3301000006100', '3800000300000', '5401700005600',
    '5003200002100', '5003200003000', '5401800000600', '5401800001000',
    '3813300000700', '3813300001000', '5003200003100', '5003200005100',
    '5401800001200', '5401900000200', '5003200006500', '5003200007300',
    '3813300001100', '3813300001200', '5401900001500', '5401900002800',
    '3813300001500', '3813300002000', '5003200009200', '5003200010000',
    '5401900002900', '5402000000400', '3813300003300', '3813300003600',
    '5003200010400', '5003200010700', '5402000000500', '5402000000800',
    '3813300003700', '3813300003800', '3813300004500', '4000000107200',
    '5003200011900',
]
data_points = pd.DataFrame({'kladr': kladr_codes})

In [10]:
def kladr_to_address_api(kladr_id):
    """Resolve an identifier to a one-line address via the DaData ``findById/fias`` API.

    Parameters
    ----------
    kladr_id : str
        Identifier sent as the ``query`` field of the request body.

    Returns
    -------
    str
        The ``value`` field of the first suggestion in the response.

    Raises
    ------
    RuntimeError
        If the ``DADATA_API_TOKEN`` environment variable is not set.
    requests.HTTPError
        If the service answers with an error status.
    LookupError
        If the response contains no suggestion for ``kladr_id``.
    """
    # The API token is a credential and must not live in the notebook source:
    # it is read from the environment instead of being hard-coded.
    token = os.environ.get('DADATA_API_TOKEN')
    if not token:
        raise RuntimeError(
            'Set the DADATA_API_TOKEN environment variable to your DaData API token'
        )

    api_url = 'https://suggestions.dadata.ru/suggestions/api/4_1/rs/findById/fias'
    headers = {
        'content-type': 'application/json',
        'Authorization': f'Token {token}',
    }
    # A timeout keeps a stalled connection from hanging the whole geocoding loop.
    response = requests.post(api_url, json={'query': kladr_id}, headers=headers, timeout=10)
    # Surface HTTP errors (bad token, rate limit) instead of failing later on the JSON shape.
    response.raise_for_status()

    suggestions = response.json().get('suggestions') or []
    if not suggestions:
        raise LookupError(f'No address suggestion returned for id {kladr_id!r}')
    return suggestions[0]['value']

In [11]:
def address_to_geo_coord_api(address):
    """Geocode a free-text address with the Sputnik address search service.

    Parameters
    ----------
    address : str
        Address text sent as the ``q`` query parameter.

    Returns
    -------
    list
        The ``coordinates`` list of the first geometry of the first feature
        of the first address result, in reversed order.
        NOTE(review): presumably the service returns [lon, lat], so the
        reversed result is [lat, lon] - confirm against the service docs.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    KeyError, IndexError
        If the response does not contain the expected result structure.
    """
    # Pass the address through `params` so requests URL-encodes it; characters
    # such as '&', '#' or '?' inside an address would otherwise corrupt the query.
    response = requests.get(
        'http://search.maps.sputnik.ru/search/addr',
        params={'q': address},
        timeout=10,  # do not let one stalled request hang the geocoding loop
    )
    response.raise_for_status()
    payload = response.json()
    found = payload['result']['address'][0]['features'][0]['geometry']['geometries'][0]['coordinates']
    return found[::-1]

In [12]:
# Resolve every KLADR code to an address and then to coordinates, printing
# each result as progress output. Two remote calls are made per point.
resolved = []
for kladr_code in data_points['kladr']:
    addr = kladr_to_address_api(kladr_code)
    latlon = address_to_geo_coord_api(addr)
    print(addr, latlon)
    resolved.append((addr, latlon))

data_points['address'] = [addr_text for addr_text, _coords in resolved]
data_points['coordinates'] = [coords for _addr_text, coords in resolved]

Респ Башкортостан, г Уфа, деревня Жилино [54.656384, 56.06246]
Хабаровский край, г Хабаровск [48.481403, 135.07693]
Омская обл, Одесский р-н, село Желанное [54.166344, 72.575775]
Амурская обл, Тамбовский р-н, село Муравьевка [49.837215, 127.73236]
Амурская обл, г Зея [53.740356, 127.27162]
Владимирская обл, Меленковский р-н, деревня Левенда [55.31677, 41.793613]
Иркутская обл, г Иркутск [52.289597, 104.28059]
Новосибирская обл, Кыштовский р-н, деревня Ядкан [56.733883, 76.515045]
Московская обл, г Серпухов, деревня Арнеево [54.915524, 37.41955]
Московская обл, г Серпухов, деревня Борисово [55.35957, 38.047356]
Новосибирская обл, Маслянинский р-н, село Большой Изырак [54.514095, 84.27022]
Новосибирская обл, Маслянинский р-н, село Дубровка [54.46441, 84.721375]
Иркутская обл, Эхирит-Булагатский р-н, деревня Верхняя Идыга [52.92196, 104.562065]
Иркутская обл, Эхирит-Булагатский р-н, село Захал [52.602913, 104.73313]
Московская обл, г Серпухов, деревня Бутурлино [54.92471, 37.484756]
Моско

In [13]:
# Split each coordinates pair into separate columns: first element -> lat, second -> lon.
lat_values, lon_values = zip(*data_points['coordinates'])
data_points['lat'] = lat_values
data_points['lon'] = lon_values

In [14]:
# Size check after the address, coordinates, lat and lon columns were added.
data_points.shape

(41, 5)

In [15]:
# Preview the geocoded points.
data_points.head()

Unnamed: 0,kladr,address,coordinates,lat,lon
0,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246
1,2700000100000,"Хабаровский край, г Хабаровск","[48.481403, 135.07693]",48.481403,135.07693
2,5501900001100,"Омская обл, Одесский р-н, село Желанное","[54.166344, 72.575775]",54.166344,72.575775
3,2801800001600,"Амурская обл, Тамбовский р-н, село Муравьевка","[49.837215, 127.73236]",49.837215,127.73236
4,2800000400000,"Амурская обл, г Зея","[53.740356, 127.27162]",53.740356,127.27162


In [49]:
# Base map with a fixed initial view (centre [55.75, 37.60], zoom 5).
map_points = folium.Map(width=1200, height=700, location=[55.75, 37.60], zoom_start=5)

# Draw every point as a small filled red dot without an outline; clicking the
# dot shows the resolved address.
for row_label, point in data_points.iterrows():
    marker = folium.CircleMarker(
        location=(point['lat'], point['lon']),
        radius=1,
        fill_color='red',
        color=None,
        fill_opacity=1,
        popup=point['address'],
    )
    marker.add_to(map_points)

In [None]:
# Display the folium map as the cell output.
map_points

# Points To Cities

In [16]:
%%time
data_points2cities = pd.merge(data_points.loc[pd.notnull(data_points['lat'])],
                              data_osm_cities,
                              how='cross',
                             )

CPU times: user 27.4 ms, sys: 39.3 ms, total: 66.8 ms
Wall time: 65.3 ms


In [17]:
# Size check of the cross join: one row per (point, city) pair.
data_points2cities.shape

(108732, 10)

In [18]:
# Preview the point/city pairs.
data_points2cities.head()

Unnamed: 0,kladr,address,coordinates,lat,lon,region,city_name,city_lon,city_lat,city_population
0,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Адыгея,Адыгейск,39.190593,44.883197,12689.0
1,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Адыгея,Гиагинская,40.060501,44.873023,13954.0
2,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Адыгея,Кошехабль,40.499439,44.897871,7183.0
3,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Адыгея,Красногвардейское,39.581746,45.134793,9419.0
4,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Адыгея,Майкоп,40.104261,44.605519,144055.0


In [19]:
%%time
data_points2cities['simplifier_sign'] = 0
data_points2cities.loc[(data_points2cities['city_lat'] < data_points2cities['lat'] + 3) & (data_points2cities['city_lat'] > data_points2cities['lat'] - 3), 'simplifier_sign'] = 1

CPU times: user 6.6 ms, sys: 2.22 ms, total: 8.82 ms
Wall time: 7.37 ms


In [20]:
# Keep only the flagged pairs. The explicit copy makes the result an
# independent frame, so columns assigned to it afterwards do not trigger
# pandas' SettingWithCopyWarning.
data_points2cities = data_points2cities.loc[data_points2cities['simplifier_sign']==1].copy()

In [21]:
# Size check after the latitude pre-filter.
data_points2cities.shape

(51996, 11)

In [22]:
%%time
data_points2cities['simplifier_sign'] = 0
data_points2cities.loc[(data_points2cities['city_lon'] < data_points2cities['lon'] + 3 / np.cos(np.radians(data_points2cities['lat']))) &
                       (data_points2cities['city_lon'] > data_points2cities['lon'] - 3 / np.cos(np.radians(data_points2cities['lat']))),
                       'simplifier_sign'] = 1

CPU times: user 11.3 ms, sys: 0 ns, total: 11.3 ms
Wall time: 9.42 ms


In [23]:
# Keep only the flagged pairs. The explicit copy makes the result an
# independent frame, so the distance column written into it afterwards does
# not trigger pandas' SettingWithCopyWarning.
data_points2cities = data_points2cities.loc[data_points2cities['simplifier_sign']==1].copy()

In [24]:
# Size check after the longitude pre-filter.
data_points2cities.shape

(7961, 11)

In [25]:
def f_dist(df, col_lat_1, col_lon_1, col_lat_2, col_lon_2, col_dist, radius_km=6371.009):
    """Add a haversine (great-circle) distance column to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both coordinate pairs. It is modified in place.
    col_lat_1, col_lon_1 : str
        Columns with latitude / longitude of the first location, in degrees.
    col_lat_2, col_lon_2 : str
        Columns with latitude / longitude of the second location, in degrees.
    col_dist : str
        Name of the column that receives the distance.
    radius_km : float, optional
        Sphere radius the central angle is multiplied by; the result is in
        the same unit. Defaults to 6371.009.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``col_dist`` added or overwritten.
    """
    lat_1 = np.radians(df[col_lat_1].to_numpy())
    lon_1 = np.radians(df[col_lon_1].to_numpy())
    lat_2 = np.radians(df[col_lat_2].to_numpy())
    lon_2 = np.radians(df[col_lon_2].to_numpy())

    dlon = lon_1 - lon_2
    dlat = lat_1 - lat_2
    a = np.sin(dlat / 2) ** 2 + np.cos(lat_1) * np.cos(lat_2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
    # points, which would make arcsin return NaN; clamp it to the valid range.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    df[col_dist] = c * radius_km

    return df

In [26]:
%%time
data_points2cities = f_dist(data_points2cities, 'city_lat', 'city_lon', 'lat', 'lon', 'dist')

CPU times: user 8.22 ms, sys: 220 µs, total: 8.44 ms
Wall time: 5.66 ms


In [27]:
# Preview the pairs together with the computed 'dist' column.
data_points2cities.head()

Unnamed: 0,kladr,address,coordinates,lat,lon,region,city_name,city_lon,city_lat,city_population,simplifier_sign,dist
142,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Башкортостан,Агидель,53.937421,55.893298,15616.0,1,192.427263
143,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Башкортостан,Архангельское,56.774035,54.405749,5819.0,1,53.708584
144,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Башкортостан,Аскарово,58.517337,53.335749,7634.0,1,217.491505
145,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Башкортостан,Аскино,56.579966,56.089665,,1,162.692305
146,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Башкортостан,Баймак,58.310364,52.594139,17475.0,1,273.022865


In [28]:
# Range of the computed distances. Series.min()/max() are vectorised and skip
# NaN, whereas the built-in min()/max() iterate element by element and return
# order-dependent results as soon as a NaN is present.
dist_values = data_points2cities['dist']
print(f"dist_min = {dist_values.min()} km, dist_max = {dist_values.max()} km")

dist_min = 0.5076134148903368 km, dist_max = 470.8631122026571 km


In [130]:
# Checkpoint the point/city pairs so the steps above need not be re-run.
data_points2cities.to_pickle('data_points2cities.pickle', protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Restore the checkpointed point/city pairs.
# NOTE(review): unpickling can execute arbitrary code - only load files produced by a trusted run.
data_points2cities = pd.read_pickle('data_points2cities.pickle')

# Update Points

## Population In The Circle

In [29]:
%%time
data_points2cities_pop_50 = data_points2cities.loc[(data_points2cities['dist']<50)&(pd.notnull(data_points2cities['city_population'])), ['kladr', 'city_population']].groupby('kladr').sum().reset_index()

CPU times: user 13.6 ms, sys: 959 µs, total: 14.6 ms
Wall time: 12.2 ms


In [30]:
# Give the aggregated column a name that says what it holds.
data_points2cities_pop_50 = data_points2cities_pop_50.rename(
    columns={'city_population': 'population_50'},
)

In [31]:
# Size check: number of points that have at least one populated city in range.
data_points2cities_pop_50.shape

(37, 2)

## The Biggest City In The Circle

In [32]:
%%time
data_points2cities_max_50 = data_points2cities.loc[(data_points2cities['dist']<50)&(pd.notnull(data_points2cities['city_population'])), ['kladr', 'city_population']].groupby('kladr').max().reset_index()

CPU times: user 12.6 ms, sys: 163 µs, total: 12.7 ms
Wall time: 10.2 ms


In [33]:
# Rename so the maximum does not clash with 'city_population' in the next merge.
data_points2cities_max_50 = data_points2cities_max_50.rename(
    columns={'city_population': 'city_population_max_50'},
)

In [34]:
# Size check of the per-point maximum table.
data_points2cities_max_50.shape

(37, 2)

In [35]:
%%time
data_points2cities_max_50 = data_points2cities_max_50.merge(data_points2cities, right_on='kladr', left_on='kladr', how='left')

CPU times: user 15.5 ms, sys: 11.6 ms, total: 27.1 ms
Wall time: 24.7 ms


In [36]:
# Size check after the join: every pair of each point now carries the point's maximum.
data_points2cities_max_50.shape

(7747, 17)

In [37]:
%%time
data_points2cities_max_50 = data_points2cities_max_50.loc[(data_points2cities_max_50['dist']<50)&
                                                          (data_points2cities_max_50['city_population_max_50']==data_points2cities_max_50['city_population'])].reset_index(drop=True)

CPU times: user 6.54 ms, sys: 1.84 ms, total: 8.38 ms
Wall time: 6.1 ms


In [38]:
# Size check: rows left after selecting the biggest nearby city per point.
data_points2cities_max_50.shape

(37, 17)

In [39]:
# Preview the selected biggest city for the first points.
data_points2cities_max_50.head()

Unnamed: 0,kladr,city_population_max_50,address,coordinates,lat,lon,region,city_name,city_lon,city_lat,city_population,simplifier_sign,dlon,dlat,a,c,dist
0,200000101500,1110976.0,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,Башкортостан,Уфа,55.993042,54.728227,1110976.0,1,-0.001212,0.001254,5.156552e-07,0.001436,9.149934
1,2700000100000,618150.0,"Хабаровский край, г Хабаровск","[48.481403, 135.07693]",48.481403,135.07693,Хабаровский край,Хабаровск,135.068078,48.473097,618150.0,1,-0.000154,-0.000145,7.875937e-09,0.000177,1.130809
2,2800000400000,23270.0,"Амурская обл, г Зея","[53.740356, 127.27162]",53.740356,127.27162,Амурская область,Зея,127.259946,53.737999,23270.0,1,-0.000204,-4.1e-05,4.05383e-09,0.000127,0.81128
3,2801800001600,7374.0,"Амурская обл, Тамбовский р-н, село Муравьевка","[49.837215, 127.73236]",49.837215,127.73236,Амурская область,Тамбовка,128.076614,50.101728,7374.0,1,0.006008,0.004617,9.061932e-06,0.006021,38.35741
4,3301000006100,109809.0,"Владимирская обл, Меленковский р-н, деревня Ле...","[55.31677, 41.793613]",55.31677,41.793613,Владимирская область,Муром,42.036276,55.568679,109809.0,1,0.004235,0.004397,6.275447e-06,0.00501,31.919861


In [40]:
# Suffix the city attributes with '_max_50' so they stay unambiguous once
# merged into the points table.
max_50_column_names = {
    'region': 'region_max_50',
    'city_name': 'city_name_max_50',
    'city_lon': 'city_lon_max_50',
    'city_lat': 'city_lat_max_50',
    'dist': 'city_dist_max_50',
}
data_points2cities_max_50 = data_points2cities_max_50.rename(columns=max_50_column_names)

## Update

In [41]:
%%time
data_points = data_points.merge(data_points2cities_pop_50, right_on='kladr', left_on='kladr', how='left')

CPU times: user 25.1 ms, sys: 284 µs, total: 25.4 ms
Wall time: 24.1 ms


In [42]:
# Size check: the row count must not change after a left join on kladr.
data_points.shape

(41, 6)

In [43]:
%%time
data_points = data_points.merge(data_points2cities_max_50[['kladr', 'region_max_50', 'city_name_max_50', 'city_lon_max_50', 'city_lat_max_50', 'city_population_max_50', 'city_dist_max_50']], right_on='kladr', left_on='kladr', how='left')

CPU times: user 12.2 ms, sys: 0 ns, total: 12.2 ms
Wall time: 10.9 ms


In [44]:
# Size check: the row count must not change after a left join on kladr.
data_points.shape

(41, 12)

In [45]:
# The first two characters of the KLADR code are used as the numeric region code.
kladr_prefix = data_points['kladr'].str.slice(0, 2)
data_points['region_code'] = kladr_prefix.astype('int')

In [46]:
# Number of points per region code.
data_points.groupby('region_code').size()

region_code
2      1
27     1
28     2
33     1
38    12
40     1
50    11
54    11
55     1
dtype: int64

In [47]:
%%time
data_points.loc[data_points['region_code']==80, 'region_code'] = 75
data_points.loc[data_points['region_code']==81, 'region_code'] = 59
data_points.loc[data_points['region_code']==82, 'region_code'] = 41
data_points.loc[data_points['region_code']==85, 'region_code'] = 38
data_points.loc[data_points['region_code']==88, 'region_code'] = 24

CPU times: user 6.43 ms, sys: 0 ns, total: 6.43 ms
Wall time: 5.97 ms


In [48]:
# Number of points per region code after the remapping.
data_points.groupby('region_code').size()

region_code
2      1
27     1
28     2
33     1
38    12
40     1
50    11
54    11
55     1
dtype: int64

In [49]:
# Number of distinct region codes among the points (the column is integer, so it has no NaN).
data_points['region_code'].nunique()

9

In [50]:
%%time
data_points = data_points.merge(data_regions, right_on='region_code', left_on='region_code', how='left')

CPU times: user 11.4 ms, sys: 0 ns, total: 11.4 ms
Wall time: 9.6 ms


In [51]:
# Size check: the row count must not change after the left join on region_code.
data_points.shape

(41, 24)

In [53]:
%%time
data_points['dlon'] = math.pi/180 * (data_points['center_lon'] - data_points['lon'])
data_points['dlat'] = math.pi/180 * (data_points['center_lat'] - data_points['lat'])
data_points['a'] = np.sin(data_points['dlat']/2)**2 + np.cos(math.pi/180 * data_points['center_lat']) * np.cos(math.pi/180 * data_points['lat']) * np.sin(data_points['dlon']/2)**2
data_points['c'] = 2 * np.arcsin(np.sqrt(data_points['a']))
data_points['center_dist'] = data_points['c'] * 6371.009

CPU times: user 12.2 ms, sys: 0 ns, total: 12.2 ms
Wall time: 10.9 ms


In [55]:
# Transposed preview: the frame is wide, so each of the first points is shown as a column.
data_points.head().T

Unnamed: 0,0,1,2,3,4
kladr,0200000101500,2700000100000,5501900001100,2801800001600,2800000400000
address,"Респ Башкортостан, г Уфа, деревня Жилино","Хабаровский край, г Хабаровск","Омская обл, Одесский р-н, село Желанное","Амурская обл, Тамбовский р-н, село Муравьевка","Амурская обл, г Зея"
coordinates,"[54.656384, 56.06246]","[48.481403, 135.07693]","[54.166344, 72.575775]","[49.837215, 127.73236]","[53.740356, 127.27162]"
lat,54.656384,48.481403,54.166344,49.837215,53.740356
lon,56.06246,135.07693,72.575775,127.73236,127.27162
population_50,1191909.0,648762.0,6148.0,12670.0,23270.0
region_max_50,Башкортостан,Хабаровский край,Омская область,Амурская область,Амурская область
city_name_max_50,Уфа,Хабаровск,Одесское,Тамбовка,Зея
city_lon_max_50,55.993042,135.068078,72.962787,128.076614,127.259946
city_lat_max_50,54.728227,48.473097,54.209497,50.101728,53.737999


In [130]:
# Checkpoint the enriched points table.
data_points.to_pickle('data_points.pickle', protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Restore the checkpointed points table.
# NOTE(review): unpickling can execute arbitrary code - only load files produced by a trusted run.
data_points = pd.read_pickle('data_points.pickle')