In [1]:
import pandas as pd
import numpy as np
from numpy import inf, mean, median
import math
import datetime
import json
import os
import glob
import pickle
import requests
# import requests_cache
import plotly.graph_objects as go
import plotly.express as px
import folium
from folium.plugins import MarkerCluster, HeatMap
import geopandas as gpd
import osm2geojson
import geojson
import shapely
from shapely import wkt
from shapely.geometry import Point, Polygon, LineString
from geopy.distance import geodesic

In [2]:
# Render up to 100 rows when a DataFrame is shown as a cell output.
pd.set_option('display.max_rows', 100)

# Roads

In [3]:
# Load the previously saved roads table from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load files you produced yourself.
with open('osm_roads_gdf.pickle', 'rb') as _f:
    osm_roads_gdf = pickle.load(_f)

In [4]:
# (rows, columns) of the loaded roads table.
osm_roads_gdf.shape

(57131, 9)

In [5]:
# Preview the first rows of the roads table.
osm_roads_gdf.head()

Unnamed: 0,geometry,type,id,tags,highway,lanes,oneway,surface,representative_point
0,"LINESTRING (34.82303 57.38299, 34.82630 57.380...",way,4412349,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,2,yes,asphalt,POINT (34.82630 57.38061)
1,"LINESTRING (34.74326 57.44017, 34.74535 57.438...",way,4412351,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.74535 57.43867)
2,"LINESTRING (34.66128 57.49754, 34.66678 57.493...",way,4412353,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.67413 57.48855)
3,"LINESTRING (31.35035 56.34085, 31.35280 56.340...",way,4420591,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.47337 56.32730)
4,"LINESTRING (31.58938 56.32759, 31.59669 56.328...",way,4420592,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.63790 56.33436)


In [21]:
# Split each road's representative point into plain numeric columns:
# x becomes the longitude (road_lon) and y the latitude (road_lat).
# These columns are what the distance helpers further down consume.
osm_roads_gdf['road_lon'] = osm_roads_gdf['representative_point'].x
osm_roads_gdf['road_lat'] = osm_roads_gdf['representative_point'].y

In [22]:
# Confirm that road_lon / road_lat were added.
osm_roads_gdf.head()

Unnamed: 0,geometry,type,id,tags,highway,lanes,oneway,surface,representative_point,road_lon,road_lat
0,"LINESTRING (34.82303 57.38299, 34.82630 57.380...",way,4412349,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,2,yes,asphalt,POINT (34.82630 57.38061),34.826305,57.380612
1,"LINESTRING (34.74326 57.44017, 34.74535 57.438...",way,4412351,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.74535 57.43867),34.745351,57.438669
2,"LINESTRING (34.66128 57.49754, 34.66678 57.493...",way,4412353,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.67413 57.48855),34.674135,57.488555
3,"LINESTRING (31.35035 56.34085, 31.35280 56.340...",way,4420591,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.47337 56.32730),31.473373,56.327296
4,"LINESTRING (31.58938 56.32759, 31.59669 56.328...",way,4420592,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.63790 56.33436),31.637903,56.334365


# Points

In [6]:
# Points to analyse, one row per KLADR code.
# The codes are stored as strings, which keeps leading zeros such as in '0200000101500'.
data_points = pd.DataFrame({'kladr': [
    '0200000101500',
    '2700000100000',
    '5501900001100',
    '2801800001600',
    '2800000400000',
    '3301000006100',
    '3800000300000',
    '5401700005600',
    '5003200002100',
    '5003200003000',
    '5401800000600',
    '5401800001000',
    '3813300000700',
    '3813300001000',
    '5003200003100',
    '5003200005100',
    '5401800001200',
    '5401900000200',
    '5003200006500',
    '5003200007300',
    '3813300001100',
    '3813300001200',
    '5401900001500',
    '5401900002800',
    '3813300001500',
    '3813300002000',
    '5003200009200',
    '5003200010000',
    '5401900002900',
    '5402000000400',
    '3813300003300',
    '3813300003600',
    '5003200010400',
    '5003200010700',
    '5402000000500',
    '5402000000800',
    '3813300003700',
    '3813300003800',
    '3813300004500',
    '4000000107200',
    '5003200011900',
]})

In [7]:
def kladr_to_address_api(kladr_id):
    """Resolve a KLADR code to a human-readable address via the DaData API.

    Parameters
    ----------
    kladr_id : str
        Identifier sent as the ``query`` field of the request body.

    Returns
    -------
    str
        The ``value`` field of the first suggestion returned by the service.

    Raises
    ------
    RuntimeError
        If the ``DADATA_API_TOKEN`` environment variable is not set.
    requests.HTTPError
        If the service answers with an HTTP error status.
    IndexError
        If the service returns no suggestion for ``kladr_id``.
    """
    # The API token is a secret: it is read from the environment so that it is
    # never stored (and shared) together with the notebook.
    token = os.environ.get('DADATA_API_TOKEN')
    if not token:
        raise RuntimeError(
            'Set the DADATA_API_TOKEN environment variable to your DaData API token'
        )

    api_url = 'https://suggestions.dadata.ru/suggestions/api/4_1/rs/findById/fias'
    headers = {
        'content-type': 'application/json',
        'Authorization': f'Token {token}',
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.post(api_url, json={'query': kladr_id}, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of tripping over an unexpected payload below.
    response.raise_for_status()

    suggestions = response.json()['suggestions']
    if not suggestions:
        raise IndexError(f'No address suggestion returned for {kladr_id!r}')
    return suggestions[0]['value']

In [8]:
def address_to_geo_coord_api(address):
    """Geocode a free-text address with the Sputnik search service.

    Parameters
    ----------
    address : str
        Address text sent as the ``q`` query parameter.

    Returns
    -------
    list
        Coordinates of the first match, in the reverse of the order the service
        returns them. The notebook treats the result as ``[lat, lon]``.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    KeyError, IndexError
        If the response does not contain a match in the expected structure.
    """
    # Passing the address through ``params`` lets requests URL-encode it, so characters
    # such as '&', '#' or '+' inside an address cannot corrupt the query string.
    r = requests.get(
        'http://search.maps.sputnik.ru/search/addr',
        params={'q': address},
        timeout=30,  # do not hang the notebook on a stalled connection
    )
    r.raise_for_status()
    response = r.json()
    coordinates = response['result']['address'][0]['features'][0]['geometry']['geometries'][0]['coordinates']
    # Reverse the pair so that callers receive it in the order they unpack it (lat, lon).
    return coordinates[::-1]

In [9]:
# Resolve every KLADR code to an address, then the address to coordinates.
# Each iteration makes one request per service; an exception from either helper
# aborts the loop before the new columns are assigned.
addresses = []
coordinates = []
for kladr in data_points['kladr']:
    address = kladr_to_address_api(kladr)
    addresses.append(address)
    # The helper returns the pair reversed from the service order (unpacked later as lat, lon).
    coordinate = address_to_geo_coord_api(address)
    coordinates.append(coordinate)
    # Progress log: one line per resolved point.
    print(address, coordinate)
data_points['address'] = addresses
data_points['coordinates'] = coordinates

Респ Башкортостан, г Уфа, деревня Жилино [54.656384, 56.06246]
Хабаровский край, г Хабаровск [48.481403, 135.07693]
Омская обл, Одесский р-н, село Желанное [54.166344, 72.575775]
Амурская обл, Тамбовский р-н, село Муравьевка [49.837215, 127.73236]
Амурская обл, г Зея [53.740356, 127.27162]
Владимирская обл, Меленковский р-н, деревня Левенда [55.31677, 41.793613]
Иркутская обл, г Иркутск [52.289597, 104.28059]
Новосибирская обл, Кыштовский р-н, деревня Ядкан [56.733883, 76.515045]
Московская обл, г Серпухов, деревня Арнеево [54.915524, 37.41955]
Московская обл, г Серпухов, деревня Борисово [55.35957, 38.047356]
Новосибирская обл, Маслянинский р-н, село Большой Изырак [54.514095, 84.27022]
Новосибирская обл, Маслянинский р-н, село Дубровка [54.46441, 84.721375]
Иркутская обл, Эхирит-Булагатский р-н, деревня Верхняя Идыга [52.92196, 104.562065]
Иркутская обл, Эхирит-Булагатский р-н, село Захал [52.602913, 104.73313]
Московская обл, г Серпухов, деревня Бутурлино [54.92471, 37.484756]
Моско

In [10]:
# Unzip the coordinate pairs into two columns: first element -> lat, second -> lon.
data_points['lat'], data_points['lon'] = zip(*data_points['coordinates'])

In [11]:
# (rows, columns) of the points table after geocoding.
data_points.shape

(41, 5)

In [12]:
# Preview the geocoded points.
data_points.head()

Unnamed: 0,kladr,address,coordinates,lat,lon
0,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246
1,2700000100000,"Хабаровский край, г Хабаровск","[48.481403, 135.07693]",48.481403,135.07693
2,5501900001100,"Омская обл, Одесский р-н, село Желанное","[54.166344, 72.575775]",54.166344,72.575775
3,2801800001600,"Амурская обл, Тамбовский р-н, село Муравьевка","[49.837215, 127.73236]",49.837215,127.73236
4,2800000400000,"Амурская обл, г Зея","[53.740356, 127.27162]",53.740356,127.27162


In [49]:
# Base map centred on [55.75, 37.60]; every point is drawn as a small red dot
# whose popup shows the resolved address.
map_points = folium.Map(location=[55.75, 37.60], zoom_start=5, width=1200, height=700)

for point_lat, point_lon, point_address in zip(data_points['lat'],
                                               data_points['lon'],
                                               data_points['address']):
    folium.CircleMarker(
        location=(point_lat, point_lon),
        radius=1,
        color=None,
        fill_color='red',
        fill_opacity=1,
        popup=point_address,
    ).add_to(map_points)

In [None]:
# Render the map as the cell output.
map_points

# Points To Roads

In [23]:
%%time
# Pair every point that has a latitude with every road (Cartesian product),
# so that a distance can be computed for each (point, road) row.
data_points2roads = (
    data_points
    .loc[data_points['lat'].notna()]
    .merge(osm_roads_gdf, how='cross')
)

CPU times: user 3.22 s, sys: 1.57 s, total: 4.79 s
Wall time: 4.79 s


In [24]:
# (rows, columns) of the full point x road product.
data_points2roads.shape

(2342371, 16)

In [25]:
# Preview the point x road pairs.
data_points2roads.head()

Unnamed: 0,kladr,address,coordinates,lat,lon,geometry,type,id,tags,highway,lanes,oneway,surface,representative_point,road_lon,road_lat
0,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,"LINESTRING (34.82303 57.38299, 34.82630 57.380...",way,4412349,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,2,yes,asphalt,POINT (34.82630 57.38061),34.826305,57.380612
1,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,"LINESTRING (34.74326 57.44017, 34.74535 57.438...",way,4412351,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.74535 57.43867),34.745351,57.438669
2,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,"LINESTRING (34.66128 57.49754, 34.66678 57.493...",way,4412353,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.67413 57.48855),34.674135,57.488555
3,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,"LINESTRING (31.35035 56.34085, 31.35280 56.340...",way,4420591,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.47337 56.32730),31.473373,56.327296
4,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246,"LINESTRING (31.58938 56.32759, 31.59669 56.328...",way,4420592,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.63790 56.33436),31.637903,56.334365


In [26]:
def f_dist_simplifier_lat(city_lon_lat_point_lon_lat, max_val=6):
    """Cheap latitude pre-filter used before the exact distance computation.

    Parameters
    ----------
    city_lon_lat_point_lon_lat : sequence of 4 numbers
        ``(city_lon, city_lat, point_lon, point_lat)`` in degrees. Only the two
        latitudes are used. Lists, tuples, NumPy arrays and pandas Series are
        accepted; values are read by position, labels are ignored.
    max_val : float, default 6
        Half-width of the accepted latitude band, in degrees.

    Returns
    -------
    int
        1 if ``city_lat`` lies strictly within ``max_val`` degrees of
        ``point_lat``, otherwise 0 (also 0 when either latitude is NaN).
    """
    # Unpack by iteration instead of ``obj[0]``: integer keys on a label-indexed
    # Series are deprecated as positional lookups in recent pandas versions.
    _, city_lat, _, point_lat = tuple(city_lon_lat_point_lon_lat)[:4]
    return int(point_lat - max_val < city_lat < point_lat + max_val)

In [27]:
def f_dist_simplifier_lon(city_lon_lat_point_lon_lat, max_val=6):
    """Cheap longitude pre-filter used before the exact distance computation.

    Parameters
    ----------
    city_lon_lat_point_lon_lat : sequence of 4 numbers
        ``(city_lon, city_lat, point_lon, point_lat)`` in degrees. ``city_lat``
        is not used. Lists, tuples, NumPy arrays and pandas Series are accepted;
        values are read by position, labels are ignored.
    max_val : float, default 6
        Half-width of the accepted band in degrees at the equator. It is divided
        by ``cos(point_lat)``, so the band gets wider as meridians converge.

    Returns
    -------
    int
        1 if the longitude difference is strictly smaller than the widened
        half-width, otherwise 0 (also 0 when an input is NaN).
    """
    # Unpack by iteration instead of ``obj[0]``: integer keys on a label-indexed
    # Series are deprecated as positional lookups in recent pandas versions.
    city_lon, _, point_lon, point_lat = tuple(city_lon_lat_point_lon_lat)[:4]
    half_width = max_val / math.cos(math.radians(point_lat))
    delta = abs(city_lon - point_lon)
    # Longitudes wrap at +/-180 degrees: 179.9 and -179.9 are 0.2 degrees apart, not 359.8.
    if delta > 180:
        delta = 360 - delta
    return int(delta < half_width)

In [28]:
%%time
# Flag the pairs whose road latitude is within 3 degrees of the point latitude.
# raw=True passes each row to the helper as a plain NumPy array: no Series is built per row
# (much cheaper on millions of rows) and the helper's integer indexing stays purely positional.
data_points2roads['simplifier_sign'] = data_points2roads[['road_lon', 'road_lat', 'lon', 'lat']].apply(
    f_dist_simplifier_lat, axis=1, raw=True, args=(3,))

CPU times: user 1min 24s, sys: 1.14 s, total: 1min 25s
Wall time: 1min 25s


In [30]:
# Keep only the pairs that passed the latitude pre-filter.
data_points2roads = data_points2roads[data_points2roads['simplifier_sign'].eq(1)]

In [31]:
# Remaining (rows, columns) after the latitude pre-filter.
data_points2roads.shape

(1115049, 17)

In [32]:
%%time
# Flag the pairs whose road longitude is within the latitude-adjusted 3 degree band of the point.
# raw=True passes each row to the helper as a plain NumPy array: no Series is built per row
# (much cheaper on millions of rows) and the helper's integer indexing stays purely positional.
data_points2roads['simplifier_sign'] = data_points2roads[['road_lon', 'road_lat', 'lon', 'lat']].apply(
    f_dist_simplifier_lon, axis=1, raw=True, args=(3,))

CPU times: user 1min 5s, sys: 1.06 s, total: 1min 6s
Wall time: 1min 6s


In [33]:
# Keep only the pairs that passed the longitude pre-filter.
data_points2roads = data_points2roads[data_points2roads['simplifier_sign'].eq(1)]

In [34]:
# Remaining (rows, columns) after the longitude pre-filter.
data_points2roads.shape

(227909, 17)

In [35]:
def f_dist(lon_lat_1_2):
    """Geodesic distance in kilometres between two lon/lat positions.

    Parameters
    ----------
    lon_lat_1_2 : sequence of 4 numbers
        ``(lon_1, lat_1, lon_2, lat_2)``. Lists, tuples, NumPy arrays and pandas
        Series are accepted; values are read by position, labels are ignored.

    Returns
    -------
    float or None
        Distance in km, or ``None`` when any of the four values is missing.
    """
    # Unpack by iteration instead of ``obj[0]``: integer keys on a label-indexed
    # Series are deprecated as positional lookups in recent pandas versions.
    lon_1, lat_1, lon_2, lat_2 = tuple(lon_lat_1_2)[:4]
    if any(pd.isnull(value) for value in (lon_1, lat_1, lon_2, lat_2)):
        return None
    # The input is ordered lon, lat; geodesic() is given (lat, lon) pairs, hence the swap.
    return geodesic((lat_1, lon_1), (lat_2, lon_2)).km

In [36]:
%%time
# Distance in km from each point to the road's representative point
# (road_lon / road_lat), not to the closest point on the road geometry.
# The column order must match f_dist's (lon_1, lat_1, lon_2, lat_2).
data_points2roads['dist'] = data_points2roads[['road_lon', 'road_lat', 'lon', 'lat']].apply(f_dist, axis=1)

CPU times: user 2min 25s, sys: 1.75 s, total: 2min 27s
Wall time: 2min 28s


In [37]:
# Transposed preview: one column per pair, easier to read for a wide table.
data_points2roads.head().T

Unnamed: 0,11986,11990,12026,12151,12152
kladr,0200000101500,0200000101500,0200000101500,0200000101500,0200000101500
address,"Респ Башкортостан, г Уфа, деревня Жилино","Респ Башкортостан, г Уфа, деревня Жилино","Респ Башкортостан, г Уфа, деревня Жилино","Респ Башкортостан, г Уфа, деревня Жилино","Респ Башкортостан, г Уфа, деревня Жилино"
coordinates,"[54.656384, 56.06246]","[54.656384, 56.06246]","[54.656384, 56.06246]","[54.656384, 56.06246]","[54.656384, 56.06246]"
lat,54.656384,54.656384,54.656384,54.656384,54.656384
lon,56.06246,56.06246,56.06246,56.06246,56.06246
geometry,"LINESTRING (51.191134 52.853552, 51.191463 52....","LINESTRING (51.070363 52.868606, 51.073112 52....","LINESTRING (51.70251 52.765192, 51.702642 52.7...","LINESTRING (51.220868 52.847263, 51.222104 52....","LINESTRING (51.222104 52.846999, 51.227666 52...."
type,way,way,way,way,way
id,15366175,15508202,25211310,90448681,90448790
tags,"{'highway': 'trunk', 'lanes': '2', 'lit': 'no'...","{'highway': 'trunk', 'lanes': '2', 'lit': 'no'...","{'description': 'Подъезд М5-Оренбург', 'highwa...","{'bridge': 'yes', 'highway': 'trunk', 'lanes':...","{'highway': 'trunk', 'lanes': '2', 'maxspeed':..."
highway,trunk,trunk,trunk,trunk,trunk


In [38]:
# Series.min()/.max() are vectorised and skip missing values; the Python built-ins
# iterate element by element and give order-dependent results when NaN is present.
print(f"dist_min = {data_points2roads['dist'].min()} km, dist_max = {data_points2roads['dist'].max()} km")

dist_min = 0.762901106855363 km, dist_max = 473.7697927006494 km


In [130]:
# Persist the point x road table (with distances) to a local pickle file.
with open('data_points2roads.pickle', 'wb') as _f:
    pickle.dump(data_points2roads, _f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Reload the table written by the previous cell.
# NOTE(review): unpickling can execute arbitrary code - only load files you produced yourself.
with open('data_points2roads.pickle', 'rb') as _f:
    data_points2roads = pickle.load(_f)

# Update Points

## The Nearest Road

In [39]:
%%time
# Nearest road per point: the smallest distance within each KLADR group.
data_points2roads_gr = data_points2roads.groupby('kladr', as_index=False)['dist'].min()

CPU times: user 75.2 ms, sys: 0 ns, total: 75.2 ms
Wall time: 81.9 ms


In [40]:
# Give the aggregated column a name that says what it measures.
data_points2roads_gr = data_points2roads_gr.rename(columns={'dist': 'dist_to_road'})

In [41]:
# One row per KLADR code is expected here.
data_points2roads_gr.shape

(41, 2)

In [42]:
# List the points whose nearest-road distance is missing.
data_points2roads_gr.loc[pd.isnull(data_points2roads_gr['dist_to_road'])]

Unnamed: 0,kladr,dist_to_road


In [43]:
# Replace missing distances with the placeholder value 500.
data_points2roads_gr['dist_to_road'] = data_points2roads_gr['dist_to_road'].fillna(500)

In [44]:
# Preview the nearest-road distance per point.
data_points2roads_gr.head()

Unnamed: 0,kladr,dist_to_road
0,200000101500,0.762901
1,2700000100000,2.08458
2,2800000400000,79.173231
3,2801800001600,40.830141
4,3301000006100,101.713331


## Update

In [45]:
%%time
# Attach the nearest-road distance to every point.
# Dropping a stale column first keeps this cell re-runnable; without it a second run
# would produce dist_to_road_x / dist_to_road_y instead of a single column.
data_points = (
    data_points
    .drop(columns='dist_to_road', errors='ignore')
    .merge(data_points2roads_gr, on='kladr', how='left')
)
# A point with no candidate road left after the pre-filters (or without coordinates) has no
# row in data_points2roads_gr, so the left merge yields NaN for it. Give such points the same
# 500 placeholder the notebook uses for missing distances before the merge.
data_points['dist_to_road'] = data_points['dist_to_road'].fillna(500)

CPU times: user 2.82 ms, sys: 3.95 ms, total: 6.77 ms
Wall time: 6.22 ms


In [46]:
# The row count must be unchanged by the merge; one column (dist_to_road) is added.
data_points.shape

(41, 6)

In [130]:
# Persist the enriched points table to a local pickle file.
with open('data_points.pickle', 'wb') as _f:
    pickle.dump(data_points, _f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Reload the table written by the previous cell.
# NOTE(review): unpickling can execute arbitrary code - only load files you produced yourself.
with open('data_points.pickle', 'rb') as _f:
    data_points = pickle.load(_f)