In [3]:
import pandas as pd
import numpy as np
from numpy import inf, mean, median
import math
import datetime
import json
import os
import glob
import pickle
import requests
# import requests_cache
import plotly.graph_objects as go
import plotly.express as px
import folium
from folium.plugins import MarkerCluster, HeatMap
import geopandas as gpd
import osm2geojson
import geojson
import shapely
from shapely import wkt
from shapely.geometry import Point, Polygon, LineString
from geopy.distance import geodesic

In [4]:
# Show up to 100 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 100)

# Roads

In [5]:
# Load the roads table from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only use it
# on a file you produced yourself or otherwise trust.
with open('data_osm_roads_gdf.pickle', 'rb') as _f:
    data_osm_roads_gdf = pickle.load(_f)

In [6]:
data_osm_roads_gdf.shape

(57131, 9)

In [7]:
data_osm_roads_gdf.head()

Unnamed: 0,geometry,type,id,tags,highway,lanes,oneway,surface,representative_point
0,"LINESTRING (34.82303 57.38299, 34.82630 57.380...",way,4412349,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,2,yes,asphalt,POINT (34.82630 57.38061)
1,"LINESTRING (34.74326 57.44017, 34.74535 57.438...",way,4412351,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.74535 57.43867)
2,"LINESTRING (34.66128 57.49754, 34.66678 57.493...",way,4412353,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.67413 57.48855)
3,"LINESTRING (31.35035 56.34085, 31.35280 56.340...",way,4420591,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.47337 56.32730)
4,"LINESTRING (31.58938 56.32759, 31.59669 56.328...",way,4420592,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.63790 56.33436)


In [8]:
# Split each road's representative point into plain x/y (longitude/latitude) columns.
representative_points = data_osm_roads_gdf['representative_point']
data_osm_roads_gdf['road_lon'] = representative_points.x
data_osm_roads_gdf['road_lat'] = representative_points.y

In [9]:
data_osm_roads_gdf.head()

Unnamed: 0,geometry,type,id,tags,highway,lanes,oneway,surface,representative_point,road_lon,road_lat
0,"LINESTRING (34.82303 57.38299, 34.82630 57.380...",way,4412349,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,2,yes,asphalt,POINT (34.82630 57.38061),34.826305,57.380612
1,"LINESTRING (34.74326 57.44017, 34.74535 57.438...",way,4412351,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.74535 57.43867),34.745351,57.438669
2,"LINESTRING (34.66128 57.49754, 34.66678 57.493...",way,4412353,"{'highway': 'trunk', 'int_ref': 'E 105;AH8', '...",trunk,1,yes,asphalt,POINT (34.67413 57.48855),34.674135,57.488555
3,"LINESTRING (31.35035 56.34085, 31.35280 56.340...",way,4420591,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.47337 56.32730),31.473373,56.327296
4,"LINESTRING (31.58938 56.32759, 31.59669 56.328...",way,4420592,"{'highway': 'trunk', 'int_ref': 'E 22', 'lanes...",trunk,2,,asphalt,POINT (31.63790 56.33436),31.637903,56.334365


# Points

In [10]:
# KLADR codes of the points to analyse. They are kept as strings so that a code
# with a leading zero (e.g. '0200000101500') is preserved as written.
kladr_codes = [
    '0200000101500',
    '2700000100000',
    '5501900001100',
    '2801800001600',
    '2800000400000',
    '3301000006100',
    '3800000300000',
    '5401700005600',
    '5003200002100',
    '5003200003000',
    '5401800000600',
    '5401800001000',
    '3813300000700',
    '3813300001000',
    '5003200003100',
    '5003200005100',
    '5401800001200',
    '5401900000200',
    '5003200006500',
    '5003200007300',
    '3813300001100',
    '3813300001200',
    '5401900001500',
    '5401900002800',
    '3813300001500',
    '3813300002000',
    '5003200009200',
    '5003200010000',
    '5401900002900',
    '5402000000400',
    '3813300003300',
    '3813300003600',
    '5003200010400',
    '5003200010700',
    '5402000000500',
    '5402000000800',
    '3813300003700',
    '3813300003800',
    '3813300004500',
    '4000000107200',
    '5003200011900',
]
data_points = pd.DataFrame({'kladr': kladr_codes})

In [11]:
def kladr_to_address_api(kladr_id, api_token=None):
    """Resolve an identifier to an address string via the DaData findById API.

    Parameters
    ----------
    kladr_id : str
        Identifier sent as the ``query`` field of the request body.
    api_token : str, optional
        DaData API token. When omitted, the token is read from the
        ``DADATA_API_TOKEN`` environment variable.

    Returns
    -------
    str
        The ``value`` field of the first suggestion returned by the service.

    Raises
    ------
    RuntimeError
        If no API token was passed and ``DADATA_API_TOKEN`` is not set.
    requests.HTTPError
        If the service answers with an error status code.
    IndexError
        If the service returns an empty suggestion list for ``kladr_id``.
    """
    # The token must not be hard-coded in the notebook: anyone who can read the
    # file (or its git history) could use it.
    # NOTE(review): the token that used to be committed here should be revoked.
    token = api_token or os.environ.get('DADATA_API_TOKEN')
    if not token:
        raise RuntimeError(
            "DaData API token is missing: pass api_token or set the "
            "DADATA_API_TOKEN environment variable"
        )

    api_url = 'https://suggestions.dadata.ru/suggestions/api/4_1/rs/findById/fias'
    headers = {
        'content-type': 'application/json',
        'Authorization': f'Token {token}',
    }
    # A timeout keeps a stalled connection from hanging the whole notebook run.
    response = requests.post(api_url, json={"query": kladr_id}, headers=headers, timeout=30)
    # Fail on HTTP errors here instead of with an unrelated KeyError further down.
    response.raise_for_status()

    suggestions = response.json()['suggestions']
    if not suggestions:
        raise IndexError(f"DaData returned no suggestions for {kladr_id!r}")
    return suggestions[0]['value']

In [12]:
def address_to_geo_coord_api(address):
    """Geocode an address string with the Sputnik address search API.

    Parameters
    ----------
    address : str
        Free-form address passed to the service as the ``q`` query parameter.

    Returns
    -------
    list
        The coordinate pair of the first geometry of the first feature of the
        first address result, in reversed order. For the addresses looked up in
        this notebook that order is ``[latitude, longitude]``.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status code.
    KeyError, IndexError
        If the response does not contain the expected result structure.
    """
    # Pass the address through ``params`` so that requests URL-encodes it; an
    # address containing '&', '#' or '?' would otherwise corrupt the query string.
    r = requests.get(
        'http://search.maps.sputnik.ru/search/addr',
        params={'q': address},
        timeout=30,  # do not let a stalled connection hang the notebook
    )
    r.raise_for_status()
    response = r.json()
    first_geometry = response['result']['address'][0]['features'][0]['geometry']['geometries'][0]
    # Swap the order of the pair returned by the service.
    return first_geometry['coordinates'][::-1]

In [13]:
# Resolve every KLADR code to an address and then to coordinates, printing each
# result as it arrives so that progress is visible.
addresses, coordinates = [], []
for kladr in data_points['kladr']:
    address = kladr_to_address_api(kladr)
    addresses.append(address)
    coordinate = address_to_geo_coord_api(address)
    coordinates.append(coordinate)
    print(address, coordinate)

# Store the lookups next to the codes they were made for (same row order).
data_points['address'], data_points['coordinates'] = addresses, coordinates

Респ Башкортостан, г Уфа, деревня Жилино [54.656384, 56.06246]
Хабаровский край, г Хабаровск [48.481403, 135.07693]
Омская обл, Одесский р-н, село Желанное [54.166344, 72.575775]
Амурская обл, Тамбовский р-н, село Муравьевка [49.837215, 127.73236]
Амурская обл, г Зея [53.740356, 127.27162]
Владимирская обл, Меленковский р-н, деревня Левенда [55.31677, 41.793613]
Иркутская обл, г Иркутск [52.289597, 104.28059]
Новосибирская обл, Кыштовский р-н, деревня Ядкан [56.733883, 76.515045]
Московская обл, г Серпухов, деревня Арнеево [54.915524, 37.41955]
Московская обл, г Серпухов, деревня Борисово [55.35957, 38.047356]
Новосибирская обл, Маслянинский р-н, село Большой Изырак [54.514095, 84.27022]
Новосибирская обл, Маслянинский р-н, село Дубровка [54.46441, 84.721375]
Иркутская обл, Эхирит-Булагатский р-н, деревня Верхняя Идыга [52.92196, 104.562065]
Иркутская обл, Эхирит-Булагатский р-н, село Захал [52.602913, 104.73313]
Московская обл, г Серпухов, деревня Бутурлино [54.92471, 37.484756]
Моско

In [14]:
# Split the stored coordinate pairs into two separate columns
# (first element -> 'lat', second element -> 'lon').
lat_values, lon_values = zip(*data_points['coordinates'])
data_points['lat'] = lat_values
data_points['lon'] = lon_values

In [15]:
data_points.shape

(41, 5)

In [16]:
data_points.head()

Unnamed: 0,kladr,address,coordinates,lat,lon
0,200000101500,"Респ Башкортостан, г Уфа, деревня Жилино","[54.656384, 56.06246]",54.656384,56.06246
1,2700000100000,"Хабаровский край, г Хабаровск","[48.481403, 135.07693]",48.481403,135.07693
2,5501900001100,"Омская обл, Одесский р-н, село Желанное","[54.166344, 72.575775]",54.166344,72.575775
3,2801800001600,"Амурская обл, Тамбовский р-н, село Муравьевка","[49.837215, 127.73236]",49.837215,127.73236
4,2800000400000,"Амурская обл, г Зея","[53.740356, 127.27162]",53.740356,127.27162


In [None]:
# Base map, then one small red dot per point with the address as its popup.
map_points = folium.Map(location=[55.75, 37.60], zoom_start=5, width=1200, height=700)

for _row_label, row in data_points.iterrows():
    point_marker = folium.CircleMarker(
        location=(row['lat'], row['lon']),
        radius=1,
        color=None,  # no outline, only the filled dot
        fill_color='red',
        fill_opacity=1,
        popup=row['address'],
    )
    point_marker.add_to(map_points)

In [None]:
map_points

# Points To Roads

In [18]:
# Keep only points that have both coordinates. reset_index() without drop=True turns
# the original row label into an 'index' column, which identifies the point later on.
has_coordinates = data_points['lat'].notna() & data_points['lon'].notna()
data_points_2 = data_points.loc[has_coordinates, ['kladr', 'lat', 'lon']].reset_index()

In [19]:
data_points_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   index   41 non-null     int64  
 1   kladr   41 non-null     object 
 2   lat     41 non-null     float64
 3   lon     41 non-null     float64
dtypes: float64(2), int64(1), object(1)
memory usage: 1.4+ KB


In [20]:
# Keep only roads that have both coordinates, and only the two coordinate columns.
road_has_coordinates = (
    data_osm_roads_gdf['road_lat'].notna() & data_osm_roads_gdf['road_lon'].notna()
)
data_osm_roads_gdf_2 = data_osm_roads_gdf.loc[road_has_coordinates, ['road_lon', 'road_lat']]

In [21]:
data_osm_roads_gdf_2.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 57131 entries, 0 to 57130
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   road_lon  57131 non-null  float64
 1   road_lat  57131 non-null  float64
dtypes: float64(2)
memory usage: 1.3 MB


In [22]:
%%time
# Pair every point with every road (cartesian product); the pairs are narrowed
# down by the coordinate-window filters that follow.
point_columns = data_points_2[['index', 'lat', 'lon']]
data_points2roads = point_columns.merge(data_osm_roads_gdf_2, how='cross')

CPU times: user 209 ms, sys: 284 ms, total: 493 ms
Wall time: 491 ms


In [23]:
data_points2roads.shape

(2342371, 5)

In [24]:
data_points2roads.head()

Unnamed: 0,index,lat,lon,road_lon,road_lat
0,0,54.656384,56.06246,34.826305,57.380612
1,0,54.656384,56.06246,34.745351,57.438669
2,0,54.656384,56.06246,34.674135,57.488555
3,0,54.656384,56.06246,31.473373,56.327296
4,0,54.656384,56.06246,31.637903,56.334365


In [25]:
%%time
# Cheap pre-filter before the expensive distance computation: flag (1/0) the pairs
# whose road latitude lies strictly within 3 degrees of the point latitude.
inside_lat_window = (
    data_points2roads['road_lat'].lt(data_points2roads['lat'] + 3)
    & data_points2roads['road_lat'].gt(data_points2roads['lat'] - 3)
)
data_points2roads['simplifier_sign'] = inside_lat_window.astype('int64')

CPU times: user 56.1 ms, sys: 52.5 ms, total: 109 ms
Wall time: 107 ms


In [26]:
%%time
# Keep only the pairs flagged with simplifier_sign == 1. The explicit copy makes the
# result an independent frame, so the column assignments made on it afterwards do not
# raise SettingWithCopyWarning.
data_points2roads = data_points2roads.loc[data_points2roads['simplifier_sign'] == 1].copy()

CPU times: user 76.7 ms, sys: 61.3 ms, total: 138 ms
Wall time: 136 ms


In [27]:
data_points2roads.shape

(1115049, 6)

In [28]:
%%time
# Same pre-filter for longitude. The 3-degree half-width is divided by cos(latitude),
# which widens the window away from the equator, where a degree of longitude covers
# less ground.
lon_half_width = 3 / np.cos(np.radians(data_points2roads['lat']))
inside_lon_window = (
    (data_points2roads['road_lon'] < data_points2roads['lon'] + lon_half_width)
    & (data_points2roads['road_lon'] > data_points2roads['lon'] - lon_half_width)
)
data_points2roads['simplifier_sign'] = inside_lon_window.astype('int64')

CPU times: user 106 ms, sys: 8.43 ms, total: 114 ms
Wall time: 112 ms


In [29]:
%%time
# Keep only the pairs flagged with simplifier_sign == 1. The explicit copy makes the
# result an independent frame, so adding new columns to it afterwards does not raise
# SettingWithCopyWarning.
data_points2roads = data_points2roads.loc[data_points2roads['simplifier_sign'] == 1].copy()

CPU times: user 14 ms, sys: 7.22 ms, total: 21.2 ms
Wall time: 18.9 ms


In [30]:
data_points2roads.shape

(227909, 6)

In [31]:
def f_dist(lon_lat_1_2):
    """Return the geodesic distance in kilometres between two points.

    Parameters
    ----------
    lon_lat_1_2 : sequence
        The first four values are read, by position, as
        ``(lon_1, lat_1, lon_2, lat_2)``. A DataFrame row (pandas Series), list,
        tuple or array all work, whatever their labels are.

    Returns
    -------
    float or None
        ``geodesic((lat_1, lon_1), (lat_2, lon_2)).km``, or ``None`` when any of
        the four coordinates is null.
    """
    # Read the values by iteration instead of ``row[0]``: integer keys on a
    # string-labelled Series are a deprecated positional fallback in pandas.
    lon_1, lat_1, lon_2, lat_2 = tuple(lon_lat_1_2)[:4]
    if any(pd.isnull(value) for value in (lon_1, lat_1, lon_2, lat_2)):
        return None
    # geodesic expects (latitude, longitude) pairs, hence the swapped order.
    return geodesic((lat_1, lon_1), (lat_2, lon_2)).km

In [32]:
%%time
# f_dist reads its argument by position as (lon_1, lat_1, lon_2, lat_2),
# so the order of the columns selected here matters.
coordinate_columns = ['road_lon', 'road_lat', 'lon', 'lat']
data_points2roads['dist'] = data_points2roads[coordinate_columns].apply(f_dist, axis=1)

CPU times: user 2min 24s, sys: 900 ms, total: 2min 25s
Wall time: 2min 26s


In [33]:
data_points2roads.head().T

Unnamed: 0,11986,11990,12026,12151,12152
index,0.0,0.0,0.0,0.0,0.0
lat,54.656384,54.656384,54.656384,54.656384,54.656384
lon,56.06246,56.06246,56.06246,56.06246,56.06246
road_lon,51.208425,51.120501,51.702642,51.220868,51.227666
road_lat,52.849901,52.864984,52.765198,52.847263,52.845825
simplifier_sign,1.0,1.0,1.0,1.0,1.0
dist,377.955123,381.946933,356.514605,377.425755,377.136842


In [34]:
# Use the Series reductions rather than the builtin min()/max(): they are vectorised
# and skip NaN, whereas the builtins give order-dependent results when NaN is present.
dist_values = data_points2roads['dist']
print(f"dist_min = {dist_values.min()} km, dist_max = {dist_values.max()} km")

dist_min = 0.762901106855363 km, dist_max = 473.7697927006494 km


In [130]:
# Cache the point-road pairs (including the computed 'dist' column) on disk so the
# slow distance computation does not have to be repeated.
with open('data_points2roads.pickle', 'wb') as _f:
    pickle.dump(data_points2roads, _f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Restore the cached point-road pairs written by the dump cell.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only use it
# on a file you produced yourself or otherwise trust.
with open('data_points2roads.pickle', 'rb') as _f:
    data_points2roads = pickle.load(_f)

# Update Points

## The Nearest Road

In [35]:
%%time
# Smallest distance per point: one row per 'index' value, holding the minimum 'dist'.
data_points2roads_gr = data_points2roads.groupby('index', as_index=False)['dist'].min()

CPU times: user 17.2 ms, sys: 3.9 ms, total: 21.1 ms
Wall time: 19.6 ms


In [36]:
# Give the per-point minimum a descriptive name.
data_points2roads_gr = data_points2roads_gr.rename(columns={'dist': 'dist_to_road'})

In [37]:
data_points2roads_gr.shape

(41, 2)

In [38]:
data_points2roads_gr.loc[pd.isnull(data_points2roads_gr['dist_to_road'])]

Unnamed: 0,index,dist_to_road


In [39]:
# Replace a missing distance with 500 (km) as a placeholder value.
data_points2roads_gr['dist_to_road'] = data_points2roads_gr['dist_to_road'].fillna(500)

In [40]:
data_points2roads_gr.head()

Unnamed: 0,index,dist_to_road
0,0,0.762901
1,1,2.08458
2,2,13.382664
3,3,40.830141
4,4,79.173231


## Update

In [41]:
%%time
# Bring the 'index' key of data_points_2 back onto the full points table, matching on
# the KLADR code. Both frames carry 'lat' and 'lon', so pandas suffixes those columns
# with '_x' (left) and '_y' (right) in the result.
data_points = data_points.merge(data_points_2, on='kladr', how='left')

CPU times: user 9.12 ms, sys: 37 µs, total: 9.16 ms
Wall time: 7.68 ms


In [42]:
%%time
# Attach each point's distance to its nearest road; the left join keeps every point.
data_points = data_points.merge(data_points2roads_gr, on='index', how='left')

# A point that has coordinates but no road inside the search window never reaches
# data_points2roads_gr, so the join leaves its distance null. Give such points the
# same 500 km placeholder the notebook uses for missing distances. Points without an
# 'index' (no coordinates at all) are left untouched.
no_road_found = data_points['index'].notna() & data_points['dist_to_road'].isna()
data_points.loc[no_road_found, 'dist_to_road'] = 500

CPU times: user 4.99 ms, sys: 33 µs, total: 5.02 ms
Wall time: 4.56 ms


In [43]:
data_points.shape

(41, 9)

In [130]:
# Persist the enriched points table (including 'dist_to_road') for later reuse.
with open('data_points.pickle', 'wb') as _f:
    pickle.dump(data_points, _f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Restore the points table written by the dump cell.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only use it
# on a file you produced yourself or otherwise trust.
with open('data_points.pickle', 'rb') as _f:
    data_points = pickle.load(_f)