In [36]:
import pandas as pd
import numpy as np
from numpy import inf, mean, median
import math
import datetime
import json
import os
import glob
import pickle
import requests
# import requests_cache
import plotly.graph_objects as go
import plotly.express as px
import folium
from folium.plugins import MarkerCluster, HeatMap
import geopandas as gpd
import osm2geojson
import geojson
from shapely import wkt
from shapely.geometry import Point, Polygon, LineString
from geopy.distance import geodesic

In [3]:
pd.options.display.max_rows = 100

# Load Data

In [4]:
overpass_url = "http://overpass-api.de/api/interpreter"

## Regions

In [5]:
overpass_query_regions = """
[out:json];
area["ISO3166-1"="RU"][admin_level=2];
(
    node["place"="state"](area);
    way["place"="state"](area);
    relation["place"="state"](area);
);
out center;
"""

In [6]:
response_regions = requests.get(overpass_url, 
                                params={'data': overpass_query_regions})

In [7]:
osm_regions = response_regions.json()

In [8]:
osm_regions['elements'][0]

{'type': 'node',
 'id': 502842538,
 'lat': 45.0819456,
 'lon': 134.726645,
 'tags': {'name': 'Приморский край',
  'name:be': 'Прыморскі край',
  'name:da': 'Primorskij kraj',
  'name:de': 'Region Primorje',
  'name:en': 'Primorsky Krai',
  'name:eo': 'Ĉemara Regiono',
  'name:es': 'Primorie',
  'name:fr': 'Primorie',
  'name:he': 'מחוז פרימוריה',
  'name:hr': 'Primorski kraj',
  'name:hu': 'Tengermelléki határterület',
  'name:ja': '沿海地方',
  'name:ko': '프리모르스키 지방',
  'name:lt': 'Primorės kraštas',
  'name:pl': 'Kraj Nadmorski',
  'name:ru': 'Приморский край',
  'name:sk': 'Prímorský kraj',
  'name:tt': 'Диңгез буе крае',
  'name:uk': 'Приморський край',
  'name:zh': '滨海边疆区',
  'official_name:es': 'Krai de Primorie',
  'official_name:fr': 'Kraï du Primorie',
  'place': 'state',
  'ref': 'ПРИ',
  'ref:en': 'PRI',
  'wikidata': 'Q4341',
  'wikipedia': 'ru:Приморский край'}}

In [9]:
len(osm_regions['elements'])

85

In [None]:
with open("osm_regions.json", "w") as f:
    json.dump(data_8, f)

In [10]:
type_regions = []
lon_regions = []
lat_regions = []
name_regions = []
population_regions = []

for element in osm_regions['elements']:
  
    if element['type'] == 'node':
        
        type_regions.append(element['type'])        
        lon_regions.append(element['lon'])
        lat_regions.append(element['lat'])
        name_regions.append(element['tags']['name'])

        try:
            population_regions.append(element['tags']['population'])
        except Exception:
            population_regions.append(None)
 
    elif 'center' in element:

        type_regions.append(element['type'])        
        lon_regions.append(element['center']['lon'])
        lat_regions.append(element['center']['lat'])
        name_regions.append(element['tags']['name'])

        try:
            population_regions.append(element['population'])
        except Exception:
            population_regions.append(None)

In [None]:
osm_regions = pd.DataFrame(data={'type':type_regions,'lon':lon_regions,'lat':lat_regions,'name':name_regions,'population':population_regions})

In [11]:
osm_regions
# Отутствует 'Дагестан'
# 'Республика Бурятия', 'Республика Коми' называются 'Бурятия' и 'Коми'

Unnamed: 0,type,lon,lat,name,population
0,node,134.726645,45.081946,Приморский край,
1,node,128.437295,52.803237,Амурская область,
2,node,82.047532,58.612428,Томская область,
3,node,74.341549,67.147163,Ямало-Ненецкий автономный округ,
4,node,68.641816,61.879343,Ханты-Мансийский автономный округ — Югра,
5,node,70.365884,58.820649,Тюменская область,
6,node,73.509994,56.093526,Омская область,
7,node,79.481392,54.972017,Новосибирская область,
8,node,87.342861,54.533578,Кемеровская область,
9,node,82.693142,52.693224,Алтайский край,


In [12]:
osm_regions['type'].unique()

array(['node'], dtype=object)

In [None]:
osm_regions.to_csv('osm_regions.csv', sep=';')

## Cities

In [24]:
directory = './osm_cities'

In [19]:
regions_to_load = df_regions['name'] + ['Республика Бурятия', 'Республика Коми', 'Дагестан']

In [20]:
for r in regions_to_load:
    
    files = os.listdir(directory)
    files_json = [f for f in os.listdir(directory) if f.endswith('.json')]
    
    if f'{r}.json' in files_json:
        pass

    else:
        _response = requests.get(overpass_url, 
                            params={'data': 
                                        '''
                                        [out:json][timeout:1500];
                                        area
                                            ["boundary"="administrative"]
                                            ["name"="''' + r + '''"]
                                            ->.a;
                                        (
                                            node["place"~"town|city"](area.a);
                                            way["place"~"town|city"](area.a);
                                            relation["place"~"town|city"](area.a);
                                        );
                                        out center;
                                        '''
                                    })

        try:
            _osm_cities = _response.json()
        except Exception:
            _osm_cities = None

        if _osm_cities:           
            with open(f'{directory}/{r}.json', "w") as f:
                json.dump(_osm_cities, f)

# Combine Data

In [25]:
files = os.listdir(directory)

In [26]:
len(files)

86

In [27]:
files_json = [f for f in os.listdir(directory) if f.endswith('.json')]

In [28]:
len(files_json)

86

In [29]:
type_ = []
lon_ = []
lat_ = []
region_ = []
region2_ = []
name_ = []
population_ = []


for f in files_json:
        
    with open(f'./{directory}/{f}', "r") as file_json_i:
        
        data_ = json.load(file_json_i)
        
        for element in data_['elements']:

            if element['type'] == 'node':

                region_.append(f[12:-5])   
                type_.append(element['type'])
                lon_.append(element['lon'])
                lat_.append(element['lat'])

                try:
                    region2_.append(element['tags']['addr:region'])
                except Exception:
                    region2_.append('n/d')

                try:
                    name_.append(element['tags']['name'])
                except Exception:
                    name_.append('n/d')

                try:
                    population_.append(element['tags']['population'])
                except Exception:
                    population_.append('n/d')

            elif 'center' in element:

                region_.append(f[12:-5])               
                type_.append(element['type'])
                lon_.append(element['center']['lon'])
                lat_.append(element['center']['lat'])

                try:
                    region2_.append(element['tags']['addr:region'])
                except Exception:
                    region2_.append('n/d')

                try:
                    name_.append(element['tags']['name'])
                except Exception:
                    name_.append('n/d')

                try:
                    population_.append(element['population'])
                except Exception:
                    population_.append('n/d')

In [None]:
osm_cities = pd.DataFrame(data={'type':type_,'lon':lon_,'lat':lat_,'region':region_,'region2':region2_,'name':name_,'population':population_})

In [30]:
osm_cities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5386 entries, 0 to 5385
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   type        5386 non-null   object 
 1   lon         5386 non-null   float64
 2   lat         5386 non-null   float64
 3   region      5386 non-null   object 
 4   region2     5386 non-null   object 
 5   name        5386 non-null   object 
 6   population  5386 non-null   object 
dtypes: float64(2), object(5)
memory usage: 294.7+ KB


In [31]:
osm_cities.groupby('region').size()

region
Автономна Республіка Крим                   142
Адыгея                                       24
Алтайский край                              122
Амурская область                             46
Архангельская область                        50
Астраханская область                         25
Башкортостан                                117
Белгородская область                         58
Брянская область                             56
Владимирская область                         56
Волгоградская область                        80
Вологодская область                          54
Воронежская область                          74
Дагестан                                     98
Еврейская автономная область                 14
Забайкальский край                           62
Ивановская область                           46
Ингушетия                                    26
Иркутская область                            95
Кабардино-Балкария                           32
Калининградская область          

In [32]:
def f_space(text):
    if pd.isnull(text):
        b = None
    elif text == 'n/d':
        b = None
    else:
        b = text.split()
        b = ''.join(b)
    return b

In [33]:
osm_cities['population'] = osm_cities['population'].apply(f_space)

In [34]:
osm_cities['population'] = pd.to_numeric(osm_cities['population'], downcast='float')

In [35]:
osm_cities.to_csv('osm_cities.csv', sep=';')

# Prepare Data

In [20]:
osm_cities = pd.read_csv('osm_cities.csv', sep=';', low_memory=False)

In [21]:
osm_cities.shape

(5386, 8)

In [22]:
osm_cities.head()

Unnamed: 0.1,Unnamed: 0,type,lon,lat,region,region2,name,population
0,0,node,33.367905,45.190764,Автономна Республіка Крим,n/d,Евпатория,105719.0
1,1,node,34.102486,44.952146,Автономна Республіка Крим,n/d,Симферополь,341799.0
2,2,node,34.409539,44.677112,Автономна Республіка Крим,n/d,Алушта,28919.0
3,3,node,34.389913,45.709376,Автономна Республіка Крим,n/d,Джанкой,36665.0
4,4,node,33.035764,45.308062,Автономна Республіка Крим,n/d,Мирный,4210.0


In [23]:
len(osm_cities.groupby('region').size())

86

In [24]:
osm_cities.groupby('region').size()

region
Автономна Республіка Крим                   142
Адыгея                                       24
Алтайский край                              122
Амурская область                             46
Архангельская область                        50
Астраханская область                         25
Башкортостан                                117
Белгородская область                         58
Брянская область                             56
Владимирская область                         56
Волгоградская область                        80
Вологодская область                          54
Воронежская область                          74
Дагестан                                     98
Еврейская автономная область                 14
Забайкальский край                           62
Ивановская область                           46
Ингушетия                                    26
Иркутская область                            95
Кабардино-Балкария                           32
Калининградская область          

In [25]:
osm_cities.loc[osm_cities['name']=='Керчь']

Unnamed: 0.1,Unnamed: 0,type,lon,lat,region,region2,name,population
66,66,node,36.453865,45.3534,Автономна Республіка Крим,Республика Крым,Керчь,148932.0
95,95,relation,36.493756,45.304022,Автономна Республіка Крим,Республика Крым,Керчь,
3615,3615,node,36.453865,45.3534,Республика Крым,Республика Крым,Керчь,148932.0
3644,3644,relation,36.493756,45.304022,Республика Крым,Республика Крым,Керчь,


In [26]:
osm_cities = osm_cities.loc[osm_cities['region']!='Автономна Республіка Крим']

In [27]:
osm_cities = osm_cities[['lon', 'lat', 'region', 'name', 'population']].groupby(['region', 'name']).mean().reset_index()

In [28]:
osm_cities.loc[osm_cities['name']=='Керчь']

Unnamed: 0,region,name,lon,lat,population
1751,Республика Крым,Керчь,36.47381,45.328711,148932.0


In [29]:
osm_cities.loc[osm_cities['name']=='Москва']

Unnamed: 0,region,name,lon,lat,population
1104,Москва,Москва,37.62323,55.737493,12630289.0
1176,Московская область,Москва,37.628965,55.72454,


In [30]:
osm_cities.loc[osm_cities['name']=='Санкт-Петербург']

Unnamed: 0,region,name,lon,lat,population
1019,Ленинградская область,Санкт-Петербург,30.305254,59.917442,
1993,Санкт-Петербург,Санкт-Петербург,30.310742,59.928087,5381736.0


In [31]:
osm_cities.drop([1176, 1019], inplace=True)

In [37]:
map_osm_cities = folium.Map(width=1200, height=700, zoom_start=5, location=[55.75, 37.60])

for index, row in osm_cities.iterrows():
    try:
        radius = int(math.log(float(row['population'])/50000+1)) + 1
    except Exception:
        radius = 1
    folium.CircleMarker(location=tuple(row[['lat', 'lon']]),
                        radius=radius,
                        fill_color='red',
                        color=None,
                        fill_opacity=1,
                        popup=row['name'],
                       ).add_to(map_osm_cities)

In [38]:
map_osm_cities.save('map_osm_cities.html')

In [39]:
with open('osm_cities.pickle', 'wb') as _f:
    pickle.dump(osm_cities, _f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
with open('osm_cities.pickle', 'rb') as _f:
    osm_cities = pickle.load(_f)

# Regions

In [67]:
data_regions = pd.read_excel('spr_regions.xlsx')

In [68]:
data_regions.shape

(85, 7)

In [69]:
data_regions.head()

Unnamed: 0,Name,Type,Center,Area,Population,Code,FO
0,Адыгея,республика,Майкоп,7792,449171,1,Южный
1,Алтай,республика,Горно-Алтайск,92903,213703,4,Сибирский
2,Алтайский,край,Барнаул,167996,2384812,22,Сибирский
3,Амурская,область,Благовещенск,361913,809873,28,Дальневосточный
4,Архангельская,область,Архангельск,589913,1183323,29,Северо-Западный


In [70]:
data_regions['density'] = data_regions['Population'] / data_regions['Area']

In [71]:
data_regions.rename(columns={'Center':'name'}, inplace=True)

In [72]:
data_regions = data_regions.merge(osm_cities, left_on='name', right_on='name', how='left')

In [73]:
data_regions.shape

(87, 12)

In [74]:
data_regions.rename(columns={'name':'center_name', 'population':'center_population', 'lon':'center_lon', 'lat':'center_lat'}, inplace=True)

In [76]:
a = data_regions.groupby('center_name').size().reset_index(name='count')
data_regions.loc[data_regions['center_name'].isin(a.loc[a['count']>1, 'center_name'])]

Unnamed: 0,Name,Type,center_name,Area,Population,Code,FO,density,region,center_lon,center_lat,center_population
3,Амурская,область,Благовещенск,361913,809873,28,Дальневосточный,2.237756,Амурская область,127.544173,50.320583,225091.0
4,Амурская,область,Благовещенск,361913,809873,28,Дальневосточный,2.237756,Башкортостан,55.982946,55.038172,35037.0
29,Кировская,область,Киров,120374,1304348,43,Приволжский,10.835795,Калужская область,34.312939,54.071468,31039.0
30,Кировская,область,Киров,120374,1304348,43,Приволжский,10.835795,Кировская область,49.619912,58.597838,496986.0
38,Ленинградская,область,Санкт-Петербург,83908,1775540,47,Северо-Западный,21.160557,Санкт-Петербург,30.310742,59.928087,5381736.0
43,Москва,город федерального значения,Москва,2511,12197596,77,Центральный,4857.664675,Москва,37.62323,55.737493,12630289.0
44,Московская,область,Москва,44379,7231068,50,Центральный,162.938958,Москва,37.62323,55.737493,12630289.0
60,Санкт-Петербург,город федерального значения,Санкт-Петербург,1439,5191690,78,Северо-Западный,3607.845726,Санкт-Петербург,30.310742,59.928087,5381736.0


In [77]:
data_regions = data_regions.drop([4, 29]).reset_index(drop=True)

In [78]:
data_regions

Unnamed: 0,Name,Type,center_name,Area,Population,Code,FO,density,region,center_lon,center_lat,center_population
0,Адыгея,республика,Майкоп,7792,449171,1,Южный,57.645149,Адыгея,40.104261,44.605519,144055.0
1,Алтай,республика,Горно-Алтайск,92903,213703,4,Сибирский,2.300281,Республика Алтай,85.968646,51.944865,63845.0
2,Алтайский,край,Барнаул,167996,2384812,22,Сибирский,14.195648,Алтайский край,83.749388,53.340879,632723.0
3,Амурская,область,Благовещенск,361913,809873,28,Дальневосточный,2.237756,Амурская область,127.544173,50.320583,225091.0
4,Архангельская,область,Архангельск,589913,1183323,29,Северо-Западный,2.005928,Архангельская область,40.548922,64.564142,350982.0
5,Астраханская,область,Астрахань,49024,1021287,30,Южный,20.832388,Астраханская область,48.034561,46.357556,532699.0
6,Башкортостан,республика,Уфа,142947,4071987,2,Приволжский,28.485991,Башкортостан,55.993042,54.728227,1110976.0
7,Белгородская,область,Белгород,27134,1547936,31,Центральный,57.047837,Белгородская область,36.585442,50.590793,384425.0
8,Брянская,область,Брянск,34857,1232940,32,Центральный,35.371374,Брянская область,34.350404,53.260919,406553.0
9,Бурятия,республика,Улан-Удэ,351334,978495,3,Дальневосточный,2.785085,Республика Бурятия,107.633582,51.850729,431922.0


In [79]:
with open('data_regions.pickle', 'wb') as _f:
    pickle.dump(data_regions, _f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
with open('data_regions.pickle', 'rb') as _f:
    data_regions = pickle.load(_f)