In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')


In [2]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.tile_providers import CARTODBPOSITRON, CARTODBPOSITRON_RETINA, ESRI_IMAGERY, OSM, STAMEN_TERRAIN, STAMEN_TERRAIN_RETINA, STAMEN_TONER_BACKGROUND, get_provider
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.transform import factor_cmap
from bokeh.palettes import inferno, RdYlGn, viridis
from pyproj import Proj, transform # longitude first, latitude second.

In [3]:
# Load the springs dataset; the JSON export is encoded as Windows-1251.
data = pd.read_json(
    'data-2016-04-06T00-00-00-structure-2016-04-06T00-00-00.json',
    encoding='windows-1251',
)

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

Unnamed: 0,global_id,ID,Latitude_WGS84,Period,Longitude_WGS84,SpringNumber,ID_en,Latitude_WGS84_en,Period_en,Longitude_WGS84_en,SpringNumber_en,system_object_id
count,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,76.0
mean,605795100.0,430.15942,55.68746,2016.425121,37.462635,100.966184,430.15942,55.68746,2016.425121,37.462635,100.966184,83.697368
std,334560600.0,267.876599,0.169083,0.814282,0.163565,62.813824,267.876599,0.169083,0.814282,0.163565,62.813824,45.937354
min,167615500.0,8.0,55.170888,2015.0,36.884814,1.0,8.0,55.170888,2015.0,36.884814,1.0,8.0
25%,167615600.0,109.0,55.632194,2016.0,37.374333,44.0,109.0,55.632194,2016.0,37.374333,44.0,45.5
50%,859993500.0,592.0,55.708111,2017.0,37.470833,101.0,592.0,55.708111,2017.0,37.470833,101.0,83.0
75%,859994100.0,643.5,55.826541,2017.0,37.56,162.0,643.5,55.826541,2017.0,37.56,162.0,117.5
max,860147900.0,717.0,55.988806,2017.0,37.908611,205.0,717.0,55.988806,2017.0,37.908611,205.0,173.0


In [5]:
# Spelling variants of the same water-quality verdict, mapped to one canonical
# wording so that each verdict forms a single category.
condition_aliases = {
    "Превышение нормативов качества по содержанию химических веществ не выявлено":
        "Превышений нормативов качества по содержанию химических веществ не выявлено",
    "Отбор проб не производится":
        "Отбор проб не производился",
}

# Collapse every run of whitespace (including leading/trailing) to a single
# space, then unify the known variants. The .str accessor passes missing
# values through as NaN instead of raising, unlike calling .split() on each
# element directly.
data['Condition'] = (
    data['Condition']
    .str.split()
    .str.join(' ')
    .replace(condition_aliases)
)

In [6]:
# Preview each column: its name followed by at most `k` of its distinct values.
k = 20
for name, values in data.items():
    print(name, values.unique()[:k], sep='\n')

global_id
[859911161 859911186 859910979 859911523 859911515 859911175 859911200
 859911500 859911503 859911146 859913185 859913141 859913192 859913193
 859913182 859913155 859913137 859912263 859911536 859913195]
signature_date
['04.04.2018 11:41:40' '04.04.2018 11:41:55' '04.04.2018 11:42:10'
 '04.04.2018 11:42:20' '04.04.2018 11:42:35' '04.04.2018 11:42:50'
 '04.04.2018 11:43:05' '04.04.2018 11:43:20' '04.04.2018 11:43:35'
 '04.04.2018 11:43:50' '04.04.2018 11:44:00' '04.04.2018 11:44:15'
 '04.04.2018 11:44:30' '04.04.2018 11:44:35' '05.07.2018 15:59:50'
 '05.07.2018 16:00:15' '05.07.2018 16:01:10' '05.07.2018 16:00:30'
 '05.07.2018 16:00:40' '05.07.2018 16:02:15']
ID
[567 569 565 576 575 568 570 573 574 566 584 579 585 587 583 582 578 571
 577 588]
Latitude_WGS84
[55.988806 55.980722 55.988139 55.708388 55.768444 55.9818   55.966916
 55.754722 55.768386 55.632194 55.718472 55.687916 55.826722 55.713333
 55.753722 55.711111 55.826861 55.826555 55.82675  55.826527]
Period
[2017 2015 

In [7]:
# Project the WGS84 coordinates (EPSG:4326) to Web Mercator (EPSG:3857), the
# coordinate system used by the map tiles.
# The projections are built once rather than once per row, and all points are
# projected in a single call.
# NOTE(review): recent pyproj releases deprecate `transform` and the `init=`
# syntax in favour of `Transformer` - confirm against the installed version.
wgs84 = Proj(init='epsg:4326')
web_mercator = Proj(init='epsg:3857')

# Argument order matters: longitude first, latitude second.
projected_x, projected_y = transform(
    wgs84,
    web_mercator,
    data['Longitude_WGS84'].values,
    data['Latitude_WGS84'].values,
)

# Keep the (x, y) tuple column as well as the separate coordinate columns.
data['merc'] = list(zip(projected_x, projected_y))
data['merc_x'] = projected_x
data['merc_y'] = projected_y

# Human-readable "latitude longitude" label.
data['coord'] = data['Latitude_WGS84'].astype('str') + ' ' + data['Longitude_WGS84'].astype('str')

In [12]:
# Build a data source and configure for inline output
source = ColumnDataSource(data)
output_notebook()

# Axis ranges: the extent of the projected points, padded by 100 units on each
# side. The existing merc_x / merc_y columns are used directly instead of
# re-unpacking the tuple column for every bound.
merc_x_range = [data['merc_x'].min() - 100, data['merc_x'].max() + 100]
merc_y_range = [data['merc_y'].min() - 100, data['merc_y'].max() + 100]

p = figure(x_range=merc_x_range, y_range=merc_y_range, x_axis_type="mercator", y_axis_type="mercator")

# Background tiles. Available tile sources:
#   CARTODBPOSITRON         - Tile Source for CartoDB Tile Service
#   CARTODBPOSITRON_RETINA  - Tile Source for CartoDB Tile Service (tiles at 'retina' resolution)
#   ESRI_IMAGERY            - Tile Source for ESRI public tiles.
#   OSM                     - Tile Source for Open Street Maps.
#   STAMEN_TERRAIN          - Tile Source for Stamen Terrain Service
#   STAMEN_TERRAIN_RETINA   - Tile Source for Stamen Terrain Service (tiles at 'retina' resolution)
#   STAMEN_TONER            - Tile Source for Stamen Toner Service
#   STAMEN_TONER_BACKGROUND - Tile Source for Stamen Toner Background Service which does not include labels
#   STAMEN_TONER_LABELS     - Tile Source for Stamen Toner Service which includes only labels
tile_provider = get_provider(STAMEN_TONER_BACKGROUND)
p.add_tile(tile_provider)

# One colour per distinct condition. The factor list is computed once so the
# palette length and the factors always come from the same values.
conditions = sorted(data['Condition'].unique())
index_cmap = factor_cmap('Condition', palette=inferno(len(conditions)), factors=conditions)

# Each spring is drawn twice: a large translucent disc and a smaller,
# more opaque disc on top of it.
for alpha, radius in ((0.3, 1000), (0.7, 200)):
    p.circle(source=source, x='merc_x', y='merc_y', fill_color=index_cmap, fill_alpha=alpha, radius=radius)

# Hover tooltips: label shown to the user -> column of the data source.
hover_tooltips = [
    ("Название родника", "@SpringName"),
    ("Качество воды", "@Condition"),
    ('Дата', '@Period'),
    ("Адрес", "@District"),
    ("Расположение", "@Location"),
    ("Координаты", "@coord"),
]
p.add_tools(HoverTool(tooltips=hover_tooltips))
show(p)