Materiały:
 - https://github.com/ppatrzyk/polska-geojson/blob/master/README.md
 - https://pl.wikipedia.org/wiki/Wojew%C3%B3dztwo
 - https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&action=history&limit=500

In [1]:
# import all necessary stuff
import holoviews as hv
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs
from holoviews import opts
from io import StringIO
hv.extension('bokeh')


In [2]:
# getting links of geojson and turning them to raw content
SRC_geojson = requests.get('https://github.com/ppatrzyk/polska-geojson/blob/master/README.md')
raw = 'raw.githubusercontent.com'
SRC_soup = bs(SRC_geojson.text, 'lxml')
SRC_links = []
for url in SRC_soup.find_all('a'):
    if '.geojson' in url.get('href'):
        if 'github.com' in url.get('href'):
            SRC_links.append(url.get('href')[2:-1])
for i in range(len(SRC_links)):
    SRC_links[i] = SRC_links[i].replace('github.com',raw).replace('raw/','')[:-1]
SRC_links

['https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master/wojewodztwa/wojewodztwa-max.geojson',
 'https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master/wojewodztwa/wojewodztwa-medium.geojson',
 'https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master/wojewodztwa/wojewodztwa-min.geojson',
 'https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master/powiaty/powiaty-max.geojson',
 'https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master/powiaty/powiaty-medium.geojson',
 'https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master/powiaty/powiaty-min.geojson',
 'https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master/miasta/wroclaw-max.geojson',
 'https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master/miasta/wroclaw-medium.geojson']

In [3]:
# adjusting geojson dataframe into something readable
geojson_rq = requests.get(SRC_links[1])
geojson_soup = bs(geojson_rq.text, 'lxml')
geojson_text = geojson_soup.find('p').text
geojson_frame = pd.read_json(StringIO(geojson_text))
data = []
for i in range(len(geojson_frame)):
    name = geojson_frame['features'][i]['properties']['nazwa']
    coords = geojson_frame['features'][i]['geometry']['coordinates']
    while len(coords) < 10:
        coords = coords[0]
    coords = np.swapaxes(coords,0,1)
    data.append({'name':name, 'x': coords[0], 'y': coords[1]})
data

[{'name': 'śląskie',
  'x': array([19.4698, 19.4689, 19.468 , ..., 19.469 , 19.4697, 19.4698]),
  'y': array([50.9154, 50.9137, 50.9129, ..., 50.9178, 50.9175, 50.9154])},
 {'name': 'opolskie',
  'x': array([17.8122, 17.812 , 17.8122, ..., 17.8131, 17.8126, 17.8122]),
  'y': array([51.1349, 51.1335, 51.1325, ..., 51.1379, 51.1353, 51.1349])},
 {'name': 'wielkopolskie',
  'x': array([16.8487, 16.8496, 16.852 , ..., 16.8448, 16.8491, 16.8487]),
  'y': array([53.6287, 53.6287, 53.6293, ..., 53.6265, 53.6272, 53.6287])},
 {'name': 'zachodniopomorskie',
  'x': array([16.3716, 16.372 , 16.3741, ..., 16.372 , 16.3715, 16.3716]),
  'y': array([54.4414, 54.442 , 54.4417, ..., 54.44  , 54.4407, 54.4414])},
 {'name': 'świętokrzyskie',
  'x': array([19.7471, 19.7474, 19.7535, ..., 19.7525, 19.7497, 19.7471]),
  'y': array([50.866 , 50.8666, 50.8689, ..., 50.8643, 50.8651, 50.866 ])},
 {'name': 'kujawsko-pomorskie',
  'x': array([18.6768, 18.6785, 18.6804, ..., 18.6731, 18.6757, 18.6768]),
  'y': a

In [4]:
# load basic stats for vovoidship from wiki and correct them
stats_frame =  pd.read_html('https://pl.wikipedia.org/wiki/Wojew%C3%B3dztwo', match='TERYT')[0]
stats_frame.columns=['teryt','wojewodztwo','siedziba','powierzchnia','ludnosc','gestosc-zal','urbanizacja','bezrobocie','pkbpc','rejestracja']
for column in stats_frame.columns[3:9]:
    stats_frame[column] = [float(str(i).replace(',','.').replace(' ','').replace('%','')) for i in stats_frame[column]]
stats_frame

Unnamed: 0,teryt,wojewodztwo,siedziba,powierzchnia,ludnosc,gestosc-zal,urbanizacja,bezrobocie,pkbpc,rejestracja
0,02 (DS),dolnośląskie,Wrocław,19947.0,2897700.0,14545.0,68.61,5.4,57228.0,"D, V"
1,04 (KP),kujawsko-pomorskie,Bydgoszcz1) Toruń2),17971.0,2017700.0,11562.0,59.09,9.2,41875.0,C
2,06 (LU),lubelskie,Lublin,25123.0,2038300.0,8429.0,46.46,8.4,35712.0,L
3,08 (LB),lubuskie,Gorzów Wielkopolski1) Zielona Góra2),13988.0,985500.0,7253.0,64.95,6.2,42755.0,F
4,10 (LD),łódzkie,Łódź,18219.0,2394900.0,13537.0,62.55,6.3,48126.0,E
5,12 (MA),małopolskie,Kraków,15183.0,3430400.0,22398.0,48.19,4.9,47272.0,"K, J"
6,14 (MZ),mazowieckie,Warszawa,35559.0,5512800.0,15196.0,64.4,5.1,83123.0,"W, A"
7,16 (OP),opolskie,Opole,9412.0,948600.0,10482.0,53.3,6.6,41080.0,O
8,18 (PK),podkarpackie,Rzeszów,17846.0,2085900.0,11930.0,41.09,9.1,36088.0,"R, Y"
9,20 (PD),podlaskie,Białystok,20187.0,1148700.0,5853.0,60.79,8.1,37077.0,B


In [5]:
# add new data to dict
for i in range(len(data)):
   tmp = stats_frame[stats_frame['wojewodztwo'] == data[i]['name']].copy()
   data[i]['teryt'] = tmp['teryt'].tolist()[0]
   data[i]['capital'] = tmp['siedziba'].tolist()[0]
   data[i]['area'] = tmp['powierzchnia'].tolist()[0]
   data[i]['population'] = tmp['ludnosc'].tolist()[0]/1000000
   data[i]['urbanization'] = tmp['urbanizacja'].tolist()[0]
   data[i]['unemployment'] = tmp['bezrobocie'].tolist()[0]
   data[i]['plates'] = tmp['rejestracja'].tolist()[0]
data


[{'name': 'śląskie',
  'x': array([19.4698, 19.4689, 19.468 , ..., 19.469 , 19.4697, 19.4698]),
  'y': array([50.9154, 50.9137, 50.9129, ..., 50.9178, 50.9175, 50.9154]),
  'teryt': '24 (SL)',
  'capital': 'Katowice',
  'area': 12333.0,
  'population': 4.3759,
  'urbanization': 76.73,
  'unemployment': 4.5,
  'plates': 'S, I'},
 {'name': 'opolskie',
  'x': array([17.8122, 17.812 , 17.8122, ..., 17.8131, 17.8126, 17.8122]),
  'y': array([51.1349, 51.1335, 51.1325, ..., 51.1379, 51.1353, 51.1349]),
  'teryt': '16 (OP)',
  'capital': 'Opole',
  'area': 9412.0,
  'population': 0.9486,
  'urbanization': 53.3,
  'unemployment': 6.6,
  'plates': 'O'},
 {'name': 'wielkopolskie',
  'x': array([16.8487, 16.8496, 16.852 , ..., 16.8448, 16.8491, 16.8487]),
  'y': array([53.6287, 53.6287, 53.6293, ..., 53.6265, 53.6272, 53.6287]),
  'teryt': '30 (WP)',
  'capital': 'Poznań',
  'area': 29826.0,
  'population': 3.5,
  'urbanization': 54.27,
  'unemployment': 3.3,
  'plates': 'P, M'},
 {'name': 'zacho

In [6]:
# get links of older versions of site
prev_get = requests.get('https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&action=history&limit=500')
prev_soup = bs(prev_get.text, 'lxml')
years = [2023-i for i in range(11)]
prev_main = 'https://pl.wikipedia.org'
prev_links = []
sec = prev_soup.find('section', {'id': 'pagehistory'})
for link in sec.find_all('a'):
    if len(years) == 0 : break
    if str(years[0]) in link.text:
        prev_links.append(prev_main+link.get('href'))
        years.remove(years[0])
prev_links = prev_links[::-1]
prev_links

['https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=38229425',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=41370798',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=44274527',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=47918830',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=51298011',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=55329093',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=58098095',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=61137780',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=63293664',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=68709509',
 'https://pl.wikipedia.org/w/index.php?title=Wojew%C3%B3dztwo&oldid=72336827']

In [7]:
# making frame of population from different years
years = [2023-i for i in range(11)][::-1]
prev_pop = []
for i in range(len(years)):
    tabela = pd.read_html(prev_links[i], encoding='UTF-8')[0]
    if len(tabela.columns) == 9:
        tabela.columns=['teryt','wojewodztwo','siedziba','powierzchnia','ludnosc','urbanizacja','nezrobocie','pkbpc','rejestracja']
    else: 
        tabela.columns=['teryt','wojewodztwo','siedziba','powierzchnia','ludnosc','gestosc-zal','urbanizacja','nezrobocie','pkbpc','rejestracja']
    for column in tabela.columns:
        try:    
            tabela[column] = [float(str(i).replace(',','.').replace(' ','').replace('%','').replace('\xa0','')) for i in tabela[column]]
        except ValueError:
            pass
    keys = ['year'] + tabela['wojewodztwo'].tolist()
    values = [years[i]] + [e/1000000 for e in tabela['ludnosc'].tolist()]
    res = {keys[j]:values[j] for j in range(len(keys))}
    prev_pop.append(res)
prev_pop = pd.DataFrame(data=prev_pop)
prev_pop

Unnamed: 0,year,dolnośląskie,kujawsko-pomorskie,lubelskie,lubuskie,łódzkie,małopolskie,mazowieckie,opolskie,podkarpackie,podlaskie,pomorskie,śląskie,świętokrzyskie,warmińsko-mazurskie,wielkopolskie,zachodniopomorskie
0,2013,2.876627,2.069083,2.157202,1.010047,2.541832,3.29827,5.222167,1.031097,2.101732,1.189731,2.230099,4.640725,1.27012,1.427118,3.408281,1.693198
1,2014,2.914362,2.096404,2.165651,1.023317,2.524651,3.354077,5.30176,1.010203,2.129951,1.19869,2.29007,4.61587,1.273995,1.450697,3.462196,1.721405
2,2015,2.914362,2.096404,2.165651,1.023317,2.524651,3.354077,5.30176,1.010203,2.129951,1.19869,2.29007,4.61587,1.273995,1.450697,3.462196,1.721405
3,2016,2.914362,2.096404,2.165651,1.023317,2.524651,3.354077,5.30176,1.010203,2.129951,1.19869,2.29007,4.61587,1.273995,1.450697,3.462196,1.721405
4,2017,2.904198,2.08621,2.139726,1.018084,2.493603,3.372618,5.349114,0.996011,2.127657,1.1888,2.30771,4.570849,1.257179,1.439675,3.475323,1.710482
5,2018,2.901003,2.079917,2.121613,1.01544,2.47061,3.395663,5.391813,0.988014,2.128747,1.182677,2.328214,4.54014,1.244383,1.431299,3.490597,1.703009
6,2019,2.901225,2.077775,2.117629,1.014548,2.466322,3.400577,5.403412,0.986506,2.129015,1.181533,2.333523,4.533565,1.241546,1.428983,3.493969,1.70103
7,2020,2.898525,2.069273,2.103342,1.010177,2.448713,3.413931,5.428031,0.980771,2.125901,1.176576,2.346717,4.508078,1.230044,1.420514,3.500361,1.693219
8,2021,2.898525,2.069273,2.103342,1.010177,2.448713,3.413931,5.428031,0.980771,2.125901,1.176576,2.346717,4.508078,1.230044,1.420514,3.500361,1.693219
9,2022,2.898525,2.069273,2.103342,1.010177,2.448713,3.413931,5.428031,0.980771,2.125901,1.176576,2.346717,4.508078,1.230044,1.420514,3.500361,1.693219


In [8]:
# making map of poland
vov_map = hv.Polygons(data, ['x','y'], [
    ('teryt','TERYT'),
    ('name','Nazwa'),
    ('capital','Siedziba'),
    ('area','Powierzchnia [km^2]'),
    ('population','Populacja [mln]'),
    ('urbanization','Urbanizacja [%]'),
    ('unemployment', 'Bezrobocie [%]')
    ])
vov_map.opts(opts.Polygons(
    xaxis=None, yaxis=None,
    width=300, height=280,
    logz=True,
    tools=['hover'],
    show_grid=False,
    color_index='population',
    show_frame=False,
    colorbar=True,
    toolbar='above'
))



In [9]:
pos_xy = hv.streams.Tap(source=vov_map, x=0, y=0)

def tap_bars(x, y):
    global vov
    for element in data:
        mid_x = (np.max(element['x']) + np.min(element['x']))/2 
        mid_y = (np.max(element['y']) + np.min(element['y']))/2 
        d = np.sqrt(np.power(mid_x-x,2)+np.power(mid_y-y,2))
        if d < np.pi*0.5**2:
            vov = ' '+element['name']
            return hv.Bars(data=prev_pop[['year',element['name']]])
    return hv.Bars([(0,0)])

tap_dmap = hv.DynamicMap(tap_bars, streams=[pos_xy])

(vov_map + tap_dmap ).opts(
    opts.Bars(
        title=f"Ludnosc w [mln] w wojewodztwie na przestrzeni lat",
        axiswise=True,
        height=500, width=500, 
        yaxis='right', 
        toolbar='right', 
    ),
    opts.Polygons(
    title='Populacja w Polsce (Województwa)',
    xaxis=None, yaxis=None,
    width=500, height=500,
    logz=True,
    tools=['hover'],
    color_index = 'population',
    show_grid=False,
    show_frame=False,
    colorbar=True,
    toolbar='above'
))



BokehModel(combine_events=True, render_bundle={'docs_json': {'1540eafb-e144-4e58-8129-76ca046f6ddc': {'version…