In [1]:
import os
import json
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.me_to_neurazi import me_to_neurazi

# Shared chart settings: later cells read kredity['sirka'], kredity['vyska_nizkych']
# and kredity['wiki'] from this JSON file.
kredity_cesta = os.path.join('src', 'kredity.json')
with open(kredity_cesta, 'r', encoding='utf-8') as soubor:
    kredity = json.load(soubor)

# Display / plotting configuration for the whole notebook.
pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')

# Blanket suppression: every warning is hidden for the rest of the session.
warnings.filterwarnings('ignore')

In [2]:
# Every parquet file in data/cnb_sloupce carries the columns of one record field
# (presumably MARC 21 fields of a national-bibliography export — TODO confirm)
# and all of them share the "001" key column.
def _pripoj_pole(tabulka, pole):
    """Left-join the parquet file named after `pole` onto `tabulka` via the "001" key."""
    soubor = os.path.join("data/cnb_sloupce", pole + ".parquet")
    return tabulka.join(pl.read_parquet(soubor), left_on="001", right_on="001", how="left")


df = pl.read_parquet(os.path.join("data/cnb_sloupce", "100.parquet"))
for pole in ("leader", "008"):
    df = _pripoj_pole(df, pole)

# The positional string checks are done in pandas, then the result goes back to polars.
zaznamy = df.to_pandas()
leader = zaznamy["leader"]
pole_008 = zaznamy["008"]
ponechat = (
    leader.str[6].isin(["a", "t"])
    & ~leader.str[7].isin(["b", "i", "s", " "])
    & (pole_008.str[15:17] == "xr")
    & (pole_008.str[35:38] == "cze")
)
df = pl.from_pandas(zaznamy[ponechat])

for pole in ("020", "022", "245", "300", "655", "700"):
    df = _pripoj_pole(df, pole)

df = (
    df.explode("022_a")
    # only rows without any 022_a value survive
    .filter(pl.col("022_a").is_null())
    # derived columns: `rok` from field 008, `stran` from 300_a (project helpers)
    .with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
    .with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
    # clean the title columns in place with the bez_bordelu helper
    .with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
    .explode('245_p')
    .with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
)
print(df.height)

# Keep years 1800-2024 (inclusive) and records with more than 30 pages.
df = df.filter(pl.col("rok").is_between(1800, 2024) & (pl.col("stran") > 30))

# Records without an author authority ID cannot be matched later, so drop them.
df = df.drop_nulls(subset=['100_7'])

# Drop rows whose 245_h mentions "grafika", then keep one row per (author, title) pair.
bez_grafiky = pl.col("245_h").is_null() | ~pl.col("245_h").str.contains("grafika")
df = df.filter(bez_grafiky).unique(subset=["100_a", "245_a"], keep="first")
print(df.height)

716789
449120


In [3]:
# Authority records: collect the 100_7 IDs of every record whose 370_c values
# contain "Česk", then keep only books whose author ID is in that list.
aut = pl.read_parquet(os.path.join("data", "aut_vyber.parquet"))
cesti_autori = (
    aut.explode("370_c")
    .filter(pl.col("370_c").str.contains("Česk"))
    .explode("100_7")
)
cesi = cesti_autori.get_column("100_7").to_list()
print(len(cesi))
df = df.filter(pl.col("100_7").is_in(cesi))

364420


In [4]:
# Wikidata extract; a later cell left-joins it onto `df` by the author authority ID.
wikid = pl.read_parquet(os.path.join("data","wikidata.parquet"))

In [5]:
# Number of rows in the Wikidata extract.
wikid.height

197515

In [6]:
# Peek at three random records; the fixed seed keeps the preview identical
# across Restart & Run All.
df.sample(3, seed=42)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64
"""1""","""Brůna, Otakar,""","""jk01013312""","[""aut""]","""1928-""",,,,,"""ck9204497""",""" nam a22 4500""","""921124s1992 xr a u0…","[""(brož.) :""]","[""40 Kčs""]","[""80-85465-39-6""]",,,,,,,"""1""","""0""","""Brutální sen Juraje Kukury""","""Hledání domova /""","""Otakar Brůna ; Obálka a typogr…",,,,,,"[""162 s. :""]","[""fotogr. ;""]","[""21 cm""]",,,,"[""7""]","[""vzpomínky""]","[""fd133830""]","[""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1992,162
"""1""","""Navrátil, Pavel,""","""jn20001227188""","[""aut""]","""1976-""",,,,,"""cpk20010885258""",""" cam a22 a 4500""","""000115s2000 xr a c p 0…","[""(brož.) :""]","[""Kč 130,00""]","[""80-902815-0-8""]",,,,,,,"""1""","""0""","""S počítačem na základní škole""","""pro druhý stupeň základní škol…","""Pavel Navrátil""",,,,,,"[""152 s. :""]","[""il. ;""]","[""30 cm""]",,,,"[""7"", ""9""]","[""učebnice základních škol"", ""textbooks (elementary)""]","[""fd133773"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2000,152
"""1""","""Králová, Maria""","""xx0124971""","[""aut""]",,,,,,"""nkc20213366685""",""" nam a22 i 4500""","""211013s2021 xr a f 0…","[""(brožováno)""]",,"[""978-80-210-9939-5""]",,,,,,,"""1""","""0""","""Metody kauzální analýzy pro mě…","""Methods of causal analysis for…","""Maria Králová""",,,,,,"[""173 stran :""]","[""ilustrace (některé barevné) ;""]","[""24 cm""]",,,,"[""7"", ""9""]","[""monografie"", ""monographs""]","[""fd132842"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021,173


In [7]:
# The Wikidata frame stores its key in "__index_level_0__" (presumably a pandas
# index written to parquet); rename it to match the authority ID column 100_7
# and attach the Wikidata columns with a left join.
wikid_podle_autority = wikid.rename({"__index_level_0__": "100_7"})
df = df.join(wikid_podle_autority, left_on='100_7', right_on='100_7', how='left')

In [8]:
# Keep a book when the Wikidata death year is unknown or is not earlier than
# the publication year.
umrti_nezname = pl.col('w_umrti').is_null()
vyslo_do_roku_umrti = pl.col('w_umrti') >= pl.col('rok')
df = df.filter(umrti_nezname | vyslo_do_roku_umrti)

In [9]:
# Preview the joined frame; a bounded head() avoids rendering up to 100 rows of
# a very wide table (tbl_rows was raised to 100 in the setup cell).
df.head(10)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Svěrák, Zdeněk,""","""jk01121890""","[""aut""]","""1936-""",,,,,"""cpk20203254965""",""" cam a22 i 4500""","""040430s2004 xr a g 0…","[""(vázáno)""]",,"[""80-86526-09-7""]",,,,,,,"""1""","""0""","""Usměj se, Lízo""",,"""Zdeněk Svěrák, Petr Šabach, Iv…",,,,,,"[""155 stran :""]","[""ilustrace ;""]","[""18 cm""]",,,,…,,,2004,155,"""Q169065""","[""Cena čtenářů"", ""Cena čtenářů"", … ""Cena ministerstva kultury za přínos v oblasti kinematografie a audiovize""]",,,,"[""es"", ""en"", … ""sq""]","""Zdeněk Svěrák""","""Zdeněk Svěrák""","[""Božena Svěráková""]","[""Česko"", ""Československo""]",,"""český herec, dramatik a scenár…","""Czech actor""","[""Hanka Jelínková"", ""Jan Svěrák""]",,"[""scenárista"", ""zpěvák"", … ""herec""]",,"[""Pedagogická fakulta Univerzity Karlovy""]",,"[""Komunistická strana Československa""]",,,,"""muž""","[""Praha""]",,1936.0,"""+1936-03-28T00:00:00Z""",,,,"""https://cs.wikipedia.org/wiki/…","""https://en.wikipedia.org/wiki/…"
"""1""","""Šabata, František""","""xx0144709""","[""aut""]",,,,,,"""nkc20132447188""",""" nam a22 a 4500""","""130315s2011 xr a f f 0…","[""(brož.)""]",,"[""978-80-86914-41-1""]",,,,,,,"""1""","""0""","""Nápisy a obrazy na mincích cís…","""[sběratelská příručka] /""","""František Šabata""",,,,,,"[""36 s. :""]","[""barev. il. ;""]","[""21 cm""]",,,,…,,,2011,36,"""Q120588190""",,,,,[],"""František Šabata""",,,,,"""Mgr., regionální publicista, a…",,,,,,,,,,,,,,,,,,,,,
"""1""","""Bártek, Jiří,""","""jk01010833""","[""aut""]","""1931-1979""",,,,,"""bk196305832""",""" nam a22 1 4500""","""980401s1963 xr 0…",,,,,,,,,,"""1""","""0""","""Velké střetnutí""","""Neobyčejný příběh o prologu, e…","""Jiří Bártek""",,,,,,"[""174, [1] s. ;""]",,"[""8°""]",,,,…,,,1963,174,"""Q95182689""",,,,,[],"""Jiří Bártek""","""Jiří Bártek""",,"[""Československo""]",,"""Narozen 3.9.1931 v Brně, zemře…",,,,"[""herec"", ""loutkoherec"", … ""dramatik""]",,,,,,,,"""muž""","[""Brno""]","[""České Budějovice""]",1931.0,"""+1931-09-03T00:00:00Z""",1979.0,"""+1979-04-05T00:00:00Z""",,,
"""1""","""Karger, Adolf,""","""jk01053027""","[""aut""]","""1940-""",,,,,"""bk197801935""",""" nam a22 1 4500""","""970311s1978 xr …",,,,,,,,,,"""1""","""0""","""Prostorová kinematika a Lieovy…","""Určeno [též] posl. vys. škol /""","""[Autoři:] Adolf Karger, Josef …",,,,,,"[""383, [1] s. ;""]",,"[""8°""]",,,,…,,,1978,383,"""Q95454218""",,,,,[],"""Adolf Karger""","""Adolf Karger""",,,,"""Narozen 1.3.1940 v Hanušovicíc…",,,,,,,,,,,,"""muž""","[""Hanušovice""]",,1940.0,"""+1940-03-01T00:00:00Z""",,,,,
"""1""","""Zelenková, Helena,""","""xx0001002""","[""aut""]","""1945-""",,,,,"""nkc20122429901""",""" nam a22 a 4500""","""121129s2012 xr a e 0…","[""(TIGRIS ;"", ""váz.)""]",,"[""978-80-86062-58-7""]",,,,,,,"""1""","""0""","""Láska k Moudrosti""",,"""Helena Zelenková ; [ilustrace …",,,,,,"[""381 s. :""]","[""barev. il. ;""]","[""22 cm""]",,,,…,,,2012,381,"""Q112368310""",,,,,[],"""Helena Zelenková""","""Helena Zelenková""",,,,"""Narozena 1945. Učitelka, autor…",,,,"[""učitel""]",,,,,,,,"""žena""",,,1945.0,"""+1945-00-00T00:00:00Z""",,,,,
"""1""","""Hubert, Miroslav,""","""xx0006396""","[""aut"", ""pbl""]","""1925-2021""",,,,,"""nkc20152765058""",""" nam a22 i 4500""","""151211s2015 xr abche 0…","[""(brožováno)""]",,"[""978-80-86930-82-4""]",,,,,,,"""1""","""0""","""Život a dílo loďaře L. Platovs…",,"""Miroslav Hubert""",,,,,,"[""70 stran :""]","[""ilustrace, mapy, portréty, faksimile ;""]","[""22 cm""]",,,,…,,,2015,70,"""Q95398305""",,,,,[],"""Miroslav Hubert""","""Miroslav Hubert""",,,,"""Narozen 17.12.1925. Ing., kons…",,,,"[""konstruktér"", ""lodní konstruktér"", ""historik""]",,,,,,,,"""muž""",,,1925.0,"""+1925-12-17T00:00:00Z""",,,,,
"""1""","""Sobotová, Jana,""","""mzk2014837561""","[""aut""]","""1961-""",,,,,"""nkc20162858277""",""" nam a22 i 4500""","""161128s2016 xr a f m 0…","[""(brožováno)""]",,"[""978-80-01-06051-3""]",,,,,,,"""1""","""0""","""Perspektivní nástrojové oceli""","""Perspective tool steels : habi…","""Jana Sobotová""",,,,,,"[""35 stran :""]","[""ilustrace (některé barevné) ;""]","[""21 cm""]",,,,…,,,2016,35,"""Q95144655""",,,,,[],"""Jana Sobotová""","""Jana Sobotová""",,,,"""Narozena 23. 10. 1961. Ing., P…",,,,"[""pedagog"", ""strojní inženýr"", ""vysokoškolský učitel""]",,,,,,,,"""žena""",,,1961.0,"""+1961-10-23T00:00:00Z""",,,,,
"""1""","""Kynčl, Radko,""","""xx0002836""","[""aut""]","""1949-""",,,,,"""cpk19980304211""",""" nam a22 a 4500""","""980330s1997 xr a e c 0…","[""(brož.)""]",,"[""80-7037-054-8""]","[""80-7027-054-8""]",,,,,,"""1""","""0""","""Mechanické energetické stroje""","""katalog sbírky Národního techn…","""Radko Kynčl""",,,,,,"[""152 s. :""]","[""il. ;""]","[""25 cm""]",,,,…,,,1997,152,"""Q96243180""",,,,,[],"""Radko Kynčl""","""Radko Kynčl""",,,,"""autor, kurátor Národního techn…","""author, curator""",,,"[""kurátor"", ""historik""]",,,,,,,,"""muž""",,,1949.0,"""+1949-00-00T00:00:00Z""",,,,,
"""1""","""Hruban, Jaroslav,""","""jk01042900""","[""aut""]","""1886-1934""",,,,,"""bk193001272""",""" nam a22 1 4500""","""991027s1930 xr 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Základy estetické hodnoty""","""příspěvek k metafysice estetik…","""Jaroslav Hruban""",,,,,,"[""138 s. ;""]",,"[""17 cm""]",,,,…,,,1930,138,"""Q60036925""",,,,,[],"""Jaroslav Hruban""","""Jaroslav Hruban""",,"[""Československo""]",,"""český pedagog, básník a překla…","""Czech educator, poet and trans…",,,"[""básník"", ""překladatel"", … ""prozaik""]",,"[""Filozofická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Valašské Klobouky""]","[""Uherský Brod""]",1886.0,"""+1886-04-27T00:00:00Z""",1934.0,"""+1934-07-09T00:00:00Z""",,,
"""1""","""Koníček, Zdeněk,""","""jk01061528""","[""aut""]","""1924-""",,,,,"""bk195600764""",""" nam a22 1 4500""","""980924s1956 xr …",,,,,,,,,,"""1""","""0""","""Usazovací nádrže""",,"""[Autor:] Zdeněk Konček""",,,,,,"[""118, [1] s. :""]","[""[1] příl. ;""]","[""8°""]",,,,…,,,1956,118,"""Q112347308""",,,,,[],"""Zdeněk Koníček""",,,,,"""Narozen 1924. Doc., Ing., stav…",,,,"[""stavební inženýr"", ""vysokoškolský učitel""]",,,,,,,,"""muž""",,,1924.0,"""+1924-00-00T00:00:00Z""",,,,,


In [10]:
# Unnest the `strany` lists and count rows per party, most frequent first.
strany = (
    df.explode('strany')
    .group_by('strany')
    .len()
    .sort(by='len', descending=True)
)

In [11]:
# Display the per-party counts; the null row counts books whose author has no party listed.
strany

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [12]:
# Unnest the `vezeni` lists and count rows per place of imprisonment, most frequent first.
vezeni = (
    df.explode('vezeni')
    .group_by('vezeni')
    .len()
    .sort(by='len', descending=True)
)
vezeni

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [13]:
# Preview the working frame; a bounded head() avoids rendering up to 100 rows of
# a very wide table (tbl_rows was raised to 100 in the setup cell).
df.head(10)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Svěrák, Zdeněk,""","""jk01121890""","[""aut""]","""1936-""",,,,,"""cpk20203254965""",""" cam a22 i 4500""","""040430s2004 xr a g 0…","[""(vázáno)""]",,"[""80-86526-09-7""]",,,,,,,"""1""","""0""","""Usměj se, Lízo""",,"""Zdeněk Svěrák, Petr Šabach, Iv…",,,,,,"[""155 stran :""]","[""ilustrace ;""]","[""18 cm""]",,,,…,,,2004,155,"""Q169065""","[""Cena čtenářů"", ""Cena čtenářů"", … ""Cena ministerstva kultury za přínos v oblasti kinematografie a audiovize""]",,,,"[""es"", ""en"", … ""sq""]","""Zdeněk Svěrák""","""Zdeněk Svěrák""","[""Božena Svěráková""]","[""Česko"", ""Československo""]",,"""český herec, dramatik a scenár…","""Czech actor""","[""Hanka Jelínková"", ""Jan Svěrák""]",,"[""scenárista"", ""zpěvák"", … ""herec""]",,"[""Pedagogická fakulta Univerzity Karlovy""]",,"[""Komunistická strana Československa""]",,,,"""muž""","[""Praha""]",,1936.0,"""+1936-03-28T00:00:00Z""",,,,"""https://cs.wikipedia.org/wiki/…","""https://en.wikipedia.org/wiki/…"
"""1""","""Šabata, František""","""xx0144709""","[""aut""]",,,,,,"""nkc20132447188""",""" nam a22 a 4500""","""130315s2011 xr a f f 0…","[""(brož.)""]",,"[""978-80-86914-41-1""]",,,,,,,"""1""","""0""","""Nápisy a obrazy na mincích cís…","""[sběratelská příručka] /""","""František Šabata""",,,,,,"[""36 s. :""]","[""barev. il. ;""]","[""21 cm""]",,,,…,,,2011,36,"""Q120588190""",,,,,[],"""František Šabata""",,,,,"""Mgr., regionální publicista, a…",,,,,,,,,,,,,,,,,,,,,
"""1""","""Bártek, Jiří,""","""jk01010833""","[""aut""]","""1931-1979""",,,,,"""bk196305832""",""" nam a22 1 4500""","""980401s1963 xr 0…",,,,,,,,,,"""1""","""0""","""Velké střetnutí""","""Neobyčejný příběh o prologu, e…","""Jiří Bártek""",,,,,,"[""174, [1] s. ;""]",,"[""8°""]",,,,…,,,1963,174,"""Q95182689""",,,,,[],"""Jiří Bártek""","""Jiří Bártek""",,"[""Československo""]",,"""Narozen 3.9.1931 v Brně, zemře…",,,,"[""herec"", ""loutkoherec"", … ""dramatik""]",,,,,,,,"""muž""","[""Brno""]","[""České Budějovice""]",1931.0,"""+1931-09-03T00:00:00Z""",1979.0,"""+1979-04-05T00:00:00Z""",,,
"""1""","""Karger, Adolf,""","""jk01053027""","[""aut""]","""1940-""",,,,,"""bk197801935""",""" nam a22 1 4500""","""970311s1978 xr …",,,,,,,,,,"""1""","""0""","""Prostorová kinematika a Lieovy…","""Určeno [též] posl. vys. škol /""","""[Autoři:] Adolf Karger, Josef …",,,,,,"[""383, [1] s. ;""]",,"[""8°""]",,,,…,,,1978,383,"""Q95454218""",,,,,[],"""Adolf Karger""","""Adolf Karger""",,,,"""Narozen 1.3.1940 v Hanušovicíc…",,,,,,,,,,,,"""muž""","[""Hanušovice""]",,1940.0,"""+1940-03-01T00:00:00Z""",,,,,
"""1""","""Zelenková, Helena,""","""xx0001002""","[""aut""]","""1945-""",,,,,"""nkc20122429901""",""" nam a22 a 4500""","""121129s2012 xr a e 0…","[""(TIGRIS ;"", ""váz.)""]",,"[""978-80-86062-58-7""]",,,,,,,"""1""","""0""","""Láska k Moudrosti""",,"""Helena Zelenková ; [ilustrace …",,,,,,"[""381 s. :""]","[""barev. il. ;""]","[""22 cm""]",,,,…,,,2012,381,"""Q112368310""",,,,,[],"""Helena Zelenková""","""Helena Zelenková""",,,,"""Narozena 1945. Učitelka, autor…",,,,"[""učitel""]",,,,,,,,"""žena""",,,1945.0,"""+1945-00-00T00:00:00Z""",,,,,
"""1""","""Hubert, Miroslav,""","""xx0006396""","[""aut"", ""pbl""]","""1925-2021""",,,,,"""nkc20152765058""",""" nam a22 i 4500""","""151211s2015 xr abche 0…","[""(brožováno)""]",,"[""978-80-86930-82-4""]",,,,,,,"""1""","""0""","""Život a dílo loďaře L. Platovs…",,"""Miroslav Hubert""",,,,,,"[""70 stran :""]","[""ilustrace, mapy, portréty, faksimile ;""]","[""22 cm""]",,,,…,,,2015,70,"""Q95398305""",,,,,[],"""Miroslav Hubert""","""Miroslav Hubert""",,,,"""Narozen 17.12.1925. Ing., kons…",,,,"[""konstruktér"", ""lodní konstruktér"", ""historik""]",,,,,,,,"""muž""",,,1925.0,"""+1925-12-17T00:00:00Z""",,,,,
"""1""","""Sobotová, Jana,""","""mzk2014837561""","[""aut""]","""1961-""",,,,,"""nkc20162858277""",""" nam a22 i 4500""","""161128s2016 xr a f m 0…","[""(brožováno)""]",,"[""978-80-01-06051-3""]",,,,,,,"""1""","""0""","""Perspektivní nástrojové oceli""","""Perspective tool steels : habi…","""Jana Sobotová""",,,,,,"[""35 stran :""]","[""ilustrace (některé barevné) ;""]","[""21 cm""]",,,,…,,,2016,35,"""Q95144655""",,,,,[],"""Jana Sobotová""","""Jana Sobotová""",,,,"""Narozena 23. 10. 1961. Ing., P…",,,,"[""pedagog"", ""strojní inženýr"", ""vysokoškolský učitel""]",,,,,,,,"""žena""",,,1961.0,"""+1961-10-23T00:00:00Z""",,,,,
"""1""","""Kynčl, Radko,""","""xx0002836""","[""aut""]","""1949-""",,,,,"""cpk19980304211""",""" nam a22 a 4500""","""980330s1997 xr a e c 0…","[""(brož.)""]",,"[""80-7037-054-8""]","[""80-7027-054-8""]",,,,,,"""1""","""0""","""Mechanické energetické stroje""","""katalog sbírky Národního techn…","""Radko Kynčl""",,,,,,"[""152 s. :""]","[""il. ;""]","[""25 cm""]",,,,…,,,1997,152,"""Q96243180""",,,,,[],"""Radko Kynčl""","""Radko Kynčl""",,,,"""autor, kurátor Národního techn…","""author, curator""",,,"[""kurátor"", ""historik""]",,,,,,,,"""muž""",,,1949.0,"""+1949-00-00T00:00:00Z""",,,,,
"""1""","""Hruban, Jaroslav,""","""jk01042900""","[""aut""]","""1886-1934""",,,,,"""bk193001272""",""" nam a22 1 4500""","""991027s1930 xr 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Základy estetické hodnoty""","""příspěvek k metafysice estetik…","""Jaroslav Hruban""",,,,,,"[""138 s. ;""]",,"[""17 cm""]",,,,…,,,1930,138,"""Q60036925""",,,,,[],"""Jaroslav Hruban""","""Jaroslav Hruban""",,"[""Československo""]",,"""český pedagog, básník a překla…","""Czech educator, poet and trans…",,,"[""básník"", ""překladatel"", … ""prozaik""]",,"[""Filozofická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Valašské Klobouky""]","[""Uherský Brod""]",1886.0,"""+1886-04-27T00:00:00Z""",1934.0,"""+1934-07-09T00:00:00Z""",,,
"""1""","""Koníček, Zdeněk,""","""jk01061528""","[""aut""]","""1924-""",,,,,"""bk195600764""",""" nam a22 1 4500""","""980924s1956 xr …",,,,,,,,,,"""1""","""0""","""Usazovací nádrže""",,"""[Autor:] Zdeněk Konček""",,,,,,"[""118, [1] s. :""]","[""[1] příl. ;""]","[""8°""]",,,,…,,,1956,118,"""Q112347308""",,,,,[],"""Zdeněk Koníček""",,,,,"""Narozen 1924. Doc., Ing., stav…",,,,"[""stavební inženýr"", ""vysokoškolský učitel""]",,,,,,,,"""muž""",,,1924.0,"""+1924-00-00T00:00:00Z""",,,,,


In [14]:
def zebricek(sloupec, pocet=10):
    """Return the most frequent values of one list column of the global `df`.

    The column is exploded first, so a row contributes once per list element;
    rows whose list is null are counted under the null key.

    Args:
        sloupec: name of the list column to rank.
        pocet: how many of the most frequent values to return (default 10).

    Returns:
        A polars DataFrame with the columns `sloupec` and `len`, sorted by
        `len` in descending order and truncated to `pocet` rows.
    """
    return (
        df.explode(sloupec)
        .group_by(sloupec)
        .len()
        .sort(by="len", descending=True)
        .head(pocet)
    )

In [15]:
# Top ten values of the `vezeni` column (places of imprisonment).
zebricek("vezeni")

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [16]:
# Top ten values of the `strany` column (political parties).
zebricek("strany")

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [17]:
# Top ten values of the `profese` column (professions).
zebricek("profese")

profese,len
str,u32
,81272
"""spisovatel""",72968
"""učitel""",36484
"""vysokoškolský učitel""",32279
"""překladatel""",30444
"""básník""",29000
"""pedagog""",28848
"""publicista""",25746
"""redaktor""",24075
"""historik""",21399


In [18]:
# Top ten values of the `udalosti` column (events).
zebricek("udalosti")

udalosti,len
str,u32
,297772
"""ordinace""",803
"""odvod branců""",442
"""svatba""",411
"""Transport Ek""",117
"""Transport Cc""",64
"""Transport L""",64
"""svěcení""",60
"""emigrace""",58
"""Transport Ds""",58


In [19]:
def zkusenost(sloupec="", nazev="", hodnota=None, rok=1900):
    """Compute, per publication year, the share of books whose author has a given trait.

    Reads the global `df` (one row per book).

    Args:
        sloupec: column of `df` that carries the trait.
        nazev: label stored in the `co` column of the result (used as the
            series name in charts).
        hodnota: when given, `sloupec` must be a list column and a book matches
            if its list contains this value; when None, a book matches if
            `sloupec` is not null.
        rok: first publication year to include (inclusive).

    Returns:
        A polars DataFrame sorted by `rok` with the columns `rok`, `len`
        (all books that year), `len_right` (matching books), `podil`
        (`len_right / len`) and `co`. Years without any matching book keep
        null in `len_right` and `podil`.
    """
    # Denominator: every book published in `rok` or later, counted per year.
    vsechny = df.filter(pl.col("rok") >= rok).group_by("rok").len()

    if hodnota is not None:
        # list.contains matches each book at most once, even when the value is
        # repeated inside the list (explode + == would count it once per repeat).
        odpovidajici = df.filter(pl.col(sloupec).list.contains(hodnota))
    else:
        odpovidajici = df.filter(pl.col(sloupec).is_not_null())
    srovnani = odpovidajici.group_by("rok").len()

    # The left join keeps every year of the denominator; the numerator's count
    # column arrives as `len_right`.
    return (
        vsechny.join(srovnani, on='rok', how='left')
        .with_columns((pl.col('len_right') / pl.col('len')).alias('podil'))
        .sort(by="rok")
        .with_columns(pl.lit(nazev).alias("co"))
    )

In [20]:
# Yearly share of books whose author has any value in the `vezeni` column.
kriminal = zkusenost(sloupec="vezeni", nazev="…internaci ve vězení nebo koncentračním táboře")

In [21]:
# Yearly share of books whose author's `strany` list includes "Komunistická strana Československa".
ksc = zkusenost(sloupec="strany", hodnota="Komunistická strana Československa", nazev="…členství v komunistické straně")

In [22]:
# Yearly share of books whose author's `vezeni` list includes "Malá pevnost Terezín".
terezin = zkusenost(sloupec="vezeni", nazev="internace v Terezíně", hodnota="Malá pevnost Terezín")

In [23]:
# Yearly share of books whose author has a non-null `instagram` value.
instagram = zkusenost(sloupec="instagram",nazev="účet na Instagramu")

In [24]:
# Yearly share of books whose author has a non-null `facebook` value.
fb = zkusenost(sloupec="facebook", nazev="účet na Facebooku")

In [25]:
# Yearly share of books whose author has a non-null `web` value.
web = zkusenost(
    sloupec="web",
    nazev="…osobní webové stránky",
)
web

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1900,432,,,"""…osobní webové stránky"""
1901,602,,,"""…osobní webové stránky"""
1902,745,1,0.001342,"""…osobní webové stránky"""
1903,631,,,"""…osobní webové stránky"""
1904,648,1,0.001543,"""…osobní webové stránky"""
1905,613,1,0.001631,"""…osobní webové stránky"""
1906,665,1,0.001504,"""…osobní webové stránky"""
1907,645,1,0.00155,"""…osobní webové stránky"""
1908,788,2,0.002538,"""…osobní webové stránky"""
1909,727,,,"""…osobní webové stránky"""


In [26]:
# Display the Terezín series ordered by year (zkusenost already returns it sorted by `rok`).
terezin.sort(by="rok")

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1900,432,1,0.002315,"""internace v Terezíně"""
1901,602,1,0.001661,"""internace v Terezíně"""
1902,745,1,0.001342,"""internace v Terezíně"""
1903,631,2,0.00317,"""internace v Terezíně"""
1904,648,3,0.00463,"""internace v Terezíně"""
1905,613,2,0.003263,"""internace v Terezíně"""
1906,665,2,0.003008,"""internace v Terezíně"""
1907,645,5,0.007752,"""internace v Terezíně"""
1908,788,5,0.006345,"""internace v Terezíně"""
1909,727,4,0.005502,"""internace v Terezíně"""


In [27]:
# Facet order, top to bottom. The entries must equal the `nazev` labels given to
# zkusenost() for the three plotted series (they end up in the `co` column);
# labels that are not present in the data match nothing and leave the order
# to the default.
poradi_rad = [
    "…členství v komunistické straně",
    "…internaci ve vězení nebo koncentračním táboře",
    "…osobní webové stránky",
]
# The color order is the reverse of the facet order, so the three colors in
# `range` are assigned web -> prison/camp -> party membership.
poradi_barev = list(reversed(poradi_rad))

podily_faceted = (
    alt.Chart(
        alt_friendly(pl.concat([kriminal, ksc, web])),
        title={
            # A plain string rather than a one-element list: the export helper's
            # generated alt text showed the list repr ("['Kolik …']").
            'text': "Kolik českých autorů mělo za sebou nebo před sebou…",
            "subtitle": [
                "Jak velkou část knih poprvé vydaných v daném roce napsali lidé s určitou",
                "zkušeností – bez ohledu na to, kdy ji udělali. Zuby v roce 1990 znamenají,",
                "že po revoluci začali vycházet jak lidé dříve věznění, tak vyloučení z KSČ.",
                "Data jsou neúplná, reálné podíly budou spíše vyšší; podstatné jsou zde trendy.",
            ],
        },
    )
    .mark_bar(width=2)
    .encode(
        # NOTE(review): `rok` is an integer year encoded as temporal; presumably
        # alt_friendly converts it to a date — confirm.
        alt.X(
            "rok:T",
            title=None,
            axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6', tickCount=10),
        ),
        # Shares are fractions; the label expression renders them as percentages
        # and the axis is capped at 10 %.
        alt.Y(
            'podil:Q',
            axis=alt.Axis(labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6'),
            title=None,
            scale=alt.Scale(domainMax=0.1),
        ),
        alt.Color(
            "co:N",
            title=None,
            legend=None,
            scale=alt.Scale(range=['#5E2D3A', '#D6534B', '#445B78']),
            sort=poradi_barev,
        ),
        # One row per series; the facet header doubles as the series label.
        row=alt.Row(
            "co:N",
            title=None,
            spacing=15,
            header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='start', labelFontWeight=500, labelFont='Asap', labelOrient="top"),
            sort=poradi_rad,
        ),
    )
    # NOTE(review): resolve_scale is called again further down with only
    # x="shared"; confirm whether y='independent' from this call is meant to
    # survive the later call.
    .resolve_scale(
        x='independent',
        y='independent',
    )
    .properties(
        width=kredity['sirka'] * 1.15,
        height=kredity['vyska_nizkych'],
        autosize={'type': 'fit', 'contains': 'padding'},
    )
    .configure_view(stroke='transparent')
    .resolve_scale(x="shared")
    .resolve_axis(x="independent")
    .configure_axis(grid=False, domain=False)
)

podily_faceted

In [28]:
# Hand the chart to the project export helper under the name "02_zkusenosti";
# the cell output is the <figure> embed snippet pointing at the exported SVG.
me_to_neurazi(podily_faceted, soubor="02_zkusenosti", kredity=kredity['wiki'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" width="100%" alt="Graf s titulkem „['Kolik českých autorů mělo za sebou nebo před sebou…']“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>
