In [1]:
import os
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin

pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

In [2]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))
df = df.filter(pl.col("rok").is_between(1800,2024))
df = df.filter(pl.col("stran") > 30)

df = df.drop_nulls(subset=['100_7'])
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a"], keep="first")
print(len(df))

716789
449120


In [3]:
aut = pl.read_parquet(os.path.join("data","aut_vyber.parquet"))
cesi = aut.explode("370_c").filter(pl.col("370_c").str.contains("Česk")).explode("100_7").select(pl.col("100_7")).to_series().to_list()
print(len(cesi))
df = df.filter(pl.col("100_7").is_in(cesi))

364420


In [4]:
wikid = pl.read_parquet(os.path.join("data","wikidata.parquet"))

In [5]:
df.sample(3)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64
"""1""","""Prantl, Ferdinand,""","""jk01100899""","[""aut""]","""1907-1982""",,,,,"""bk195405677""",""" nam a22 1 4500""","""980725s1954 xr e | 0…",,,,,,,,,,"""1""","""0""","""O českých zástupcích čeledi Ha…",,"""Ferdinand Prantl, Alois Přibyl""",,,,,,"[""170 s. :""]","[""10 tb. ;""]","[""4°""]",,,,"[""7""]","[""studie""]","[""fd133597""]","[""czenas""]",,,,"[""1""]","[""Přibyl, Alois,""]","[""aut""]","[""1914-1988""]","[""jk01101524""]",,,,,,,,,,,,,,,,,,,,,,1954,170
"""1""","""Miklík, Josef Konstantin,""","""jk01081552""","[""aut""]","""1895-1966""",,,,,"""bk193901843""",""" nam a22 1 4500""","""990305s1939 xr …","[""(Váz.)""]",,,,,,,,,"""1""","""0""","""Sokolská tradice""","""Texty a glosy dokumentární /""","""Kristián Michna [pseud.]""",,,,,,"[""87, [I] s. ;""]",,"[""8°""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1939,87
"""1""","""Petržela, Zdeněk""","""jx20041227004""","[""aut""]",,,,,,"""bk197603604""",""" nam a22 1 4500""","""970507s1975 xr a e p 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Tváření III""","""konstrukce a výpočty tvářecích…","""Zdeněk Petržela""",,,,,,"[""325 s. :""]","[""il. ;""]","[""29 cm""]",,,,"[""7""]","[""učebnice vysokých škol""]","[""fd133772""]","[""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1975,325


In [6]:
df = df.join(wikid.rename({"__index_level_0__":"100_7"}), left_on='100_7', right_on='100_7', how='left')

In [7]:
df = df.filter((pl.col('w_umrti') >= pl.col('rok')) | pl.col('w_umrti').is_null())

In [8]:
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Krchňák, Rudolf,""","""ola200205571""","[""aut""]","""1905-1996""",,,,,"""np9424560""",""" nam a22 a 4500""","""940623s1992 xr 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Nevidomí známí, neznámí""",,"""Rudolf Krchňák""",,,,,,"[""216 s. ;""]",,"[""29 cm""]",,,,…,,,1992,216,"""Q12354704""",,,,,"[""eo""]","""Rudolf Krchňák""","""Rudolf Krchňák""",,"[""Česko""]",,"""český esperantista a učitel hu…","""Czech Esperantist""",,,"[""esperantista"", ""hudební skladatel"", ""publicista""]",,,,,,,,"""muž""","[""Brno""]","[""Brno""]",1905.0,"""+1905-08-03T00:00:00Z""",1996.0,"""+1996-01-09T00:00:00Z""",,,
"""1""","""Jansa, Pavel,""","""jn20010309161""","[""aut""]","""1942-""",,,,,"""nkc20172929900""",""" nam a22 i 4500""","""170830s2017 xr g 0…","[""(brožováno) :""]","[""Kč 250,00""]","[""978-80-88143-10-9""]",,,,,,,"""1""","""0""","""Tenkrát v Olomouci""",,"""Pavel Jansa""",,,,,,"[""246 stran ;""]",,"[""21 cm""]",,,,…,,,2017,246,"""Q12044098""",,,,,"[""cs""]","""Pavel Jansa""","""Pavel Jansa""",,"[""Československo""]",,"""český lékař, patolog, překlada…","""Czech doctor, patologist, tran…",,,"[""lékař"", ""pedagog"", … ""prozaik""]","[""kandidát tajné spolupráce"", ""tajný spolupracovník""]",,,,,,,"""muž""","[""Prostějov""]",,1942.0,"""+1942-05-27T00:00:00Z""",,,,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Máka, Antonín Bohumil,""","""jk01073139""","[""aut""]","""1867-1929""",,,,,"""nos190227490""",""" nam a22 1 4500""","""000829s1904 xr cf 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Matrika rodu Mákova ve Stružin…",,"""sepsal A.B. Máka""",,,,,,"[""201 s., [13] l. obr. příl. :""]","[""portréty ;""]","[""23 cm""]",,,,…,,,1904,201,"""Q95332555""",,,,,[],"""Antonín Bohumil Máka""","""Antonín Bohumil Máka""",,,,"""Narozen 20.2.1867 ve Stružinci…",,,,"[""katolický kněz""]",,,,,,,,"""muž""","[""Stružnice""]","[""Sopoty""]",1867.0,"""+1867-02-20T00:00:00Z""",1929.0,"""+1929-01-01T00:00:00Z""",,,
"""1""","""Čermáková-Sluková, Anežka,""","""jk01021363""","[""aut""]","""1864-1947""",,,,,"""cpk20060909164""",""" nam a22 a 4500""","""060511s1893 xr af j 0…","[""(Váz.)""]",,,,,,,,,"""1""","""0""","""Povídej, povídej""","""nová sbírka oblíbených pohádek…","""pro české dítky upravila Anežk…",,,,,,"[""123 s., [3] l. barev. obr. příl. :""]","[""il. ;""]","[""21 cm""]",,,,…,,,1893,123,"""Q91800230""",,,,,"[""cs""]","""Anežka Čermáková-Sluková""","""Anežka Čermáková-Sluková""",,"[""Československo""]",,"""česká učitelka a spisovatelka""",,,,"[""učitel"", ""spisovatel"", … ""vydavatel""]",,,,,,,,"""žena""","[""Dolení Paseky""]","[""Praha""]",1864.0,"""+1864-02-12T00:00:00Z""",1947.0,"""+1947-03-07T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Krejčí, Jaroslav,""","""jk01063203""","[""aut""]","""1892-1956""",,,,,"""bk193101825""",""" nam a22 1 4500""","""991019s1931 xr |…",,,,,,,,,,"""1""","""0""","""Zásada právnosti státních funk…",,"""napsal Jaroslav Krejčí""",,,,,,"[""179, [III] s. ;""]",,"[""8°""]",,,,…,,,1931,179,"""Q557235""",,,,,"[""en"", ""arz"", … ""cs""]","""Jaroslav Krejčí""","""Jaroslav Krejčí""",,"[""Československo"", ""Předlitavsko""]",,"""český politik, právník a ústav…","""Czechoslovak lawyer and politi…","[""Jaroslav Krejčí ml.""]",,"[""politik"", ""pedagog"", … ""právník""]",,"[""Právnická fakulta Univerzity Karlovy""]",,"[""Národní souručenství""]",,,,"""muž""","[""Křemenec""]","[""Leopoldov""]",1892.0,"""+1892-06-27T00:00:00Z""",1956.0,"""+1956-05-18T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…","""https://en.wikipedia.org/wiki/…"
"""1""","""Rychnovský, Richard,""","""jk01103330""","[""aut""]","""1906-""",,,,,"""bk196400906""",""" nam a22 1 4500""","""970918s1964 xr e p 0…",,,,,,,,,,"""1""","""0""","""Úvod do vyšší matematiky""","""učebnice pro vys. školy zeměd.…","""Richard Rychnovský""",,,,,,"[""382 s. :""]","[""il. ;""]","[""8°""]",,,,…,,,1964,382,"""Q112349243""",,,,,[],"""Richard Rychnovský""","""Richard Rychnovský""",,,,"""Narozen 1906. RNDr., docent ma…",,,,,,,,,,,,"""muž""",,,1906.0,"""+1906-00-00T00:00:00Z""",,,,,
"""1""","""Maršálek, Zdeněk""","""mzk2011636342""","[""aut""]",,,,,,"""np9312567""",""" nam a22 4500""","""940120s1993 xr a u0…","[""(brož.) :""]","[""Kč 24,00""]","[""80-7078-160-2""]",,,,,,,"""1""","""0""","""Základy hlubinného dobývání lo…",,"""Zdeněk Maršálek, Vlastimil Hud…",,,,,,"[""145 s. :""]","[""il. ;""]","[""29 cm""]",,,,…,,,1993,145,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""1""","""Sternberg, Vojtěch Václav,""","""jk01121159""","[""aut""]","""1868-1930""",,"[""hrabě,""]",,,"""cpk20122415177""",""" nam a22 a 4500""","""121102s1908 xr 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Román lásky mateřské""","""(bůh křesťanský a bůh židovský…","""ze současného života šlechtick…",,,,,,"[""99 s. ;""]",,"[""18 cm""]",,,,…,,,1908,99,"""Q15735535""",,,,,"[""de"", ""cs""]","""Vojtěch Václav ze Sternbergu""","""Vojtěch Václav Sternberg""",,"[""Rakousko-Uhersko"", ""Československo""]",,"""český šlechtic""","""Czech nobleman""",,,"[""politik"", ""publicista""]",,,"[""Leopold Albert ze Sternbergu""]",,,,,"""muž""","[""Pohořelice"", ""Pohořelice""]","[""Vídeň""]",1868.0,"""+1868-01-14T00:00:00Z""",1930.0,"""+1930-04-25T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Landa, Ondřej,""","""xx0111540""","[""aut""]","""1985-""",,,,,"""nkc20233518168""",""" cam a22 i 4500""","""230419s2023 xr e 0…","[""(vázáno)""]",,"[""978-80-7530-412-4""]",,,,,,,"""1""","""0""","""Min Tanaka a japonská cesta tě…",,"""Ondřej Landa""",,,,,,"[""278 stran ;""]",,"[""20 cm""]",,,,…,,,2023,278,"""Q112426260""",,,,,[],"""Ondřej Landa""",,,,,"""Narozen 1985 v České Lípě. Etn…",,,,"[""etnograf""]",,,,,,,,"""muž""","[""Česká Lípa""]",,1985.0,"""+1985-00-00T00:00:00Z""",,,,,
"""1""","""Zavřel, František,""","""jk01152269""","[""aut""]","""1884-1947""",,,,,"""bk192905740""",""" nam a22 1 4500""","""991207s1929 xr …",,,,,,,,,,"""1""","""0""","""Věčné mládí""","""román /""","""František Zavřel""",,,,,,"[""209, [2] s. ;""]",,"[""8°""]",,,,…,,,1929,209,"""Q12017640""",,,,,"[""cs""]","""František Zavřel""","""František Zavřel""",,"[""Československo""]",,"""český dramatik a spisovatel""","""Czech playwright and writer (1…",,,"[""spisovatel"", ""dramatik"", … ""státní úředník""]",,"[""Filozofická fakulta Univerzity Karlovy"", ""Právnická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Trhová Kamenice""]","[""Praha""]",1885.0,"""+1885-11-01T00:00:00Z""",1947.0,"""+1947-12-04T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",


In [9]:
strany = df.explode('strany').group_by('strany').len().sort(by='len',descending=True)

In [10]:
strany

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [11]:
vezeni = df.explode('vezeni').group_by('vezeni').len().sort(by='len',descending=True)
vezeni

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [12]:
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Krchňák, Rudolf,""","""ola200205571""","[""aut""]","""1905-1996""",,,,,"""np9424560""",""" nam a22 a 4500""","""940623s1992 xr 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Nevidomí známí, neznámí""",,"""Rudolf Krchňák""",,,,,,"[""216 s. ;""]",,"[""29 cm""]",,,,…,,,1992,216,"""Q12354704""",,,,,"[""eo""]","""Rudolf Krchňák""","""Rudolf Krchňák""",,"[""Česko""]",,"""český esperantista a učitel hu…","""Czech Esperantist""",,,"[""esperantista"", ""hudební skladatel"", ""publicista""]",,,,,,,,"""muž""","[""Brno""]","[""Brno""]",1905.0,"""+1905-08-03T00:00:00Z""",1996.0,"""+1996-01-09T00:00:00Z""",,,
"""1""","""Jansa, Pavel,""","""jn20010309161""","[""aut""]","""1942-""",,,,,"""nkc20172929900""",""" nam a22 i 4500""","""170830s2017 xr g 0…","[""(brožováno) :""]","[""Kč 250,00""]","[""978-80-88143-10-9""]",,,,,,,"""1""","""0""","""Tenkrát v Olomouci""",,"""Pavel Jansa""",,,,,,"[""246 stran ;""]",,"[""21 cm""]",,,,…,,,2017,246,"""Q12044098""",,,,,"[""cs""]","""Pavel Jansa""","""Pavel Jansa""",,"[""Československo""]",,"""český lékař, patolog, překlada…","""Czech doctor, patologist, tran…",,,"[""lékař"", ""pedagog"", … ""prozaik""]","[""kandidát tajné spolupráce"", ""tajný spolupracovník""]",,,,,,,"""muž""","[""Prostějov""]",,1942.0,"""+1942-05-27T00:00:00Z""",,,,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Máka, Antonín Bohumil,""","""jk01073139""","[""aut""]","""1867-1929""",,,,,"""nos190227490""",""" nam a22 1 4500""","""000829s1904 xr cf 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Matrika rodu Mákova ve Stružin…",,"""sepsal A.B. Máka""",,,,,,"[""201 s., [13] l. obr. příl. :""]","[""portréty ;""]","[""23 cm""]",,,,…,,,1904,201,"""Q95332555""",,,,,[],"""Antonín Bohumil Máka""","""Antonín Bohumil Máka""",,,,"""Narozen 20.2.1867 ve Stružinci…",,,,"[""katolický kněz""]",,,,,,,,"""muž""","[""Stružnice""]","[""Sopoty""]",1867.0,"""+1867-02-20T00:00:00Z""",1929.0,"""+1929-01-01T00:00:00Z""",,,
"""1""","""Čermáková-Sluková, Anežka,""","""jk01021363""","[""aut""]","""1864-1947""",,,,,"""cpk20060909164""",""" nam a22 a 4500""","""060511s1893 xr af j 0…","[""(Váz.)""]",,,,,,,,,"""1""","""0""","""Povídej, povídej""","""nová sbírka oblíbených pohádek…","""pro české dítky upravila Anežk…",,,,,,"[""123 s., [3] l. barev. obr. příl. :""]","[""il. ;""]","[""21 cm""]",,,,…,,,1893,123,"""Q91800230""",,,,,"[""cs""]","""Anežka Čermáková-Sluková""","""Anežka Čermáková-Sluková""",,"[""Československo""]",,"""česká učitelka a spisovatelka""",,,,"[""učitel"", ""spisovatel"", … ""vydavatel""]",,,,,,,,"""žena""","[""Dolení Paseky""]","[""Praha""]",1864.0,"""+1864-02-12T00:00:00Z""",1947.0,"""+1947-03-07T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Krejčí, Jaroslav,""","""jk01063203""","[""aut""]","""1892-1956""",,,,,"""bk193101825""",""" nam a22 1 4500""","""991019s1931 xr |…",,,,,,,,,,"""1""","""0""","""Zásada právnosti státních funk…",,"""napsal Jaroslav Krejčí""",,,,,,"[""179, [III] s. ;""]",,"[""8°""]",,,,…,,,1931,179,"""Q557235""",,,,,"[""en"", ""arz"", … ""cs""]","""Jaroslav Krejčí""","""Jaroslav Krejčí""",,"[""Československo"", ""Předlitavsko""]",,"""český politik, právník a ústav…","""Czechoslovak lawyer and politi…","[""Jaroslav Krejčí ml.""]",,"[""politik"", ""pedagog"", … ""právník""]",,"[""Právnická fakulta Univerzity Karlovy""]",,"[""Národní souručenství""]",,,,"""muž""","[""Křemenec""]","[""Leopoldov""]",1892.0,"""+1892-06-27T00:00:00Z""",1956.0,"""+1956-05-18T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…","""https://en.wikipedia.org/wiki/…"
"""1""","""Rychnovský, Richard,""","""jk01103330""","[""aut""]","""1906-""",,,,,"""bk196400906""",""" nam a22 1 4500""","""970918s1964 xr e p 0…",,,,,,,,,,"""1""","""0""","""Úvod do vyšší matematiky""","""učebnice pro vys. školy zeměd.…","""Richard Rychnovský""",,,,,,"[""382 s. :""]","[""il. ;""]","[""8°""]",,,,…,,,1964,382,"""Q112349243""",,,,,[],"""Richard Rychnovský""","""Richard Rychnovský""",,,,"""Narozen 1906. RNDr., docent ma…",,,,,,,,,,,,"""muž""",,,1906.0,"""+1906-00-00T00:00:00Z""",,,,,
"""1""","""Maršálek, Zdeněk""","""mzk2011636342""","[""aut""]",,,,,,"""np9312567""",""" nam a22 4500""","""940120s1993 xr a u0…","[""(brož.) :""]","[""Kč 24,00""]","[""80-7078-160-2""]",,,,,,,"""1""","""0""","""Základy hlubinného dobývání lo…",,"""Zdeněk Maršálek, Vlastimil Hud…",,,,,,"[""145 s. :""]","[""il. ;""]","[""29 cm""]",,,,…,,,1993,145,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""1""","""Sternberg, Vojtěch Václav,""","""jk01121159""","[""aut""]","""1868-1930""",,"[""hrabě,""]",,,"""cpk20122415177""",""" nam a22 a 4500""","""121102s1908 xr 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Román lásky mateřské""","""(bůh křesťanský a bůh židovský…","""ze současného života šlechtick…",,,,,,"[""99 s. ;""]",,"[""18 cm""]",,,,…,,,1908,99,"""Q15735535""",,,,,"[""de"", ""cs""]","""Vojtěch Václav ze Sternbergu""","""Vojtěch Václav Sternberg""",,"[""Rakousko-Uhersko"", ""Československo""]",,"""český šlechtic""","""Czech nobleman""",,,"[""politik"", ""publicista""]",,,"[""Leopold Albert ze Sternbergu""]",,,,,"""muž""","[""Pohořelice"", ""Pohořelice""]","[""Vídeň""]",1868.0,"""+1868-01-14T00:00:00Z""",1930.0,"""+1930-04-25T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Landa, Ondřej,""","""xx0111540""","[""aut""]","""1985-""",,,,,"""nkc20233518168""",""" cam a22 i 4500""","""230419s2023 xr e 0…","[""(vázáno)""]",,"[""978-80-7530-412-4""]",,,,,,,"""1""","""0""","""Min Tanaka a japonská cesta tě…",,"""Ondřej Landa""",,,,,,"[""278 stran ;""]",,"[""20 cm""]",,,,…,,,2023,278,"""Q112426260""",,,,,[],"""Ondřej Landa""",,,,,"""Narozen 1985 v České Lípě. Etn…",,,,"[""etnograf""]",,,,,,,,"""muž""","[""Česká Lípa""]",,1985.0,"""+1985-00-00T00:00:00Z""",,,,,
"""1""","""Zavřel, František,""","""jk01152269""","[""aut""]","""1884-1947""",,,,,"""bk192905740""",""" nam a22 1 4500""","""991207s1929 xr …",,,,,,,,,,"""1""","""0""","""Věčné mládí""","""román /""","""František Zavřel""",,,,,,"[""209, [2] s. ;""]",,"[""8°""]",,,,…,,,1929,209,"""Q12017640""",,,,,"[""cs""]","""František Zavřel""","""František Zavřel""",,"[""Československo""]",,"""český dramatik a spisovatel""","""Czech playwright and writer (1…",,,"[""spisovatel"", ""dramatik"", … ""státní úředník""]",,"[""Filozofická fakulta Univerzity Karlovy"", ""Právnická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Trhová Kamenice""]","[""Praha""]",1885.0,"""+1885-11-01T00:00:00Z""",1947.0,"""+1947-12-04T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",


In [13]:
def zebricek(sloupec):
    return df.explode(sloupec).group_by(sloupec).len().sort(by="len",descending=True).head(10)

In [14]:
zebricek("vezeni")

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [15]:
zebricek("strany")

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [16]:
zebricek("profese")

profese,len
str,u32
,81272
"""spisovatel""",72968
"""učitel""",36484
"""vysokoškolský učitel""",32279
"""překladatel""",30444
"""básník""",29000
"""pedagog""",28848
"""publicista""",25746
"""redaktor""",24075
"""historik""",21399


In [17]:
zebricek("udalosti")

udalosti,len
str,u32
,297772
"""ordinace""",803
"""odvod branců""",442
"""svatba""",411
"""Transport Ek""",117
"""Transport L""",64
"""Transport Cc""",64
"""svěcení""",60
"""emigrace""",58
"""Transport Ds""",58


In [18]:
def zkusenost(sloupec="", nazev="", hodnota=None, rok=1920):
    vysledek = df.filter(pl.col("rok") >= rok).group_by("rok").len().sort(by="rok")
    if hodnota != None:
        srovnani = df.explode(sloupec).filter(pl.col(sloupec) == hodnota).group_by("rok").len()
    else:
        srovnani = df.filter(~pl.col(sloupec).is_null()).group_by("rok").len()
    return vysledek.join(srovnani, on='rok', how='left').with_columns((pl.col('len_right') / pl.col('len')).alias('podil')).with_columns(pl.lit(nazev).alias("co"))

In [19]:
kriminal = zkusenost(sloupec="vezeni", nazev="pobyt ve vězení")

In [20]:
ksc = zkusenost(sloupec="strany", hodnota="Komunistická strana Československa", nazev="členství v KSČ")

In [21]:
terezin = zkusenost(sloupec="vezeni", nazev="internace v Terezíně", hodnota="Malá pevnost Terezín")

In [22]:
instagram = zkusenost(sloupec="instagram",nazev="účet na Instagramu")

In [23]:
fb = zkusenost(sloupec="facebook", nazev="účet na Facebooku")

In [24]:
web=zkusenost(sloupec="web",nazev="webové stránky")
web

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1920,1209,,,"""webové stránky"""
1921,1088,2,0.001838,"""webové stránky"""
1922,1249,3,0.002402,"""webové stránky"""
1923,1282,2,0.00156,"""webové stránky"""
1924,1433,5,0.003489,"""webové stránky"""
1925,1501,,,"""webové stránky"""
1926,1422,3,0.00211,"""webové stránky"""
1927,1380,5,0.003623,"""webové stránky"""
1928,1447,6,0.004147,"""webové stránky"""
1929,1469,9,0.006127,"""webové stránky"""


In [25]:
terezin.sort(by="rok")

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1920,1209,21,0.01737,"""internace v Terezíně"""
1921,1088,7,0.006434,"""internace v Terezíně"""
1922,1249,9,0.007206,"""internace v Terezíně"""
1923,1282,12,0.00936,"""internace v Terezíně"""
1924,1433,30,0.020935,"""internace v Terezíně"""
1925,1501,20,0.013324,"""internace v Terezíně"""
1926,1422,28,0.019691,"""internace v Terezíně"""
1927,1380,29,0.021014,"""internace v Terezíně"""
1928,1447,27,0.018659,"""internace v Terezíně"""
1929,1469,29,0.019741,"""internace v Terezíně"""


In [26]:
podily_faceted = alt.Chart(
    alt_friendly(pl.concat([kriminal, ksc, fb])), 
    title={'text': ["Co měli spisovatelé za sebou nebo před sebou"], "subtitle":
        ["Jak velkou část knih poprvé vydaných v konkrétním roce napsali",
        "lidé s určitou životní zkušeností – bez ohledu na to, kdy tuto",
        "zkušenost udělali. Povšimněte si drobných zubů v roce 1990:", 
        "po revoluci vycházeli jak lidé dříve věznění, tak vyloučení členové KSČ.",
        "Data jsou zde pochopitelně neúplná, reálné podíly budou spíše vyšší."
    ]}, width=300).mark_bar(width=2).encode(
    alt.X("rok:T", 
          title=None,
          axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')
    ),
    alt.Y('podil:Q', 
          axis=alt.Axis(labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6'), 
          title=None,
          scale=alt.Scale(domainMax=0.1)
    ),
    alt.Color("co:N", 
              title=None, 
              legend=None,
              scale=alt.Scale(range=['#445B78', '#5E2D3A', '#D6534B', '#9CA545','#E09DA3']),
              sort=["účet na Facebooku","pobyt ve vězení","členství v KSČ"]
    ),
    row=alt.Row(
        "co:N", 
        title=None, 
        spacing=15, 
        header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='middle', labelFont='Asap'),
        sort=["členství v KSČ","pobyt ve vězení","účet na Facebooku"]
    )
).resolve_scale(
    x='independent',
    y='independent'
).properties(height=100, width=220).configure_view(stroke='transparent').resolve_scale(x="shared").resolve_axis(x="independent")

podily_faceted

In [27]:
df.group_by("rok").len().sort(by="rok")

rok,len
i64,u32
1801,5
1802,3
1803,5
1804,7
1805,9
1806,4
1807,9
1808,5
1809,4
1810,4
