In [1]:
import os
import json
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.me_to_neurazi import me_to_neurazi

# Shared chart settings; later cells read the 'sirka', 'vyska_nizkych' and 'wiki' keys.
cesta_kredity = os.path.join('src', 'kredity.json')
with open(cesta_kredity, 'r', encoding='utf-8') as soubor:
    kredity = json.load(soubor)

# Notebook-wide display and plotting configuration.
pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
# NOTE(review): this silences every warning, including deprecations from polars/altair.
warnings.filterwarnings('ignore')

In [2]:
SLOZKA_SLOUPCU = "data/cnb_sloupce"


def nacti_pole(pole):
    """Read the parquet file that stores one catalogue field (e.g. "245")."""
    return pl.read_parquet(os.path.join(SLOZKA_SLOUPCU, f"{pole}.parquet"))


# Start from field 100 and attach the two fixed-width fields needed for filtering.
# Every per-field table is keyed by the record id in column "001".
df = nacti_pole("100")
for pole in ("leader", "008"):
    df = df.join(nacti_pole(pole), on="001", how="left")

# Fixed-position filters done natively in polars (no pandas round-trip of the
# whole table). Presumably MARC 21 positions — TODO confirm:
#   leader[6]   record type, keep "a" / "t"
#   leader[7]   bibliographic level, drop "b", "i", "s" and blank
#   008[15:17]  place of publication == "xr"
#   008[35:38]  language == "cze"
# Rows with a null leader or 008 are dropped, as in the pandas version.
df = df.filter(
    pl.col("leader").str.slice(6, 1).is_in(["a", "t"])
    & ~pl.col("leader").str.slice(7, 1).is_in(["b", "i", "s", " "])
    & (pl.col("008").str.slice(15, 2) == "xr")
    & (pl.col("008").str.slice(35, 3) == "cze")
)

# Attach the remaining descriptive fields.
for pole in ("020", "022", "245", "300", "655", "700"):
    df = df.join(nacti_pole(pole), on="001", how="left")

# Keep only records without any 022_a value.
df = df.explode("022_a").filter(pl.col("022_a").is_null())

# Derived columns computed by project helpers: year from 008, page count from 300_a,
# cleaned title (245_a) and cleaned part title (245_p).
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))  # row count before the year / page / author filters

df = df.filter(pl.col("rok").is_between(1800, 2024))
df = df.filter(pl.col("stran") > 30)

# Only records whose main author has an identifier (100_7).
df = df.drop_nulls(subset=['100_7'])
# Drop records whose 245_h mentions "grafika", then keep one row per (author, title).
df = df.filter(
    (~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()
).unique(subset=["100_a", "245_a"], keep="first")
print(len(df))  # row count after all filters

716789
449120


In [3]:
# Author table: collect the 100_7 identifiers of everyone whose 370_c contains "Česk".
aut = pl.read_parquet(os.path.join("data", "aut_vyber.parquet"))
cesti_autori = (
    aut.explode("370_c")
    .filter(pl.col("370_c").str.contains("Česk"))
    .explode("100_7")
    .select(pl.col("100_7"))
)
cesi = cesti_autori.to_series().to_list()
print(len(cesi))  # number of collected identifiers (not the number of books)

# Restrict the books to those written by the collected authors.
df = df.filter(pl.col("100_7").is_in(cesi))

364420


In [4]:
# Per-author attributes; joined onto the books by author identifier in a later cell.
cesta_wikidata = os.path.join("data", "wikidata.parquet")
wikid = pl.read_parquet(cesta_wikidata)

In [5]:
# Number of rows in the Wikidata table.
len(wikid)

197515

In [6]:
# Peek at three random rows. The fixed seed keeps the preview stable across re-runs
# as long as the upstream row order does not change.
df.sample(3, seed=42)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64
"""1""","""Hotmar, Josef,""","""jk01042432""","[""aut""]","""1933-""",,,,,"""ck8705554""",""" nam a22 4500""","""871218s1987 xr u0…","[""(Brož.) :""]","[""Kčs 8,00""]",,,,,,,,"""1""","""0""","""Tajemství výstřelu ve 21.40""",,"""Josef Hotmar ; graf. úprava I.…",,,,,,"[""127 s. ;""]",,"[""20 cm""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1987,127
"""1""","""Kvíčala, Jan,""","""jk01071219""","[""aut""]","""1861-1951""",,,,,"""bknzdr16770""",""" nam a22 i 4500""","""021116s1933 xr af g 0…","[""(Brožováno)""]",,,,,,,,,"""1""","""0""","""Průvodce havéřským kostelíčkem…",,"""Jan Kvíčala""",,,,,,"[""35 stran, 8 nečíslovaných stran obrazových příloh :""]","[""ilustrace ;""]","[""15 cm""]",,,,"[""7""]","[""průvodce""]","[""fd133154""]","[""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1933,35
"""1""","""Cetl, Tomáš,""","""jk01020630""","[""aut""]","""1939-""",,,,,"""cpk20041298870""",""" nam a22 a 4500""","""040322s2004 xr a e p 0…","[""(brož.) :""]","[""Kč 76,00""]","[""80-01-02859-3""]",,,,,,,"""1""","""0""","""Aplikace elektrochemických zdr…",,"""Tomáš Cetl""",,,,,,"[""145 s. :""]","[""il. ;""]","[""30 cm""]",,,,"[""7"", ""9""]","[""učebnice vysokých škol"", ""textbooks (higher)""]","[""fd133772"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2004,145


In [7]:
# The Wikidata table keeps the author identifier in "__index_level_0__";
# rename it so it lines up with the books' 100_7 column, then left-join.
# NOTE(review): re-running this cell joins the Wikidata columns a second time.
wikid_podle_autora = wikid.rename({"__index_level_0__": "100_7"})
df = df.join(wikid_podle_autora, on="100_7", how="left")

In [8]:
# Keep a book when the author's death year is unknown or not earlier than the
# book's year, i.e. drop titles dated after the author's death.
rok_umrti = pl.col('w_umrti')
vyslo_za_zivota = rok_umrti >= pl.col('rok')
df = df.filter(vyslo_za_zivota | rok_umrti.is_null())

In [9]:
# Inspect the joined frame: bibliographic columns followed by the Wikidata columns.
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Petrus, Jan,""","""jk01092856""","[""com""]","""1890-1968""",,,,,"""bk194900104""",""" nam a22 1 4500""","""980728s1948 xr …",,,,,,,,,,"""1""","""0""","""Dělníci boží""",,"""Jan Petrus""",,,,,,"[""315, [2] s. ;""]",,"[""8°""]",,,,…,,,1948,315,"""Q95150868""",,,,,[],"""Jan Petrus""","""Jan Petrus""",,"[""Československo""]",,"""Narozen 18.12.1890 ve Vsetíně,…","""Czechoslovak pedagogue""",,,"[""pedagog"", ""spisovatel"", … ""redaktor""]",,"[""Filozofická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Vsetín""]","[""Praha""]",1890.0,"""+1890-12-18T00:00:00Z""",1968.0,"""+1968-01-14T00:00:00Z""",,,
"""1""","""Dvořák, František,""","""jn19981000835""","[""aut""]","""1942-""",,,,,"""cpk20223471195""",""" cam a22 a 4500""","""980414s1996 xr a r 0…",,,,,,,,,,"""1""","""0""","""Česká škola lyžování""","""běh na lyžích /""","""František Dvořák a kol.""",,,,,,"[""80 s. :""]","[""il.""]",,,,,…,,,1996,80,"""Q112351272""",,,,,[],"""František Dvořák""",,,,,"""Narozen 1942. Doc., PhDr., CSc…",,,,"[""vysokoškolský učitel""]",,,,,,,,"""muž""",,,1942.0,"""+1942-00-00T00:00:00Z""",,,,,
"""1""","""Sak, Robert,""","""jk01110308""","[""aut""]","""1933-2014""",,,,,"""cpk19960120405""",""" nam a22 a 4500""","""961011t19961995xr bf e 0…","[""(brož.)""]",,"[""80-85787-86-5""]",,,,,,,"""1""","""0""","""Anabáze""","""drama československých legioná…","""Robert Sak""",,,,,,"[""174 s., [32] s. il. :""]","[""mapky ;""]","[""20 cm""]",,,,…,,,1996,174,"""Q18114178""",,,,,"[""cs""]","""Robert Sak""","""Robert Sak""",,"[""Česko"", ""Československo""]",,"""český historik""","""Czech historian""",,,"[""historik"", ""spisovatel"", ""vysokoškolský učitel""]",,"[""Filozofická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Charkov""]","[""České Budějovice""]",1933.0,"""+1933-01-19T00:00:00Z""",2014.0,"""+2014-08-14T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Hrzal, Ladislav,""","""jk01043075""","[""aut""]","""1923-""",,,,,"""bk196302098""",""" nam a22 1 4500""","""971015s1963 xr |…",,,,,,,,,,"""1""","""0""","""Společnost, lid, jednotlivec""","""aktuální otázky /""","""Ladislav Hrzal, Karel Mácha""",,,,,,"[""297 s. ;""]",,"[""8°""]",,,,…,,,1963,297,"""Q28673317""",,,,,"[""cs""]","""Ladislav Hrzal""","""Ladislav Hrzal""","[""Hana Hrzalová""]","[""Československo"", ""Česko""]",,"""český filozof""","""Czech philosopher""",,,"[""filozof"", ""vysokoškolský učitel""]",,,,"[""Komunistická strana Československa""]",,,,"""muž""","[""Praha""]",,1923.0,"""+1923-10-27T00:00:00Z""",2000.0,"""+2000-00-00T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Mašek, Karel,""","""jk01080547""","[""aut""]","""1867-1922""",,,,,"""nos190228585""",""" nam a22 1 4500""","""000911s1910 xr …",,,,,,,,,,"""1""","""0""","""Loutky""","""komedie o jednom dějství /""","""Karel Mašek""",,,,,,"[""54 s. ;""]",,"[""8°""]",,,,…,,,1910,54,"""Q1453341""",,"""přirozená smrt""",,,"[""de"", ""cs""]","""Karel Mašek""","""Karel Mašek""",,,,"""český básník, spisovatel, dram…","""Czech poet, playwright, public…",,"""tuberkulóza""","[""překladatel"", ""dramatik"", … ""právník""]",,"[""Právnická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Praha""]","[""Hradčany"", ""Praha""]",1867.0,"""+1867-12-29T00:00:00Z""",1922.0,"""+1922-09-13T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Válek, Jiří Miloš,""","""jk01141212""","[""aut""]","""1923-2005""",,,,,"""ck8703579""",""" nam a22 4500""","""870921s1987 xr u0…","[""(Brož.) :""]","[""5 Kčs""]",,,,,,,,"""1""","""0""","""Italské hudební názvosloví""",,"""Jiří Válek ; Obálka a graf. úp…",,,,,,"[""151 s. ;""]",,"[""12 cm""]",,,,…,,,1987,151,"""Q3179338""",,,,,"[""fi"", ""fr"", … ""cs""]","""Jiří Válek""","""Jiří Válek""",,"[""Česko""]",,"""český hudební skladatel""","""Czech composer (1923-2005)""",,,"[""hudební skladatel""]",,,,,,,,"""muž""","[""Praha""]","[""Praha""]",1923.0,"""+1923-05-28T00:00:00Z""",2005.0,"""+2005-10-06T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Niederle, Lubor,""","""jk01090178""","[""aut""]","""1865-1944""",,,,,"""bk193102508""",""" nam a22 1 4500""","""991111s1931 xr …",,,,,,,,,,"""1""","""0""","""Rukověť slovanské archeologie""","""Manuel de l'archéologie slave …","""Lubor Niederle""",,,,,,"[""VII, 292, [II] s. :""]","[""[I] mapa ;""]","[""8°""]",,,,…,,,1931,292,"""Q728706""","[""Řád sv. Stanislava 2. třídy"", ""Řád svatého Alexandra"", ""velkodůstojník Řádu čestné legie""]",,,,"[""bg"", ""tr"", … ""cs""]","""Lubor Niederle""","""Lubor Niederle""",,"[""Československo""]",,"""český archeolog""","""Czech archeologist (1865–1944)""","[""Marcel Niederle""]",,"[""antropolog"", ""archeolog"", … ""slavista""]",,"[""Univerzita Karlova"", ""Filozofická fakulta Univerzity Karlovy""]","[""Václav Niederle"", ""Bohuslav Niederle""]",,,,,"""muž""","[""Klatovy""]","[""Praha""]",1865.0,"""+1865-09-20T00:00:00Z""",1944.0,"""+1944-06-14T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…","""https://en.wikipedia.org/wiki/…"
"""1""","""Hrubý, Hynek,""","""jk01042941""","[""aut""]","""1873-""",,,,,"""bk193601755""",""" nam a22 1 4500""","""990217s1936 xr …",,,,,,,,,,"""1""","""0""","""Německá učebnice a čítanka pro…","""Zároveň příručka pro živnostni…","""Sepsal prof. Hynek Hrubý""",,,,,,"[""264 s. :""]","[""[I] obr. příl. ;""]","[""8°""]",,,,…,,,1936,264,"""Q95070846""",,,,,[],"""Hynek Hrubý""","""Hynek Hrubý""",,,,"""Narozen 30.8.1873 v Praze. Stř…",,,,"[""literární historik"", ""středoškolský učitel""]",,,,,,,,"""muž""","[""Praha""]",,1873.0,"""+1873-08-30T00:00:00Z""",,,,,
"""1""","""Rodrová, Jana,""","""ola2016914242""","[""aut""]","""1965-""",,,,,"""nkc20183046078""",""" nam a22 ia4500""","""181016m20162018xr abcgg f 0…","[""([1] ;"", ""kroužková vazba)"", … ""kroužková vazba)""]",,"[""978-80-260-9252-0"", ""978-80-270-4488-7""]",,,,,,,"""1""","""0""","""Čeština pro cizince""","""jazykové hry /""","""Jana Rodrová, Markéta Vymětalo…",,,,,,"[""2 svazky (67; 118 stran) :""]","[""ilustrace (převážně barevné), mapy, portréty, noty ;""]","[""21 cm""]",,,,…,,,2016,118,"""Q95397638""",,,,,[],"""Jana Rodrová""","""Jana Rodrová""",,,,"""Narozena 25. 1. 1965. Mgr., le…","""spokesperson""",,,"[""lektor"", ""mluvčí""]",,,,,,,,"""žena""",,,1965.0,"""+1965-01-25T00:00:00Z""",,,,,
"""1""","""Paulík, Ivo,""","""mzk2006323099""","[""aut"", ""pht""]","""1975-""",,,,,"""nkc20142611749""",""" cam a22 i 4500""","""140409s2014 xr ab g f 0…","[""(brožováno)""]",,"[""978-80-7451-135-6""]",,,,,,,"""1""","""0""","""Olomoucko a Valašsko""",,"""text a fotografie: Ivo Paulík""",,,,,,"[""128 stran :""]","[""barevné ilustrace, mapy ;""]","[""21 cm""]",,,,…,,,2014,128,"""Q95481030""",,,,,[],"""Ivo Paulík""","""Ivo Paulík""",,,,"""Narozen 27.7. 1975 v Praze. Au…",,,,,,,,,,,,"""muž""","[""Praha""]",,1975.0,"""+1975-07-27T00:00:00Z""",,,,,


In [10]:
# Count books per party listed in the author's `strany` list, most frequent first.
# Authors without any party end up in the null group.
strany = (
    df.explode('strany')
    .group_by('strany')
    .len()
    .sort(by='len', descending=True)
)

In [11]:
# Display the per-party counts computed in the previous cell.
strany

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [12]:
# Count books per place listed in the author's `vezeni` list, most frequent first.
# Authors without any entry end up in the null group.
vezeni = (
    df.explode('vezeni')
    .group_by('vezeni')
    .len()
    .sort(by='len', descending=True)
)
vezeni

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [13]:
# Display df again; no cell since the previous display has reassigned it.
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Petrus, Jan,""","""jk01092856""","[""com""]","""1890-1968""",,,,,"""bk194900104""",""" nam a22 1 4500""","""980728s1948 xr …",,,,,,,,,,"""1""","""0""","""Dělníci boží""",,"""Jan Petrus""",,,,,,"[""315, [2] s. ;""]",,"[""8°""]",,,,…,,,1948,315,"""Q95150868""",,,,,[],"""Jan Petrus""","""Jan Petrus""",,"[""Československo""]",,"""Narozen 18.12.1890 ve Vsetíně,…","""Czechoslovak pedagogue""",,,"[""pedagog"", ""spisovatel"", … ""redaktor""]",,"[""Filozofická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Vsetín""]","[""Praha""]",1890.0,"""+1890-12-18T00:00:00Z""",1968.0,"""+1968-01-14T00:00:00Z""",,,
"""1""","""Dvořák, František,""","""jn19981000835""","[""aut""]","""1942-""",,,,,"""cpk20223471195""",""" cam a22 a 4500""","""980414s1996 xr a r 0…",,,,,,,,,,"""1""","""0""","""Česká škola lyžování""","""běh na lyžích /""","""František Dvořák a kol.""",,,,,,"[""80 s. :""]","[""il.""]",,,,,…,,,1996,80,"""Q112351272""",,,,,[],"""František Dvořák""",,,,,"""Narozen 1942. Doc., PhDr., CSc…",,,,"[""vysokoškolský učitel""]",,,,,,,,"""muž""",,,1942.0,"""+1942-00-00T00:00:00Z""",,,,,
"""1""","""Sak, Robert,""","""jk01110308""","[""aut""]","""1933-2014""",,,,,"""cpk19960120405""",""" nam a22 a 4500""","""961011t19961995xr bf e 0…","[""(brož.)""]",,"[""80-85787-86-5""]",,,,,,,"""1""","""0""","""Anabáze""","""drama československých legioná…","""Robert Sak""",,,,,,"[""174 s., [32] s. il. :""]","[""mapky ;""]","[""20 cm""]",,,,…,,,1996,174,"""Q18114178""",,,,,"[""cs""]","""Robert Sak""","""Robert Sak""",,"[""Česko"", ""Československo""]",,"""český historik""","""Czech historian""",,,"[""historik"", ""spisovatel"", ""vysokoškolský učitel""]",,"[""Filozofická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Charkov""]","[""České Budějovice""]",1933.0,"""+1933-01-19T00:00:00Z""",2014.0,"""+2014-08-14T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Hrzal, Ladislav,""","""jk01043075""","[""aut""]","""1923-""",,,,,"""bk196302098""",""" nam a22 1 4500""","""971015s1963 xr |…",,,,,,,,,,"""1""","""0""","""Společnost, lid, jednotlivec""","""aktuální otázky /""","""Ladislav Hrzal, Karel Mácha""",,,,,,"[""297 s. ;""]",,"[""8°""]",,,,…,,,1963,297,"""Q28673317""",,,,,"[""cs""]","""Ladislav Hrzal""","""Ladislav Hrzal""","[""Hana Hrzalová""]","[""Československo"", ""Česko""]",,"""český filozof""","""Czech philosopher""",,,"[""filozof"", ""vysokoškolský učitel""]",,,,"[""Komunistická strana Československa""]",,,,"""muž""","[""Praha""]",,1923.0,"""+1923-10-27T00:00:00Z""",2000.0,"""+2000-00-00T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Mašek, Karel,""","""jk01080547""","[""aut""]","""1867-1922""",,,,,"""nos190228585""",""" nam a22 1 4500""","""000911s1910 xr …",,,,,,,,,,"""1""","""0""","""Loutky""","""komedie o jednom dějství /""","""Karel Mašek""",,,,,,"[""54 s. ;""]",,"[""8°""]",,,,…,,,1910,54,"""Q1453341""",,"""přirozená smrt""",,,"[""de"", ""cs""]","""Karel Mašek""","""Karel Mašek""",,,,"""český básník, spisovatel, dram…","""Czech poet, playwright, public…",,"""tuberkulóza""","[""překladatel"", ""dramatik"", … ""právník""]",,"[""Právnická fakulta Univerzity Karlovy""]",,,,,,"""muž""","[""Praha""]","[""Hradčany"", ""Praha""]",1867.0,"""+1867-12-29T00:00:00Z""",1922.0,"""+1922-09-13T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Válek, Jiří Miloš,""","""jk01141212""","[""aut""]","""1923-2005""",,,,,"""ck8703579""",""" nam a22 4500""","""870921s1987 xr u0…","[""(Brož.) :""]","[""5 Kčs""]",,,,,,,,"""1""","""0""","""Italské hudební názvosloví""",,"""Jiří Válek ; Obálka a graf. úp…",,,,,,"[""151 s. ;""]",,"[""12 cm""]",,,,…,,,1987,151,"""Q3179338""",,,,,"[""fi"", ""fr"", … ""cs""]","""Jiří Válek""","""Jiří Válek""",,"[""Česko""]",,"""český hudební skladatel""","""Czech composer (1923-2005)""",,,"[""hudební skladatel""]",,,,,,,,"""muž""","[""Praha""]","[""Praha""]",1923.0,"""+1923-05-28T00:00:00Z""",2005.0,"""+2005-10-06T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Niederle, Lubor,""","""jk01090178""","[""aut""]","""1865-1944""",,,,,"""bk193102508""",""" nam a22 1 4500""","""991111s1931 xr …",,,,,,,,,,"""1""","""0""","""Rukověť slovanské archeologie""","""Manuel de l'archéologie slave …","""Lubor Niederle""",,,,,,"[""VII, 292, [II] s. :""]","[""[I] mapa ;""]","[""8°""]",,,,…,,,1931,292,"""Q728706""","[""Řád sv. Stanislava 2. třídy"", ""Řád svatého Alexandra"", ""velkodůstojník Řádu čestné legie""]",,,,"[""bg"", ""tr"", … ""cs""]","""Lubor Niederle""","""Lubor Niederle""",,"[""Československo""]",,"""český archeolog""","""Czech archeologist (1865–1944)""","[""Marcel Niederle""]",,"[""antropolog"", ""archeolog"", … ""slavista""]",,"[""Univerzita Karlova"", ""Filozofická fakulta Univerzity Karlovy""]","[""Václav Niederle"", ""Bohuslav Niederle""]",,,,,"""muž""","[""Klatovy""]","[""Praha""]",1865.0,"""+1865-09-20T00:00:00Z""",1944.0,"""+1944-06-14T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…","""https://en.wikipedia.org/wiki/…"
"""1""","""Hrubý, Hynek,""","""jk01042941""","[""aut""]","""1873-""",,,,,"""bk193601755""",""" nam a22 1 4500""","""990217s1936 xr …",,,,,,,,,,"""1""","""0""","""Německá učebnice a čítanka pro…","""Zároveň příručka pro živnostni…","""Sepsal prof. Hynek Hrubý""",,,,,,"[""264 s. :""]","[""[I] obr. příl. ;""]","[""8°""]",,,,…,,,1936,264,"""Q95070846""",,,,,[],"""Hynek Hrubý""","""Hynek Hrubý""",,,,"""Narozen 30.8.1873 v Praze. Stř…",,,,"[""literární historik"", ""středoškolský učitel""]",,,,,,,,"""muž""","[""Praha""]",,1873.0,"""+1873-08-30T00:00:00Z""",,,,,
"""1""","""Rodrová, Jana,""","""ola2016914242""","[""aut""]","""1965-""",,,,,"""nkc20183046078""",""" nam a22 ia4500""","""181016m20162018xr abcgg f 0…","[""([1] ;"", ""kroužková vazba)"", … ""kroužková vazba)""]",,"[""978-80-260-9252-0"", ""978-80-270-4488-7""]",,,,,,,"""1""","""0""","""Čeština pro cizince""","""jazykové hry /""","""Jana Rodrová, Markéta Vymětalo…",,,,,,"[""2 svazky (67; 118 stran) :""]","[""ilustrace (převážně barevné), mapy, portréty, noty ;""]","[""21 cm""]",,,,…,,,2016,118,"""Q95397638""",,,,,[],"""Jana Rodrová""","""Jana Rodrová""",,,,"""Narozena 25. 1. 1965. Mgr., le…","""spokesperson""",,,"[""lektor"", ""mluvčí""]",,,,,,,,"""žena""",,,1965.0,"""+1965-01-25T00:00:00Z""",,,,,
"""1""","""Paulík, Ivo,""","""mzk2006323099""","[""aut"", ""pht""]","""1975-""",,,,,"""nkc20142611749""",""" cam a22 i 4500""","""140409s2014 xr ab g f 0…","[""(brožováno)""]",,"[""978-80-7451-135-6""]",,,,,,,"""1""","""0""","""Olomoucko a Valašsko""",,"""text a fotografie: Ivo Paulík""",,,,,,"[""128 stran :""]","[""barevné ilustrace, mapy ;""]","[""21 cm""]",,,,…,,,2014,128,"""Q95481030""",,,,,[],"""Ivo Paulík""","""Ivo Paulík""",,,,"""Narozen 27.7. 1975 v Praze. Au…",,,,,,,,,,,,"""muž""","[""Praha""]",,1975.0,"""+1975-07-27T00:00:00Z""",,,,,


In [14]:
def zebricek(sloupec, pocet=10):
    """Rank the most frequent values of a list column in the global ``df``.

    Each row is exploded on ``sloupec`` so that every list item is counted
    separately. Rows with no value are kept and show up as a null group, which
    is usually the largest one.

    Args:
        sloupec: Name of the list column to rank.
        pocet: Number of top rows to return (defaults to 10).

    Returns:
        A polars DataFrame with columns ``sloupec`` and ``len``, sorted by
        ``len`` in descending order.
    """
    return (
        df.explode(sloupec)
        .group_by(sloupec)
        .len()
        .sort(by="len", descending=True)
        .head(pocet)
    )

In [15]:
# Most frequent entries of the `vezeni` column (the null group is included).
zebricek("vezeni")

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [16]:
# Most frequent entries of the `strany` column (the null group is included).
zebricek("strany")

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [17]:
# Most frequent entries of the `profese` column (the null group is included).
zebricek("profese")

profese,len
str,u32
,81272
"""spisovatel""",72968
"""učitel""",36484
"""vysokoškolský učitel""",32279
"""překladatel""",30444
"""básník""",29000
"""pedagog""",28848
"""publicista""",25746
"""redaktor""",24075
"""historik""",21399


In [18]:
# Most frequent entries of the `udalosti` column (the null group is included).
zebricek("udalosti")

udalosti,len
str,u32
,297772
"""ordinace""",803
"""odvod branců""",442
"""svatba""",411
"""Transport Ek""",117
"""Transport L""",64
"""Transport Cc""",64
"""svěcení""",60
"""Transport Ds""",58
"""emigrace""",58


In [19]:
def zkusenost(sloupec="", nazev="", hodnota=None, rok=1900):
    """Compute, per year, the share of books whose author has a given attribute.

    Reads the global ``df``. The denominator is the number of books per ``rok``
    from ``rok`` onwards; the numerator is the number of those books whose
    ``sloupec`` matches.

    Args:
        sloupec: Column of ``df`` to test.
        nazev: Label stored in the ``co`` column of every returned row.
        hodnota: If given, ``sloupec`` must be a list column and a book matches
            when the list contains ``hodnota``. If None, a book matches when
            ``sloupec`` is not null.
        rok: First year included in the result.

    Returns:
        A polars DataFrame sorted by ``rok`` with columns ``rok``, ``len``
        (all books), ``len_right`` (matching books), ``podil`` (their ratio)
        and ``co``. Years without any matching book have null ``len_right``
        and ``podil``.
    """
    vysledek = df.filter(pl.col("rok") >= rok).group_by("rok").len().sort(by="rok")
    if hodnota is not None:
        # list.contains counts every book at most once, even when the value is
        # repeated inside the list, and avoids exploding the whole frame.
        ma_zkusenost = pl.col(sloupec).list.contains(hodnota)
    else:
        # NOTE(review): an empty list is not null, so it would count as a match
        # here — confirm the tested columns use null rather than [].
        ma_zkusenost = pl.col(sloupec).is_not_null()
    srovnani = df.filter(ma_zkusenost).group_by("rok").len()
    return (
        vysledek.join(srovnani, on="rok", how="left")
        .with_columns((pl.col("len_right") / pl.col("len")).alias("podil"))
        .sort(by="rok")
        .with_columns(pl.lit(nazev).alias("co"))
    )

In [20]:
# Yearly share of books whose author has a non-null `vezeni` entry.
kriminal = zkusenost(sloupec="vezeni", nazev="…internaci ve vězení nebo koncentračním táboře")

In [21]:
# Yearly share of books whose author's `strany` includes "Komunistická strana Československa".
ksc = zkusenost(sloupec="strany", hodnota="Komunistická strana Československa", nazev="…členství v komunistické straně")

In [22]:
# Yearly share of books whose author's `vezeni` includes "Malá pevnost Terezín".
terezin = zkusenost(sloupec="vezeni", nazev="internace v Terezíně", hodnota="Malá pevnost Terezín")

In [23]:
# Yearly share of books whose author has a non-null `instagram` value.
# NOTE(review): not referenced by the chart cell shown in this notebook.
instagram = zkusenost(sloupec="instagram",nazev="účet na Instagramu")

In [24]:
# Yearly share of books whose author has a non-null `facebook` value.
# NOTE(review): not referenced by the chart cell shown in this notebook.
fb = zkusenost(sloupec="facebook", nazev="účet na Facebooku")

In [25]:
# Yearly share of books whose author has a non-null `web` value; displayed for inspection.
web=zkusenost(sloupec="web",nazev="…osobní webové stránky")
web

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1900,432,,,"""…osobní webové stránky"""
1901,602,,,"""…osobní webové stránky"""
1902,745,1,0.001342,"""…osobní webové stránky"""
1903,631,,,"""…osobní webové stránky"""
1904,648,1,0.001543,"""…osobní webové stránky"""
1905,613,1,0.001631,"""…osobní webové stránky"""
1906,665,1,0.001504,"""…osobní webové stránky"""
1907,645,1,0.00155,"""…osobní webové stránky"""
1908,788,2,0.002538,"""…osobní webové stránky"""
1909,727,,,"""…osobní webové stránky"""


In [26]:
# Display the Terezín shares; zkusenost() already returns them sorted by rok.
terezin.sort(by="rok")

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1900,432,1,0.002315,"""internace v Terezíně"""
1901,602,1,0.001661,"""internace v Terezíně"""
1902,745,1,0.001342,"""internace v Terezíně"""
1903,631,2,0.00317,"""internace v Terezíně"""
1904,648,3,0.00463,"""internace v Terezíně"""
1905,613,2,0.003263,"""internace v Terezíně"""
1906,665,2,0.003008,"""internace v Terezíně"""
1907,645,5,0.007752,"""internace v Terezíně"""
1908,788,5,0.006345,"""internace v Terezíně"""
1909,727,4,0.005502,"""internace v Terezíně"""


In [67]:
# Faceted bar chart: one row per experience label ("co"), one bar per year ("rok"),
# bar height = share of that year's books ("podil").
podily_faceted = (
    alt.Chart(
        alt_friendly(pl.concat([kriminal, ksc, web])),
        title={
            # Plain string instead of a one-element list: the exported <img alt>
            # otherwise shows the Python list repr ("['Kolik …']").
            'text': "Kolik českých autorů mělo za sebou nebo před sebou…",
            "subtitle": [
                "Jak velkou část knih poprvé vydaných v daném roce napsali lidé s určitou",
                "zkušeností – bez ohledu na to, kdy ji udělali. Zuby v roce 1990 znamenají,",
                "že po revoluci začali vycházet jak lidé dříve věznění, tak vyloučení z KSČ.",
                "Data jsou neúplná, reálné podíly budou spíše vyšší; podstatné jsou zde trendy.",
            ],
        },
    )
    .mark_bar(width=2)
    .encode(
        alt.X(
            "rok:T",
            title=None,
            axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6', tickCount=10),
        ),
        alt.Y(
            'podil:Q',
            # Labels are rendered as percentages; the axis is capped at 10 %.
            axis=alt.Axis(labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6'),
            title=None,
            scale=alt.Scale(domainMax=0.1),
        ),
        alt.Color(
            "co:N",
            title=None,
            legend=None,
            scale=alt.Scale(range=['#5E2D3A', '#D6534B', '#445B78']),
            # NOTE(review): none of these labels matches a `nazev` passed to
            # zkusenost() for kriminal / ksc / web, so this ordering presumably
            # has no effect — confirm the intended colour assignment.
            sort=["účet na Facebooku","pobyt ve vězení nebo koncentračním táboře","členství v KSČ"],
        ),
        row=alt.Row(
            "co:N",
            title=None,
            spacing=15,
            header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='start', labelFontWeight=500, labelFont='Asap', labelOrient="top"),
            # NOTE(review): same stale labels as in the colour sort above.
            sort=["členství v KSČ","pobyt ve vězení","účet na Facebooku"],
        ),
    )
    # NOTE(review): resolve_scale is called twice; the later x="shared" looks like
    # it overrides this call (possibly including y) — verify in the rendered spec.
    .resolve_scale(x='independent', y='independent')
    .properties(
        width=kredity['sirka'],
        height=kredity['vyska_nizkych'],
        autosize={'type': 'fit', 'contains': 'padding'},
    )
    .resolve_scale(x="shared")
    .resolve_axis(x="independent")
    .configure_view(stroke='transparent')
    .configure_axis(grid=False, domain=False)
)

podily_faceted

In [69]:
# Hand the chart to the project helper; the cell output is an HTML <figure> snippet
# pointing at 02_zkusenosti.svg. The 'wiki' entry of kredity is passed as the credit.
me_to_neurazi(podily_faceted, soubor="02_zkusenosti", kredity=kredity['wiki'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" width="100%" alt="Graf s titulkem „['Kolik českých autorů mělo za sebou nebo před sebou…']“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>
