In [8]:
import os
import json
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.me_to_neurazi import me_to_neurazi

with open(os.path.join('src','kredity.json'), 'r', encoding='utf-8') as kredity:
    kredity = json.loads(kredity.read())
pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

In [10]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))
df = df.filter(pl.col("rok").is_between(1800,2024))
df = df.filter(pl.col("stran") > 30)

df = df.drop_nulls(subset=['100_7'])
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a"], keep="first")
print(len(df))

716789
449120


In [11]:
aut = pl.read_parquet(os.path.join("data","aut_vyber.parquet"))
cesi = aut.explode("370_c").filter(pl.col("370_c").str.contains("Česk")).explode("100_7").select(pl.col("100_7")).to_series().to_list()
print(len(cesi))
df = df.filter(pl.col("100_7").is_in(cesi))

364420


In [12]:
wikid = pl.read_parquet(os.path.join("data","wikidata.parquet"))

In [13]:
df.sample(3)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64
"""1""","""Erneker, Jaroslav,""","""jk01030565""","[""aut""]","""1937-""",,,,,"""bk198203004""",""" nam a22 1 4500""","""961116s1981 xr …","[""(Brož.) :""]","[""Kčs 10,00""]",,,,,,,,"""1""","""0""","""Společenská aktivita v sociali…",,"""Jaroslav Erneker""",,,,,,"[""166 s. ;""]",,"[""8°""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1981,166
"""1""","""Čermák, Antonín,""","""jk01021278""","[""aut""]","""1776-1837""",,,,,"""nkc20122397560""",""" nam a22 i 4500""","""120917s1831 xr e 0…","[""(Brožováno)""]",,,,,,,,,"""1""","""0""","""Kázanj na dny rozličných swatý…",,"""od Antonjna Čermáka""",,,,,,"[""268 stran ;""]",,"[""18 cm""]",,,,"[""7""]","[""kázání""]","[""fd131846""]","[""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1831,268
"""1""","""Mareš, Tomáš,""","""ntka173333""","[""aut""]","""1972-""",,,,,"""nkc20071713636""",""" nam a22 a 4500""","""070323s2007 xr a e p 0…","[""(brož.)""]",,"[""978-80-01-03696-9""]",,,,,,,"""1""","""0""","""Konstrukční optimalizace""",,"""Tomáš Mareš""",,,,,,"[""106 s. :""]","[""il. ;""]","[""30 cm""]",,,,"[""7"", ""9""]","[""učebnice vysokých škol"", ""textbooks (higher)""]","[""fd133772"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2007,106


In [14]:
df = df.join(wikid.rename({"__index_level_0__":"100_7"}), left_on='100_7', right_on='100_7', how='left')

In [15]:
df = df.filter((pl.col('w_umrti') >= pl.col('rok')) | pl.col('w_umrti').is_null())

In [16]:
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Rezek, Josef,""","""jk01102388""","[""edt""]","""1876-""",,,,,"""bknbjn00333""",""" nam a22 1 4500""","""011109s1911 xr d p 0…",,,,,,,,,,"""1""","""0""","""Nauka občanská pro dvoutřídní …",,"""Upravili Josef Rezek a Václav …",,,,,,"[""88 s. ;""]",,"[""8°""]",,,,…,,,1911,88,"""Q112349106""",,,,,[],"""Josef Rezek""","""Josef Rezek""",,,,"""Narozen roku 1876 ve Vlkově. S…",,,,"[""profesor""]",,,,,,,,"""muž""",,,1876.0,"""+1876-00-00T00:00:00Z""",,,,,
"""1""","""Šedivý, Josef,""","""jk01122676""","[""aut""]","""1919-""",,,,,"""bk196901975""",""" nam a22 1 4500""","""970911s1969 xr …",,,,,,,,,,"""1""","""0""","""Technologie chemických vláken""","""Učební text pro 1. roč. odb. u…","""[Autoři:] Josef Šedivý, Jindři…",,,,,,"[""139, [1] s. ;""]",,"[""8°""]",,,,…,,,1969,139,"""Q95176009""",,,,,[],"""Josef Šedivý""","""Josef Šedivý""",,"[""Československo""]",,"""Narozen 24.11.1919 v Praze. RN…",,,,,,,,,,,,"""muž""","[""Praha""]",,1919.0,"""+1919-11-24T00:00:00Z""",,,,,
"""1""","""Hořica, Ignát,""","""jk01042344""","[""aut""]","""1859-1902""",,,,,"""bknzdr09827""",""" nam a22 1 4500""","""020420s1896 xr e 0…","[""(Váz.)""]",,,,,,,,,"""1""","""0""","""Osoby a věci v Chorvatsku""",,"""napsal Ignát Hořica""",,,,,,"[""84 s. ;""]",,"[""16 cm""]",,,,…,,,1896,84,"""Q7940602""",,,,,"[""cs""]","""Ignát Hořica""","""Ignát Hořica""","[""Marie Laudová""]","[""Předlitavsko""]",,"""český poslanec Českého zemskéh…","""Czech member of Czech council,…",,"""kardiovaskulární onemocnění""","[""překladatel"", ""novinář"", … ""redaktor""]",,,,"[""Národní strana svobodomyslná""]",,,,"""muž""","[""Brno""]","[""Marseille"", ""Malesice""]",1859.0,"""+1859-07-28T00:00:00Z""",1902.0,"""+1902-04-03T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Malinovský, Lubomír,""","""jk01073262""","[""aut""]","""1931-1997""",,,,,"""ck8804385""",""" nam a22 4500""","""880906s1987 xr 0…","[""(Brož.) :""]","[""Kčs 6,00""]",,,,,,,,"""1""","""0""","""Kapitoly z obecné anatomie""",,"""Lubomír Malinovský""",,,,,,"[""70 s. ;""]",,"[""30 cm""]",,,,…,,,1987,70,"""Q95175416""",,,,,[],"""Lubomír Malinovský""","""Lubomír Malinovský""",,"[""Československo""]",,"""Narozen 25.3.1931 v Šelešovicí…",,,,,,,,,,,,"""muž""","[""Brno""]",,1931.0,"""+1931-03-25T00:00:00Z""",1997.0,"""+1997-04-01T00:00:00Z""",,,
"""1""","""Vedral, Jiří,""","""mzk2003169026""","[""aut""]","""1973-""",,,,,"""nkc20051448821""",""" nam a22 a 4500""","""051021s2005 xr g d 0…","[""(brož.)""]",,"[""80-86711-74-9""]",,,,,,,"""1""","""0""","""Dánsko-český biologický slovní…",,"""J. Vedral""",,,,,,"[""40 s. ;""]",,"[""21 cm""]",,,,…,,,2005,40,"""Q88304255""",,,,,[],"""Jiří Vedral""","""Jiří Vedral""",,"[""Česko""]",,,"""Czech translator and lexicogra…",,,"[""překladatel"", ""lexikograf""]",,,,,,,,"""muž""","[""Praha""]",,1973.0,"""+1973-08-29T00:00:00Z""",,,,,
"""1""","""Jáneš, Vlastimil,""","""jo2001100075""","[""aut""]","""1935-""",,,,,"""ck8402416""",""" nam a22 4500""","""840710s1984 xr a u0…","[""(Brož.) :""]","[""17,50 Kčs""]",,,,,,,,"""1""","""0""","""Technické vybavení počítačů pr…",,"""Vlastimil Jáneš ; Peter Liška …",,,,,,"[""374 s. :""]","[""obr., fot., schémata ;""]","[""29 cm""]",,,,…,,,1984,374,"""Q95108274""",,,,,[],"""Vlastimil Jáneš""","""Vlastimil Jáneš""",,,,"""Narozen 28.10.1935 v Horním Tř…",,,,,"[""tajný spolupracovník""]",,,,,,,"""muž""","[""Horní Třešňovec""]",,1935.0,"""+1935-10-28T00:00:00Z""",,,,,
"""1""","""Prouza, Daniel,""","""mzk2006317981""","[""aut""]","""1975-""",,,,,"""nkc20061637415""",""" nam a22 a 4500""","""060110s2005 xr f 0…","[""(v knize neuvedeno ;"", ""brož.) :""]","[""neprodejné""]","[""80-239-6622-7""]",,,,,,,"""1""","""0""","""Daňová kriminalita""",,"""Daniel Prouza""",,,,,,"[""260 s. ;""]",,"[""21 cm""]",,,,…,,,2005,260,"""Q95113390""",,,,,[],"""Daniel Prouza""","""Daniel Prouza""",,,,"""Narozen 3. 1. 1975 v Lanškroun…",,,,,,,,,,,,"""muž""","[""Lanškroun""]",,1975.0,"""+1975-01-03T00:00:00Z""",,,,,
"""1""","""Konečná, Marika""","""jx20080808011""","[""aut""]",,,,,,"""np9418546""",""" nam a22 4500""","""940426s1993 xr a u0…","[""(brož.) :""]","[""Kč 20,00""]","[""80-7079-819-X""]",,,,,,,"""1""","""0""","""Založení a řízení malých a stř…","""určeno pro stud. fak. podnikoh…","""Marika Konečná""",,,,,,"[""187 s. :""]","[""tab. ;""]","[""29 cm""]",,,,…,,,1993,187,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""1""","""Hrušovský, Ján,""","""jn19990209273""","[""aut""]","""1892-1975""",,,,,"""cpk20081797411""",""" nam a22 a 4500""","""080321s1936 xr …",,,,,,,,,,"""1""","""0""","""Peter Pavel na prahu Nového sv…",,"""Ján Hrušovský""","[""[Kniha druhá] /""]",,,,,"[""251 s. ;""]",,"[""8°""]",,,,…,,,1936,251,"""Q12027367""",,,,,"[""cs"", ""sk""]","""Ján Hrušovský""","""Ján Hrušovský""",,"[""Československo""]",,"""slovenský spisovatel""","""Slovak writer (1892–1975)""","[""Ivan Hrušovský""]",,"[""spisovatel"", ""novinář""]",,,,,,,,"""muž""","[""Nové Mesto nad Váhom""]","[""Bratislava""]",1892.0,"""+1892-02-04T00:00:00Z""",1975.0,"""+1975-03-07T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Okamura, Tomio,""","""xx0092464""","[""aut""]","""1972-""",,,,,"""nkc20132469428""",""" nam a22 a 4500""","""130624s2013 xr a e 0…","[""(váz.)""]",,"[""978-80-253-1963-5""]",,,,,,,"""1""","""0""","""Umění přímé demokracie""","""kam dál, Česko? /""","""Tomio Okamura, Jaroslav N. Več…",,,,,,"[""111 s. :""]","[""il. ;""]","[""24 cm""]",,,,…,,,2013,111,"""Q2223090""","[""Bludný balvan""]",,"""tomio.cz""","""tomio.cz""","[""it"", ""ru"", … ""cs""]","""Tomio Okamura""","""Tomio Okamura""",,"[""Česko""]",,"""český podnikatel, politik, pos…","""Czech far-right politician""","[""Ruy Okamura""]",,"[""politik"", ""podnikatel"", … ""travel agent""]",,,"[""Osamu Okamura"", ""Hayato Okamura""]","[""Úsvit – Národní koalice"", ""Svoboda a přímá demokracie""]","""tomio_cz""",,,"""muž""","[""Itabaši""]",,1972.0,"""+1972-07-04T00:00:00Z""",,,"""http://www.tomio.cz/""","""https://cs.wikipedia.org/wiki/…","""https://en.wikipedia.org/wiki/…"


In [17]:
strany = df.explode('strany').group_by('strany').len().sort(by='len',descending=True)

In [18]:
strany

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [19]:
vezeni = df.explode('vezeni').group_by('vezeni').len().sort(by='len',descending=True)
vezeni

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [20]:
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Rezek, Josef,""","""jk01102388""","[""edt""]","""1876-""",,,,,"""bknbjn00333""",""" nam a22 1 4500""","""011109s1911 xr d p 0…",,,,,,,,,,"""1""","""0""","""Nauka občanská pro dvoutřídní …",,"""Upravili Josef Rezek a Václav …",,,,,,"[""88 s. ;""]",,"[""8°""]",,,,…,,,1911,88,"""Q112349106""",,,,,[],"""Josef Rezek""","""Josef Rezek""",,,,"""Narozen roku 1876 ve Vlkově. S…",,,,"[""profesor""]",,,,,,,,"""muž""",,,1876.0,"""+1876-00-00T00:00:00Z""",,,,,
"""1""","""Šedivý, Josef,""","""jk01122676""","[""aut""]","""1919-""",,,,,"""bk196901975""",""" nam a22 1 4500""","""970911s1969 xr …",,,,,,,,,,"""1""","""0""","""Technologie chemických vláken""","""Učební text pro 1. roč. odb. u…","""[Autoři:] Josef Šedivý, Jindři…",,,,,,"[""139, [1] s. ;""]",,"[""8°""]",,,,…,,,1969,139,"""Q95176009""",,,,,[],"""Josef Šedivý""","""Josef Šedivý""",,"[""Československo""]",,"""Narozen 24.11.1919 v Praze. RN…",,,,,,,,,,,,"""muž""","[""Praha""]",,1919.0,"""+1919-11-24T00:00:00Z""",,,,,
"""1""","""Hořica, Ignát,""","""jk01042344""","[""aut""]","""1859-1902""",,,,,"""bknzdr09827""",""" nam a22 1 4500""","""020420s1896 xr e 0…","[""(Váz.)""]",,,,,,,,,"""1""","""0""","""Osoby a věci v Chorvatsku""",,"""napsal Ignát Hořica""",,,,,,"[""84 s. ;""]",,"[""16 cm""]",,,,…,,,1896,84,"""Q7940602""",,,,,"[""cs""]","""Ignát Hořica""","""Ignát Hořica""","[""Marie Laudová""]","[""Předlitavsko""]",,"""český poslanec Českého zemskéh…","""Czech member of Czech council,…",,"""kardiovaskulární onemocnění""","[""překladatel"", ""novinář"", … ""redaktor""]",,,,"[""Národní strana svobodomyslná""]",,,,"""muž""","[""Brno""]","[""Marseille"", ""Malesice""]",1859.0,"""+1859-07-28T00:00:00Z""",1902.0,"""+1902-04-03T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Malinovský, Lubomír,""","""jk01073262""","[""aut""]","""1931-1997""",,,,,"""ck8804385""",""" nam a22 4500""","""880906s1987 xr 0…","[""(Brož.) :""]","[""Kčs 6,00""]",,,,,,,,"""1""","""0""","""Kapitoly z obecné anatomie""",,"""Lubomír Malinovský""",,,,,,"[""70 s. ;""]",,"[""30 cm""]",,,,…,,,1987,70,"""Q95175416""",,,,,[],"""Lubomír Malinovský""","""Lubomír Malinovský""",,"[""Československo""]",,"""Narozen 25.3.1931 v Šelešovicí…",,,,,,,,,,,,"""muž""","[""Brno""]",,1931.0,"""+1931-03-25T00:00:00Z""",1997.0,"""+1997-04-01T00:00:00Z""",,,
"""1""","""Vedral, Jiří,""","""mzk2003169026""","[""aut""]","""1973-""",,,,,"""nkc20051448821""",""" nam a22 a 4500""","""051021s2005 xr g d 0…","[""(brož.)""]",,"[""80-86711-74-9""]",,,,,,,"""1""","""0""","""Dánsko-český biologický slovní…",,"""J. Vedral""",,,,,,"[""40 s. ;""]",,"[""21 cm""]",,,,…,,,2005,40,"""Q88304255""",,,,,[],"""Jiří Vedral""","""Jiří Vedral""",,"[""Česko""]",,,"""Czech translator and lexicogra…",,,"[""překladatel"", ""lexikograf""]",,,,,,,,"""muž""","[""Praha""]",,1973.0,"""+1973-08-29T00:00:00Z""",,,,,
"""1""","""Jáneš, Vlastimil,""","""jo2001100075""","[""aut""]","""1935-""",,,,,"""ck8402416""",""" nam a22 4500""","""840710s1984 xr a u0…","[""(Brož.) :""]","[""17,50 Kčs""]",,,,,,,,"""1""","""0""","""Technické vybavení počítačů pr…",,"""Vlastimil Jáneš ; Peter Liška …",,,,,,"[""374 s. :""]","[""obr., fot., schémata ;""]","[""29 cm""]",,,,…,,,1984,374,"""Q95108274""",,,,,[],"""Vlastimil Jáneš""","""Vlastimil Jáneš""",,,,"""Narozen 28.10.1935 v Horním Tř…",,,,,"[""tajný spolupracovník""]",,,,,,,"""muž""","[""Horní Třešňovec""]",,1935.0,"""+1935-10-28T00:00:00Z""",,,,,
"""1""","""Prouza, Daniel,""","""mzk2006317981""","[""aut""]","""1975-""",,,,,"""nkc20061637415""",""" nam a22 a 4500""","""060110s2005 xr f 0…","[""(v knize neuvedeno ;"", ""brož.) :""]","[""neprodejné""]","[""80-239-6622-7""]",,,,,,,"""1""","""0""","""Daňová kriminalita""",,"""Daniel Prouza""",,,,,,"[""260 s. ;""]",,"[""21 cm""]",,,,…,,,2005,260,"""Q95113390""",,,,,[],"""Daniel Prouza""","""Daniel Prouza""",,,,"""Narozen 3. 1. 1975 v Lanškroun…",,,,,,,,,,,,"""muž""","[""Lanškroun""]",,1975.0,"""+1975-01-03T00:00:00Z""",,,,,
"""1""","""Konečná, Marika""","""jx20080808011""","[""aut""]",,,,,,"""np9418546""",""" nam a22 4500""","""940426s1993 xr a u0…","[""(brož.) :""]","[""Kč 20,00""]","[""80-7079-819-X""]",,,,,,,"""1""","""0""","""Založení a řízení malých a stř…","""určeno pro stud. fak. podnikoh…","""Marika Konečná""",,,,,,"[""187 s. :""]","[""tab. ;""]","[""29 cm""]",,,,…,,,1993,187,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""1""","""Hrušovský, Ján,""","""jn19990209273""","[""aut""]","""1892-1975""",,,,,"""cpk20081797411""",""" nam a22 a 4500""","""080321s1936 xr …",,,,,,,,,,"""1""","""0""","""Peter Pavel na prahu Nového sv…",,"""Ján Hrušovský""","[""[Kniha druhá] /""]",,,,,"[""251 s. ;""]",,"[""8°""]",,,,…,,,1936,251,"""Q12027367""",,,,,"[""cs"", ""sk""]","""Ján Hrušovský""","""Ján Hrušovský""",,"[""Československo""]",,"""slovenský spisovatel""","""Slovak writer (1892–1975)""","[""Ivan Hrušovský""]",,"[""spisovatel"", ""novinář""]",,,,,,,,"""muž""","[""Nové Mesto nad Váhom""]","[""Bratislava""]",1892.0,"""+1892-02-04T00:00:00Z""",1975.0,"""+1975-03-07T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Okamura, Tomio,""","""xx0092464""","[""aut""]","""1972-""",,,,,"""nkc20132469428""",""" nam a22 a 4500""","""130624s2013 xr a e 0…","[""(váz.)""]",,"[""978-80-253-1963-5""]",,,,,,,"""1""","""0""","""Umění přímé demokracie""","""kam dál, Česko? /""","""Tomio Okamura, Jaroslav N. Več…",,,,,,"[""111 s. :""]","[""il. ;""]","[""24 cm""]",,,,…,,,2013,111,"""Q2223090""","[""Bludný balvan""]",,"""tomio.cz""","""tomio.cz""","[""it"", ""ru"", … ""cs""]","""Tomio Okamura""","""Tomio Okamura""",,"[""Česko""]",,"""český podnikatel, politik, pos…","""Czech far-right politician""","[""Ruy Okamura""]",,"[""politik"", ""podnikatel"", … ""travel agent""]",,,"[""Osamu Okamura"", ""Hayato Okamura""]","[""Úsvit – Národní koalice"", ""Svoboda a přímá demokracie""]","""tomio_cz""",,,"""muž""","[""Itabaši""]",,1972.0,"""+1972-07-04T00:00:00Z""",,,"""http://www.tomio.cz/""","""https://cs.wikipedia.org/wiki/…","""https://en.wikipedia.org/wiki/…"


In [21]:
def zebricek(sloupec):
    return df.explode(sloupec).group_by(sloupec).len().sort(by="len",descending=True).head(10)

In [22]:
zebricek("vezeni")

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [23]:
zebricek("strany")

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [24]:
zebricek("profese")

profese,len
str,u32
,81272
"""spisovatel""",72968
"""učitel""",36484
"""vysokoškolský učitel""",32279
"""překladatel""",30444
"""básník""",29000
"""pedagog""",28848
"""publicista""",25746
"""redaktor""",24075
"""historik""",21399


In [25]:
zebricek("udalosti")

udalosti,len
str,u32
,297772
"""ordinace""",803
"""odvod branců""",442
"""svatba""",411
"""Transport Ek""",117
"""Transport L""",64
"""Transport Cc""",64
"""svěcení""",60
"""emigrace""",58
"""Transport Ds""",58


In [26]:
def zkusenost(sloupec="", nazev="", hodnota=None, rok=1920):
    vysledek = df.filter(pl.col("rok") >= rok).group_by("rok").len().sort(by="rok")
    if hodnota != None:
        srovnani = df.explode(sloupec).filter(pl.col(sloupec) == hodnota).group_by("rok").len()
    else:
        srovnani = df.filter(~pl.col(sloupec).is_null()).group_by("rok").len()
    return vysledek.join(srovnani, on='rok', how='left').with_columns((pl.col('len_right') / pl.col('len')).alias('podil')).with_columns(pl.lit(nazev).alias("co"))

In [27]:
kriminal = zkusenost(sloupec="vezeni", nazev="pobyt ve vězení")

In [28]:
ksc = zkusenost(sloupec="strany", hodnota="Komunistická strana Československa", nazev="členství v KSČ")

In [30]:
terezin = zkusenost(sloupec="vezeni", nazev="internace v Terezíně", hodnota="Malá pevnost Terezín")

In [31]:
instagram = zkusenost(sloupec="instagram",nazev="účet na Instagramu")

In [33]:
fb = zkusenost(sloupec="facebook", nazev="účet na Facebooku")

In [38]:
web=zkusenost(sloupec="web",nazev="webové stránky")
web

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1920,1209,,,"""webové stránky"""
1921,1088,2,0.001838,"""webové stránky"""
1922,1249,3,0.002402,"""webové stránky"""
1923,1282,2,0.00156,"""webové stránky"""
1924,1433,5,0.003489,"""webové stránky"""
1925,1501,,,"""webové stránky"""
1926,1422,3,0.00211,"""webové stránky"""
1927,1380,5,0.003623,"""webové stránky"""
1928,1447,6,0.004147,"""webové stránky"""
1929,1469,9,0.006127,"""webové stránky"""


In [43]:
terezin.sort(by="rok")

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1920,1209,21,0.01737,"""internace v Terezíně"""
1921,1088,7,0.006434,"""internace v Terezíně"""
1922,1249,9,0.007206,"""internace v Terezíně"""
1923,1282,12,0.00936,"""internace v Terezíně"""
1924,1433,30,0.020935,"""internace v Terezíně"""
1925,1501,20,0.013324,"""internace v Terezíně"""
1926,1422,28,0.019691,"""internace v Terezíně"""
1927,1380,29,0.021014,"""internace v Terezíně"""
1928,1447,27,0.018659,"""internace v Terezíně"""
1929,1469,29,0.019741,"""internace v Terezíně"""


In [68]:
podily_faceted = alt.Chart(
    alt_friendly(pl.concat([kriminal, ksc, web])), 
    title={'text': ["Co měli čeští spisovatelé za sebou nebo před sebou"], "subtitle":
        ["Jak velkou část knih poprvé vydaných v daném roce napsali",
        "lidé s určitou životní zkušeností – bez ohledu na to, kdy tuto",
        "zkušenost udělali. Povšimněte si drobných zubů v roce 1990:", 
        "po revoluci začali vycházet jak lidé dříve věznění, tak vyloučení",
        "členové KSČ. Data jsou neúplná, reálné podíly budou spíše vyšší."
    ]}, width=300).mark_bar(width=2).encode(
    alt.X("rok:T", 
          title=None,
          axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')
    ),
    alt.Y('podil:Q', 
          axis=alt.Axis(labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6'), 
          title=None,
          scale=alt.Scale(domainMax=0.1)
    ),
    alt.Color("co:N", 
              title=None, 
              legend=None,
              scale=alt.Scale(range=['#445B78', '#5E2D3A', '#D6534B', '#9CA545','#E09DA3']),
              sort=["účet na Facebooku","pobyt ve vězení","členství v KSČ"]
    ),
    row=alt.Row(
        "co:N", 
        title=None, 
        spacing=15, 
        header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='middle', labelFont='Asap'),
        sort=["členství v KSČ","pobyt ve vězení","účet na Facebooku"]
    )
).resolve_scale(
    x='independent',
    y='independent'
).properties(height=100, width=220).configure_view(stroke='transparent').resolve_scale(x="shared").resolve_axis(x="independent")

podily_faceted

In [70]:
me_to_neurazi(podily_faceted, soubor="02_zkusenosti", kredity=kredity['default'])

<figure><a href="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" target="_blank"><img src="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" width="100%" alt="Graf s titulkem „['Co měli čeští spisovatelé za sebou nebo před sebou']“. Další texty by měly být čitelné ze zdrojového souboru SVG." /></a></figure>
<figure><a href="https://michalkasparek.cz/sklad/02_zkusenosti.svg" target="_blank"><img src="https://michalkasparek.cz/sklad/02_zkusenosti.svg" width="100%" alt="Graf s titulkem „['Co měli čeští spisovatelé za sebou nebo před sebou']“. Další texty by měly být čitelné ze zdrojového souboru SVG." /></a></figure>


In [27]:
df.group_by("rok").len().sort(by="rok")

rok,len
i64,u32
1801,5
1802,3
1803,5
1804,7
1805,9
1806,4
1807,9
1808,5
1809,4
1810,4
