In [1]:
import os
import json
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.me_to_neurazi import me_to_neurazi

# Shared chart settings (read later as kredity['sirka'], kredity['vyska_nizkych']
# and kredity['wiki']) come from a JSON file shipped next to the helpers.
cesta_kredity = os.path.join('src', 'kredity.json')
with open(cesta_kredity, 'r', encoding='utf-8') as soubor_kredity:
    kredity = json.load(soubor_kredity)

# Display / plotting configuration for the whole notebook.
pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()

# Register the project theme under the name 'irozhlas' and switch to it.
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')

# NOTE(review): this silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

In [2]:
def _nacti_pole(pole):
    """Read the parquet file holding one catalogue field from data/cnb_sloupce."""
    return pl.read_parquet(os.path.join("data/cnb_sloupce", f"{pole}.parquet"))


# Start from field 100 and attach the two fields needed for the record-type filter.
df = _nacti_pole("100")
for pole in ("leader", "008"):
    df = df.join(_nacti_pole(pole), left_on="001", right_on="001", how="left")

# Record-type filter, done natively in Polars (no pandas round-trip needed):
#   leader[6]      must be "a" or "t"
#   leader[7]      must not be "b", "i", "s" or " "
#   008[15:17]     must be "xr"
#   008[35:38]     must be "cze"
# Rows with a missing leader or 008 are dropped, as before.
# NOTE(review): assuming these are MARC 21 positions (record type, bibliographic
# level, place of publication, language) — confirm against the data source.
df = df.filter(
    pl.col("leader").str.slice(6, 1).is_in(["a", "t"])
    & ~pl.col("leader").str.slice(7, 1).is_in(["b", "i", "s", " "])
    & (pl.col("008").str.slice(15, 2) == "xr")
    & (pl.col("008").str.slice(35, 3) == "cze")
)

# Attach the remaining descriptive fields only to the rows that survived the filter.
for pole in ("020", "022", "245", "300", "655", "700"):
    df = df.join(_nacti_pole(pole), left_on="001", right_on="001", how="left")

# Keep only records without any 022_a value.
df = df.explode("022_a").filter(pl.col("022_a").is_null())

# Derived columns computed by the project helpers.
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))

# Restrict to the studied period and to publications longer than 30 pages.
df = df.filter(pl.col("rok").is_between(1800,2024))
df = df.filter(pl.col("stran") > 30)

# Only records whose main author has an authority id (100_7).
df = df.drop_nulls(subset=['100_7'])
# Drop records whose 245_h mentions "grafika" and keep one row per (author, title).
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a"], keep="first")
print(len(df))

716789
449120


In [3]:
aut = pl.read_parquet(os.path.join("data", "aut_vyber.parquet"))

# Authority ids (100_7) of all authority records whose 370_c mentions "Česk".
cesi = (
    aut.explode("370_c")
    .filter(pl.col("370_c").str.contains("Česk"))
    .explode("100_7")
    .get_column("100_7")
    .to_list()
)
print(len(cesi))

# Keep only books whose main author is in that list.
df = df.filter(pl.col("100_7").is_in(cesi))

364420


In [4]:
# Per-author attributes loaded from data/wikidata.parquet; joined onto `df` below.
wikid = pl.read_parquet(os.path.join("data","wikidata.parquet"))

In [5]:
# Number of rows in the Wikidata table.
len(wikid)

197515

In [6]:
# Peek at three random rows; the fixed seed keeps the preview stable across re-runs.
df.sample(3, seed=42)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64
"""1""","""Moravec, Vladimír""","""jx20041124019""","[""edt""]",,,,,,"""bk195803432""",""" nam a22 1 4500""","""980526s1957 xr e 0…",,,,,,,,,,"""1""","""0""","""Právnický heslovník""",,"""zpracoval Vladimír Moravec""","[""(1. dodatek) /""]",,,,,"[""71 s. ;""]",,"[""8°""]",,,,"[""7""]","[""příručky""]","[""fd133209""]","[""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1957,71
"""1""","""Kolařík, Zdeněk,""","""jo20221148965""","[""aut""]","""1956-""",,,,,"""nkc20223406953""",""" nam a22 i 4500""","""220324s2021 xr a g 0…","[""(vázáno)""]",,"[""978-80-263-1657-2""]",,,,,,,"""1""","""0""","""Platit srdcem člověka""",,"""Zdeněk Kolařík""",,,,,,"[""59 stran :""]","[""barevné ilustrace ;""]","[""15 cm""]",,,,"[""7"", ""9""]","[""česká poezie"", ""Czech poetry""]","[""fd133958"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021,59
"""1""","""Janoušek, Karel,""","""jn99240000363""","[""aut""]","""1946-""",,,,,"""nkc20152661809""",""" nam a22 a 4500""","""150211s2015 xr ak e f 0…","[""(brož.) :""]","[""Kč 440,00""]","[""978-80-87173-30-5""]",,,,,,,"""1""","""0""","""Cestovní náhrady a daň silničn…",,"""Karel Janoušek""",,,,,,"[""374 s. :""]","[""il., formuláře ;""]","[""21 cm""]",,,,"[""7"", ""9""]","[""příručky"", ""handbooks and manuals""]","[""fd133209"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2015,374


In [7]:
# Left-join the Wikidata columns onto the books via the author id: the key column of
# `wikid` is stored as `__index_level_0__` and is renamed to `100_7` for the join.
# NOTE(review): this cell reassigns `df`, so running it twice joins the Wikidata
# columns a second time — re-run the loading cells first.
df = df.join(wikid.rename({"__index_level_0__":"100_7"}), left_on='100_7', right_on='100_7', how='left')

In [8]:
# Keep a book when its year (`rok`) is not later than the author's death year
# (`w_umrti`), or when no death year is recorded.
vydano_do_umrti = pl.col('w_umrti') >= pl.col('rok')
umrti_nezname = pl.col('w_umrti').is_null()
df = df.filter(vydano_do_umrti | umrti_nezname)

In [9]:
# Show the frame after the Wikidata join and the lifetime filter.
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Brož, Josef,""","""mzk2002146604""","[""aut""]","""1921-2020""",,,,,"""nkc20061640270""",""" nam a22 a 4500""","""060130s2005 xr a g 0…","[""(brož.)""]",,"[""80-903659-2-2""]",,,,,,,"""1""","""0""","""O pejskovi Šmudlíkovi""",,"""Josef Brož ; [kresby Josef Mar…",,,,,,"[""47 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,,2005,47,"""Q95480379""",,,,,[],"""Josef Brož""","""Josef Brož""",,,,"""Narozen 27. 1. 1921 v obci Lho…",,,,"[""pedagog"", ""spisovatel"", … ""středoškolský učitel""]",,,,,,,,"""muž""","[""Lhotice""]",,1921.0,"""+1921-01-27T00:00:00Z""",,,,,
"""1""","""Adamec, Čeněk,""","""jk01010038""","[""edt""]","""1916-1997""",,,,,"""bk194702345""",""" nam a22 1 4500""","""980818s1947 xr …","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Nevědomost zabíjí!""","""úmrtnost kojenců v zrcadle veř…","""zpracovali: Ing. Dr. Čeněk Ada…",,,,,,"[""31-[I] s. ;""]",,"[""8°""]",,,,…,,,1947,31,"""Q65451216""",,,,,"[""de"", ""cs""]","""Čeněk Adamec""","""Čeněk Adamec""",,"[""Československo"", ""Česko""]",,"""český sociolog""","""Czech sociologist""",,,"[""sociolog""]",,,,,,,,"""muž""","[""Ivančice"", ""Padochov""]","[""Praha""]",1916.0,"""+1916-11-19T00:00:00Z""",1997.0,"""+1997-10-12T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Polanecký, Karel,""","""ola2004209203""","[""aut""]","""1971-""",,,,,"""cpk20031199560""",""" nam a22 a 4500""","""030319s2002 xr a e 0…","[""(brož.)""]",,"[""80-902823-6-9""]",,,,,,,"""1""","""0""","""Jak využívat obnovitelné zdroj…","""praktický rádce pro domácnosti…","""Karel Polanecký a Jiří Bursa""",,,,,,"[""90 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,,2002,90,"""Q112381017""",,,,,[],"""Karel Polanecký""","""Karel Polanecký""",,,,"""Narozen 4. 6. 1971. Energetick…",,,,"[""redaktor"", ""ekolog"", ""editor""]",,,,,,,,"""muž""",,,1971.0,"""+1971-06-04T00:00:00Z""",,,,,
"""1""","""Hruška, František,""","""jn20010310431""","[""aut""]","""1941-""",,,,,"""nkc20071717490""",""" nam a22 a 4500""","""070425s2007 xr a e p 0…","[""(brož.)""]",,"[""978-80-7318-535-0""]",,,,,,,"""1""","""0""","""Technické prostředky informati…","""(úvod, popis funkce, konstrukc…","""František Hruška""",,,,,,"[""193 s. :""]","[""il. ;""]","[""30 cm""]",,,,…,,,2007,193,"""Q112363576""",,,,,[],"""František Hruška""",,,,,"""Narodil se r. 1941 v Kunovicíc…",,,,"[""vysokoškolský učitel""]",,,,,,,,,,,1941.0,"""+1941-00-00T00:00:00Z""",,,,,
"""1""","""Vogel, Josef""","""ola2006345500""","[""aut""]",,,,,,"""ck8606900""",""" nam a22 4500""","""861014s1985 xr a u0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Fel Pascal SMEP""",,"""Josef Vogel, Jiří Daněček""",,,,,,"[""206 s. :""]","[""tb., schémata ;""]","[""23 cm""]",,,,…,,,1985,206,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""1""","""Sedláková, Bohumila,""","""mzk2004156918""","[""aut""]","""1952-""",,,,,"""ck8502006""",""" nam a22 4500""","""850312s1984 xr e b 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Výchova k socialistickému vlas…","""výběrový bibliografický soupis…","""zprac. Bohumila Sedláková""",,,,,,"[""31 s. ;""]",,"[""20 cm""]",,,,…,,,1984,31,"""Q95112444""",,,,,"[""cs""]","""Bohumila Sedláková""","""Bohumila Sedláková""",,,,"""Narozena 13. 2. 1952 ve Vyškov…",,,,"[""pedagog"", ""bibliograf""]",,,,,,,,"""žena""","[""Vyškov""]",,1952.0,"""+1952-02-13T00:00:00Z""",,,,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Krejza, Miloš,""","""jk01063270""","[""aut""]","""1914-""",,,,,"""bk193602557""",""" nam a22 i 4500""","""990318s1936 xr e 0…","[""(Brožováno)""]",,,,,,,,,"""1""","""0""","""Bedřich Ozanam""","""křesťan v bezpečné blízkosti b…","""Miloš Krejza""",,,,,,"[""53 stran ;""]",,"[""19 cm""]",,,,…,,,1936,53,"""Q105304765""",,,,,[],"""Miloš Krejza""","""Miloš Krejza""",,,,,,,,"[""básník"", ""překladatel"", … ""editor""]",,,,,,,,"""muž""","[""Vražkov""]",,1914.0,"""+1914-01-01T00:00:00Z""",,,,,
"""1""","""Boukal, Petr,""","""mzk2004237320""","[""aut""]","""1962-2022""",,,,,"""cpk20010994800""",""" cam a22 a 4500""","""010503s2001 xr e p 0…","[""(brož.)""]",,"[""80-245-0141-4""]",,,,,,,"""1""","""0""","""Podniková ekonomika""","""cvičebnice /""","""Petr Boukal, Hana Mikovcová, H…",,,,,,"[""143 s. ;""]",,"[""30 cm""]",,,,…,,,2001,143,"""Q112385903""",,,,,[],"""Petr Boukal""","""Petr Boukal""",,,,"""Narozen 1962. Inženýr ekonomie…",,,,"[""vysokoškolský učitel""]",,,,,,,,"""muž""","[""Praha""]",,1962.0,"""+1962-00-00T00:00:00Z""",,,,,
"""1""","""Vaněček, Jan,""","""mzk2004261193""","[""aut""]","""1946-""",,,,,"""nkc20081789779""",""" nam a22 a 4500""","""080829s2008 xr ach e 0…","[""(brož.)""]",,"[""978-80-254-1434-7""]",,,,,,,"""1""","""0""","""Bílé vzpomínání""",,"""Jan Vaněček ; [kresby Miroslav…",,,,,,"[""135 s. :""]","[""il. (některé barev.), portréty, faksim. ;""]","[""23 cm""]",,,,…,,,2008,135,"""Q11775066""",,,,,"[""cs""]","""Jan Vaněček""","""Jan Vaněček""",,"[""Československo""]",,"""český spisovatel""","""Czech writer""",,,"[""spisovatel""]",,,,,,,,"""muž""","[""Český Krumlov""]",,1946.0,"""+1946-10-07T00:00:00Z""",,,,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Kratochvil, Jan,""","""mzk2004217426""","[""aut""]","""1951-""",,,,,"""nkc20081825102""",""" nam a22 a 4500""","""081030s2008 xr ach e c 0…","[""(v knize neuvedeno ;"", ""váz.)""]",,"[""978-80-254-3861-9""]",,,,,,,"""1""","""0""","""90. výročí založení Českoslove…","""90th anniversary of creation o…","""Jan a Sabina Kratochvilovi a k…",,,,,,"[""204 s. :""]","[""il. (některé barev.), portréty, faksim. ;""]","[""31 cm""]",,,,…,,,2008,204,"""Q86433675""","[""Cena města Brna""]",,,,[],"""Jan Kratochvil""","""Jan Kratochvil""","[""Sabina Kratochvilová""]","[""Československo"", ""Česko""]",,"""český regionální publicista""","""Czech opinion journalist""",,,"[""publicista"", ""malíř"", … ""ilustrátor""]",,,"[""Jef Kratochvil""]",,,,,"""muž""","[""Brno""]",,1951.0,"""+1951-12-10T00:00:00Z""",,,,,


In [10]:
# Row counts per value of the `strany` list column, most frequent first.
# Rows without any value end up in the null group.
strany = (
    df.explode('strany')
    .group_by('strany')
    .len()
    .sort(by='len', descending=True)
)

In [11]:
# Display the ranking computed in the previous cell.
strany

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [12]:
# Row counts per value of the `vezeni` list column, most frequent first.
# Rows without any value end up in the null group.
vezeni = (
    df.explode('vezeni')
    .group_by('vezeni')
    .len()
    .sort(by='len', descending=True)
)
vezeni

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [13]:
# Display `df` again; the ranking cells above did not reassign it.
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Brož, Josef,""","""mzk2002146604""","[""aut""]","""1921-2020""",,,,,"""nkc20061640270""",""" nam a22 a 4500""","""060130s2005 xr a g 0…","[""(brož.)""]",,"[""80-903659-2-2""]",,,,,,,"""1""","""0""","""O pejskovi Šmudlíkovi""",,"""Josef Brož ; [kresby Josef Mar…",,,,,,"[""47 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,,2005,47,"""Q95480379""",,,,,[],"""Josef Brož""","""Josef Brož""",,,,"""Narozen 27. 1. 1921 v obci Lho…",,,,"[""pedagog"", ""spisovatel"", … ""středoškolský učitel""]",,,,,,,,"""muž""","[""Lhotice""]",,1921.0,"""+1921-01-27T00:00:00Z""",,,,,
"""1""","""Adamec, Čeněk,""","""jk01010038""","[""edt""]","""1916-1997""",,,,,"""bk194702345""",""" nam a22 1 4500""","""980818s1947 xr …","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Nevědomost zabíjí!""","""úmrtnost kojenců v zrcadle veř…","""zpracovali: Ing. Dr. Čeněk Ada…",,,,,,"[""31-[I] s. ;""]",,"[""8°""]",,,,…,,,1947,31,"""Q65451216""",,,,,"[""de"", ""cs""]","""Čeněk Adamec""","""Čeněk Adamec""",,"[""Československo"", ""Česko""]",,"""český sociolog""","""Czech sociologist""",,,"[""sociolog""]",,,,,,,,"""muž""","[""Ivančice"", ""Padochov""]","[""Praha""]",1916.0,"""+1916-11-19T00:00:00Z""",1997.0,"""+1997-10-12T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Polanecký, Karel,""","""ola2004209203""","[""aut""]","""1971-""",,,,,"""cpk20031199560""",""" nam a22 a 4500""","""030319s2002 xr a e 0…","[""(brož.)""]",,"[""80-902823-6-9""]",,,,,,,"""1""","""0""","""Jak využívat obnovitelné zdroj…","""praktický rádce pro domácnosti…","""Karel Polanecký a Jiří Bursa""",,,,,,"[""90 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,,2002,90,"""Q112381017""",,,,,[],"""Karel Polanecký""","""Karel Polanecký""",,,,"""Narozen 4. 6. 1971. Energetick…",,,,"[""redaktor"", ""ekolog"", ""editor""]",,,,,,,,"""muž""",,,1971.0,"""+1971-06-04T00:00:00Z""",,,,,
"""1""","""Hruška, František,""","""jn20010310431""","[""aut""]","""1941-""",,,,,"""nkc20071717490""",""" nam a22 a 4500""","""070425s2007 xr a e p 0…","[""(brož.)""]",,"[""978-80-7318-535-0""]",,,,,,,"""1""","""0""","""Technické prostředky informati…","""(úvod, popis funkce, konstrukc…","""František Hruška""",,,,,,"[""193 s. :""]","[""il. ;""]","[""30 cm""]",,,,…,,,2007,193,"""Q112363576""",,,,,[],"""František Hruška""",,,,,"""Narodil se r. 1941 v Kunovicíc…",,,,"[""vysokoškolský učitel""]",,,,,,,,,,,1941.0,"""+1941-00-00T00:00:00Z""",,,,,
"""1""","""Vogel, Josef""","""ola2006345500""","[""aut""]",,,,,,"""ck8606900""",""" nam a22 4500""","""861014s1985 xr a u0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Fel Pascal SMEP""",,"""Josef Vogel, Jiří Daněček""",,,,,,"[""206 s. :""]","[""tb., schémata ;""]","[""23 cm""]",,,,…,,,1985,206,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""1""","""Sedláková, Bohumila,""","""mzk2004156918""","[""aut""]","""1952-""",,,,,"""ck8502006""",""" nam a22 4500""","""850312s1984 xr e b 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Výchova k socialistickému vlas…","""výběrový bibliografický soupis…","""zprac. Bohumila Sedláková""",,,,,,"[""31 s. ;""]",,"[""20 cm""]",,,,…,,,1984,31,"""Q95112444""",,,,,"[""cs""]","""Bohumila Sedláková""","""Bohumila Sedláková""",,,,"""Narozena 13. 2. 1952 ve Vyškov…",,,,"[""pedagog"", ""bibliograf""]",,,,,,,,"""žena""","[""Vyškov""]",,1952.0,"""+1952-02-13T00:00:00Z""",,,,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Krejza, Miloš,""","""jk01063270""","[""aut""]","""1914-""",,,,,"""bk193602557""",""" nam a22 i 4500""","""990318s1936 xr e 0…","[""(Brožováno)""]",,,,,,,,,"""1""","""0""","""Bedřich Ozanam""","""křesťan v bezpečné blízkosti b…","""Miloš Krejza""",,,,,,"[""53 stran ;""]",,"[""19 cm""]",,,,…,,,1936,53,"""Q105304765""",,,,,[],"""Miloš Krejza""","""Miloš Krejza""",,,,,,,,"[""básník"", ""překladatel"", … ""editor""]",,,,,,,,"""muž""","[""Vražkov""]",,1914.0,"""+1914-01-01T00:00:00Z""",,,,,
"""1""","""Boukal, Petr,""","""mzk2004237320""","[""aut""]","""1962-2022""",,,,,"""cpk20010994800""",""" cam a22 a 4500""","""010503s2001 xr e p 0…","[""(brož.)""]",,"[""80-245-0141-4""]",,,,,,,"""1""","""0""","""Podniková ekonomika""","""cvičebnice /""","""Petr Boukal, Hana Mikovcová, H…",,,,,,"[""143 s. ;""]",,"[""30 cm""]",,,,…,,,2001,143,"""Q112385903""",,,,,[],"""Petr Boukal""","""Petr Boukal""",,,,"""Narozen 1962. Inženýr ekonomie…",,,,"[""vysokoškolský učitel""]",,,,,,,,"""muž""","[""Praha""]",,1962.0,"""+1962-00-00T00:00:00Z""",,,,,
"""1""","""Vaněček, Jan,""","""mzk2004261193""","[""aut""]","""1946-""",,,,,"""nkc20081789779""",""" nam a22 a 4500""","""080829s2008 xr ach e 0…","[""(brož.)""]",,"[""978-80-254-1434-7""]",,,,,,,"""1""","""0""","""Bílé vzpomínání""",,"""Jan Vaněček ; [kresby Miroslav…",,,,,,"[""135 s. :""]","[""il. (některé barev.), portréty, faksim. ;""]","[""23 cm""]",,,,…,,,2008,135,"""Q11775066""",,,,,"[""cs""]","""Jan Vaněček""","""Jan Vaněček""",,"[""Československo""]",,"""český spisovatel""","""Czech writer""",,,"[""spisovatel""]",,,,,,,,"""muž""","[""Český Krumlov""]",,1946.0,"""+1946-10-07T00:00:00Z""",,,,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Kratochvil, Jan,""","""mzk2004217426""","[""aut""]","""1951-""",,,,,"""nkc20081825102""",""" nam a22 a 4500""","""081030s2008 xr ach e c 0…","[""(v knize neuvedeno ;"", ""váz.)""]",,"[""978-80-254-3861-9""]",,,,,,,"""1""","""0""","""90. výročí založení Českoslove…","""90th anniversary of creation o…","""Jan a Sabina Kratochvilovi a k…",,,,,,"[""204 s. :""]","[""il. (některé barev.), portréty, faksim. ;""]","[""31 cm""]",,,,…,,,2008,204,"""Q86433675""","[""Cena města Brna""]",,,,[],"""Jan Kratochvil""","""Jan Kratochvil""","[""Sabina Kratochvilová""]","[""Československo"", ""Česko""]",,"""český regionální publicista""","""Czech opinion journalist""",,,"[""publicista"", ""malíř"", … ""ilustrátor""]",,,"[""Jef Kratochvil""]",,,,,"""muž""","[""Brno""]",,1951.0,"""+1951-12-10T00:00:00Z""",,,,,


In [14]:
def zebricek(sloupec, n=10):
    """Rank the values of one column of the notebook-level ``df`` by row count.

    Args:
        sloupec: Name of the column to rank; it is exploded first, so it is
            expected to be a list column.
        n: How many of the most frequent values to return (default 10).

    Returns:
        DataFrame with columns ``sloupec`` and ``len``, sorted by ``len``
        descending and cut to ``n`` rows. Rows without a value are counted
        under the null group.
    """
    return (
        df.explode(sloupec)
        .group_by(sloupec)
        .len()
        .sort(by="len", descending=True)
        .head(n)
    )

In [15]:
# Most frequent values of the `vezeni` column.
zebricek("vezeni")

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [16]:
# Most frequent values of the `strany` column.
zebricek("strany")

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [17]:
# Most frequent values of the `profese` column.
zebricek("profese")

profese,len
str,u32
,81272
"""spisovatel""",72968
"""učitel""",36484
"""vysokoškolský učitel""",32279
"""překladatel""",30444
"""básník""",29000
"""pedagog""",28848
"""publicista""",25746
"""redaktor""",24075
"""historik""",21399


In [18]:
# Most frequent values of the `udalosti` column.
zebricek("udalosti")

udalosti,len
str,u32
,297772
"""ordinace""",803
"""odvod branců""",442
"""svatba""",411
"""Transport Ek""",117
"""Transport L""",64
"""Transport Cc""",64
"""svěcení""",60
"""emigrace""",58
"""Transport Ds""",58


In [19]:
def zkusenost(sloupec="", nazev="", hodnota=None, rok=1900):
    """Yearly share of rows in the notebook-level ``df`` that match a criterion.

    Args:
        sloupec: Column of ``df`` to test. Must be a list column when
            ``hodnota`` is given.
        nazev: Label written into the ``co`` column of the result.
        hodnota: When given, a row matches if its ``sloupec`` list contains
            this value. When None, a row matches if ``sloupec`` is filled in
            (for list columns: holds at least one item).
        rok: First year (``rok`` column) to include.

    Returns:
        DataFrame sorted by ``rok`` with the columns ``rok``, ``len`` (all rows
        of that year), ``len_right`` (matching rows; null when there are none),
        ``podil`` (``len_right / len``) and ``co``.
    """
    sloupec_expr = pl.col(sloupec)
    if hodnota is not None:
        # Each row counts at most once, even if the value is listed repeatedly.
        ma_zkusenost = sloupec_expr.list.contains(hodnota)
    elif df.schema[sloupec] == pl.List:
        # An empty list carries no information, so it is not counted as a match.
        ma_zkusenost = sloupec_expr.list.len() > 0
    else:
        ma_zkusenost = sloupec_expr.is_not_null()

    celkem = df.filter(pl.col("rok") >= rok).group_by("rok").len()
    srovnani = df.filter(ma_zkusenost).group_by("rok").len()

    # The left join keeps every year from `celkem`; years outside the range
    # that only occur in `srovnani` are discarded.
    return (
        celkem.join(srovnani, on="rok", how="left")
        .with_columns((pl.col("len_right") / pl.col("len")).alias("podil"))
        .sort(by="rok")
        .with_columns(pl.lit(nazev).alias("co"))
    )

In [20]:
# Yearly share of rows with any value in `vezeni`.
kriminal = zkusenost(sloupec="vezeni", nazev="pobyt ve vězení nebo koncentračním táboře")

In [21]:
# Yearly share of rows whose `strany` includes "Komunistická strana Československa".
ksc = zkusenost(sloupec="strany", hodnota="Komunistická strana Československa", nazev="členství v KSČ")

In [22]:
# Yearly share of rows whose `vezeni` includes "Malá pevnost Terezín".
# Only inspected in a later cell; not part of the chart visible in this notebook section.
terezin = zkusenost(sloupec="vezeni", nazev="internace v Terezíně", hodnota="Malá pevnost Terezín")

In [23]:
# Yearly share of rows with a value in `instagram`.
# NOTE(review): not used by the chart visible in this notebook section.
instagram = zkusenost(sloupec="instagram",nazev="účet na Instagramu")

In [24]:
# Yearly share of rows with a value in `facebook`.
# NOTE(review): not used by the chart visible in this notebook section (it plots `web`).
fb = zkusenost(sloupec="facebook", nazev="účet na Facebooku")

In [25]:
# Yearly share of rows with a value in `web`; this series is plotted in the chart below.
web=zkusenost(sloupec="web",nazev="webové stránky")
web

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1900,432,,,"""webové stránky"""
1901,602,,,"""webové stránky"""
1902,745,1,0.001342,"""webové stránky"""
1903,631,,,"""webové stránky"""
1904,648,1,0.001543,"""webové stránky"""
1905,613,1,0.001631,"""webové stránky"""
1906,665,1,0.001504,"""webové stránky"""
1907,645,1,0.00155,"""webové stránky"""
1908,788,2,0.002538,"""webové stránky"""
1909,727,,,"""webové stránky"""


In [26]:
# Inspect the Terezín series by year (zkusenost() already returns it sorted by `rok`).
terezin.sort(by="rok")

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1900,432,1,0.002315,"""internace v Terezíně"""
1901,602,1,0.001661,"""internace v Terezíně"""
1902,745,1,0.001342,"""internace v Terezíně"""
1903,631,2,0.00317,"""internace v Terezíně"""
1904,648,3,0.00463,"""internace v Terezíně"""
1905,613,2,0.003263,"""internace v Terezíně"""
1906,665,2,0.003008,"""internace v Terezíně"""
1907,645,5,0.007752,"""internace v Terezíně"""
1908,788,5,0.006345,"""internace v Terezíně"""
1909,727,4,0.005502,"""internace v Terezíně"""


In [27]:
# Labels of the three plotted series. They must equal the `nazev` values passed to
# zkusenost() for `ksc`, `kriminal` and `web`; the previous sort lists named labels
# that do not occur in the data ("účet na Facebooku", "pobyt ve vězení"), so they
# only partly applied.
popisek_ksc = "členství v KSČ"
popisek_vezeni = "pobyt ve vězení nebo koncentračním táboře"
popisek_web = "webové stránky"

podily_faceted = alt.Chart(
    alt_friendly(pl.concat([kriminal, ksc, web])),
    title={
        # A plain string rather than a one-item list, so code that formats the
        # title as text does not end up with a Python list repr.
        'text': "Co měli čeští autoři za sebou nebo před sebou",
        "subtitle": [
            "Jak velkou část knih poprvé vydaných v daném roce napsali",
            "lidé s určitou životní zkušeností – bez ohledu na to, kdy tuto",
            "zkušenost udělali. Povšimněte si drobných zubů v roce 1990:",
            "po revoluci začali vycházet jak lidé dříve věznění, tak vyloučení",
            "členové KSČ. Data jsou neúplná, reálné podíly budou spíše vyšší;",
            "podstatné jsou zde trendy."
        ],
    },
).mark_bar(width=2).encode(
    alt.X("rok:T",
          title=None,
          axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')
    ),
    alt.Y('podil:Q',
          axis=alt.Axis(labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6'),
          title=None,
          scale=alt.Scale(domainMax=0.1)
    ),
    alt.Color("co:N",
              title=None,
              legend=None,
              # Explicit label -> colour pairing (same order the old, partly
              # matching sort list resulted in: listed labels first, the rest after).
              scale=alt.Scale(
                  domain=[popisek_vezeni, popisek_ksc, popisek_web],
                  range=['#5E2D3A', '#D6534B', '#445B78']
              )
    ),
    row=alt.Row(
        "co:N",
        title=None,
        spacing=15,
        header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='start', labelFontWeight=500, labelFont='Asap', labelOrient="top"),
        sort=[popisek_ksc, popisek_vezeni, popisek_web]
    )
).resolve_scale(
    # Each resolve_scale() call sets the whole scale-resolve map, so the earlier
    # x/y='independent' call was overridden by this one and has been dropped.
    x="shared"
).resolve_axis(
    x="independent"
).properties(
    width=kredity['sirka'],
    height=kredity['vyska_nizkych'],
    autosize={'type': 'fit', 'contains': 'padding'}
).configure_view(stroke='transparent').configure_axis(grid=False, domain=False)

podily_faceted

In [28]:
# Hand the chart to the project helper me_to_neurazi (implementation not visible here);
# judging by the recorded output it returns a <figure> snippet linking 02_zkusenosti.svg.
# NOTE(review): in the recorded output the alt text contains the title as a Python
# list repr (['…']), presumably because the chart title text is a list — confirm in the helper.
me_to_neurazi(podily_faceted, soubor="02_zkusenosti", kredity=kredity['wiki'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" width="100%" alt="Graf s titulkem „['Co měli čeští autoři za sebou nebo před sebou']“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


In [29]:
# Number of rows per year — the denominators behind the shares above.
df.group_by("rok").len().sort(by="rok")

rok,len
i64,u32
1801,5
1802,3
1803,5
1804,7
1805,9
1806,4
1807,9
1808,5
1809,4
1810,4
