In [3]:
import os
import json
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.me_to_neurazi import me_to_neurazi

# Chart credit strings, looked up by key further down (e.g. kredity['wiki']).
with open(os.path.join('src', 'kredity.json'), mode='r', encoding='utf-8') as f:
    kredity = json.load(f)

# Display / plotting configuration for the whole notebook.
pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')

# NOTE(review): this silences every warning, including deprecations.
warnings.filterwarnings('ignore')

In [4]:
# Start from the "100" table and attach the leader and 008 columns.
# Every per-field parquet file in data/cnb_sloupce is joined on the key "001".
df = pl.read_parquet(os.path.join("data/cnb_sloupce", "100.parquet"))
for pole in ("leader", "008"):
    df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce", f"{pole}.parquet")), on="001", how="left")

# Record-level filter on fixed character positions, done natively in polars
# (the previous version converted the whole frame to pandas and back just to
# slice strings). Rows with a null leader / 008 are dropped, as before.
#   leader[6]      must be "a" or "t"
#   leader[7]      must not be "b", "i", "s" or a blank
#   008[15:17]     must be "xr"
#   008[35:38]     must be "cze"
# NOTE(review): per MARC 21 these positions look like record type,
# bibliographic level, place of publication and language - confirm.
df = df.filter(
    pl.col("leader").str.slice(6, 1).is_in(["a", "t"])
    & ~pl.col("leader").str.slice(7, 1).is_in(["b", "i", "s", " "])
    & (pl.col("008").str.slice(15, 2) == "xr")
    & (pl.col("008").str.slice(35, 3) == "cze")
)

# Attach the remaining fields only after the filter, so the joins run on
# fewer rows.
for pole in ("020", "022", "245", "300", "655", "700"):
    df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce", f"{pole}.parquet")), on="001", how="left")

# Keep only records with no 022_a value.
df = df.explode("022_a").filter(pl.col("022_a").is_null())

# Derived columns; the parsing itself lives in the src helpers.
# `rok` comes from 008 and `stran` from 300_a (presumably publication year and
# page count - see najdi_rok / pocet_stran).
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))

# Restrict to the studied period and drop items of 30 pages or fewer.
df = df.filter(pl.col("rok").is_between(1800,2024))
df = df.filter(pl.col("stran") > 30)

# Only rows with an author identifier can be matched to other tables later.
df = df.drop_nulls(subset=['100_7'])
# Drop rows whose 245_h mentions "grafika", then keep one row per
# (author name, title) pair.
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a"], keep="first")
print(len(df))

716789
449120


In [5]:
# Authority records: collect the 100_7 identifiers of everyone whose 370_c
# contains "Česk", then keep only titles by those authors.
aut = pl.read_parquet(os.path.join("data", "aut_vyber.parquet"))
cesi = (
    aut.explode("370_c")
    .filter(pl.col("370_c").str.contains("Česk"))
    .explode("100_7")
    .select("100_7")
    .to_series()
    .to_list()
)
print(len(cesi))
df = df.filter(pl.col("100_7").is_in(cesi))

364420


In [6]:
# Wikidata attributes; joined onto df below via its "__index_level_0__" column.
wikid = pl.read_parquet(os.path.join("data","wikidata.parquet"))

In [83]:
# Number of rows in the Wikidata table.
len(wikid)

197515

In [7]:
# Quick look at three rows; the seed is fixed so re-runs pick the same row positions.
df.sample(3, seed=0)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64
"""1""","""Srb, Adolf,""","""jk01120732""","[""aut""]","""1850-1933""",,,,,"""nos190245125""",""" nam a22 a 4500""","""001030s1908 xr 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Šedesát let politického zápasu…",,"""napsal Adolf Srb""",,,,,,"[""323 s. ;""]",,"[""18 cm""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1908,323
"""1""","""Pelikán, Petr,""","""skuk0004513""","[""aut""]","""1964-""",,,,,"""cpk19970214089""",""" nam a22 a 4500""","""970820s1997 xr e 0…","[""(Ediční středisko PF UK ;"", ""brož.)""]",,"[""80-85889-13-7""]",,,,,,,"""1""","""0""","""Sunna""","""pramen islámského práva /""","""Petr Pelikán""",,,,,,"[""163 s. ;""]",,"[""21 cm""]",,,,"[""7""]","[""studie""]","[""fd133597""]","[""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1997,163
"""1""","""Hochmuth, Kristýna""","""ola2011655577""","[""aut""]",,,,,,"""nkc20172892291""",""" nam a22 i 4500""","""170418s2016 xr ach f 0…","[""(Alšova jihočeská galerie ;"", ""vázáno)"", … ""vázáno)""]",,"[""978-80-87799-60-4"", ""978-80-7467-121-0""]",,,,,,,"""1""","""0""","""Hippolyt Soběslav Pinkas""",,"""Kristýna Brožová""",,,,,,"[""130 stran :""]","[""ilustrace (převážně barevné), portréty, faksimile ;""]","[""28 cm""]",,,,"[""7"", ""9""]","[""monografie"", ""monographs""]","[""fd132842"", null]","[""czenas"", ""eczenas""]",,,,"[""1""]","[""Pinkas, Soběslav Hyppolyt,""]","[""art""]","[""1827-1901""]","[""jk01093124""]",,,,,,,,,,,,,,,,,,,,,,2016,130


In [8]:
# Attach the Wikidata columns to every title through the shared key "100_7"
# (stored in wikid under "__index_level_0__").
# NOTE(review): re-running this cell joins a second time - run it once per kernel.
df = df.join(
    wikid.rename({"__index_level_0__": "100_7"}),
    on="100_7",
    how="left",
)

In [9]:
# Keep rows where w_umrti is unknown or not earlier than rok.
df = df.filter(
    pl.col('w_umrti').is_null()
    | (pl.col('rok') <= pl.col('w_umrti'))
)

In [10]:
# Inspect the frame after the Wikidata join and the w_umrti filter.
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Placák, Bedřich,""","""jk01093315""","[""aut""]","""1914-1993""",,,,,"""bk196902149""",""" nam a22 1 4500""","""971004s1969 xr e | 0…",,,,,,,,,,"""1""","""0""","""Střelná poranění plic""",,"""Bedřich Placák""",,,,,,"[""125 s. :""]","[""il., tb. ;""]","[""8°""]",,,,…,,,1969,125,"""Q51000794""",,,,,"[""cs""]","""Bedřich Placák""","""Bedrich Placak""",,"[""Československo"", ""Česko""]",,"""český lékař""","""Surgeon and professor of medic…","[""Petr Placák""]",,"[""lékař"", ""autor memoárů"", … ""spisovatel""]",,,,"[""Komunistická strana Československa""]",,,,"""muž""","[""Vídeň""]","[""Praha""]",1914.0,"""+1914-02-16T00:00:00Z""",1993.0,"""+1993-03-01T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Mach, František Jiří,""","""jk01072887""","[""com""]","""1869-1952""",,,,,"""bk193402220""",""" nam a22 1 4500""","""990809s1934 xr …",,,,,,,,,,"""1""","""0""","""Zpěvoherní repertoire""","""Stručné obsahy českých a cizíc…","""Uspořádali: Fr. a M. Machovi ;…",,,,,,"[""431 - [I] s. ;""]",,"[""8°""]",,,,…,,,1934,431,"""Q12017127""",,,,,"[""cs""]","""František Jiří Mach""","""František Jiří Mach""",,"[""Československo""]",,"""český hudební pedagog a hudebn…","""Czech music educator and compo…",,,"[""hudební skladatel"", ""hudební pedagog"", ""učitel""]",,,,,,,,"""muž""","[""Kutná Hora""]","[""Pečky""]",1869.0,"""+1869-08-05T00:00:00Z""",1952.0,"""+1952-12-10T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Dostal, Adolf Bohuslav,""","""jk01022770""","[""aut""]","""1873-1939""",,,,,"""bkn20010988729""",""" nam a22 1 4500""","""010212s1925 xr c |…",,,,,,,,,,"""1""","""0""","""Studánka mudrců""","""pohádky a vyprávěnky /""","""A. Böhmová""",,,,,,"[""93 s. :""]","[""obr. příl. ;""]","[""8°""]",,,,…,,,1925,93,"""Q20565546""","[""Kříž nezávislosti""]",,,,"[""cs"", ""pl""]","""Adolf Bohuslav Dostal""","""Adolf Bohuslav Dostal""",,,,"""český spisovatel a režisér""",,,"""poprava zastřelením""","[""voják"", ""básník"", … ""redaktor""]",,"[""Právnická fakulta Univerzity Karlovy""]","[""Vilém Dostal"", ""Václav Dostal"", … ""Hana Dostalová""]",,,,,"""muž""","[""Veleslavín"", ""Praha""]","[""Katyň""]",1873.0,"""+1873-12-30T00:00:00Z""",1940.0,"""+1940-00-00T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Machálek, Jan,""","""xx0154711""","[""aut""]","""1939-""",,,,,"""nkc20193093548""",""" cam a22 i 4500""","""190424s2019 xr a e 0…","[""(brožováno) :""]","[""Kč 99,00""]","[""978-80-7364-091-0""]",,,,,,,"""1""","""0""","""Lékař vzpomíná, baví... i když…",,"""Jan Machálek, Marcela Losová""",,,,,,"[""54 stran :""]","[""ilustrace ;""]","[""21 cm""]",,,,…,,,2019,54,"""Q95093657""",,,,,[],"""Jan Machálek""","""Jan Machálek""",,,,"""Narozen 29. 1. 1939 v Moravský…",,,,"[""lékař"", ""mikrobiolog"", … ""učitel""]",,,,,,,,"""muž""","[""Moravské Málkovice""]",,1939.0,"""+1939-01-29T00:00:00Z""",,,,,
"""1""","""Boubela, Rudolf Václav,""","""jk01012763""","[""aut""]","""1880-1934""",,,,,"""bk193004338""",""" nam a22 1 4500""","""000101s1925 xr g 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Brno""","""informační příručka a stručný …","""napsal R. Boubela""",,,,,,"[""67 s. ;""]",,"[""19 cm""]",,,,…,,,1925,67,"""Q95361663""",,,,,[],"""Rudolf Václav Boubela""","""Rudolf Václav Boubela""",,"[""Československo""]",,"""Narozen 9.8.1880 v Moravské Os…","""Czechoslovak journalist (1880-…",,,"[""novinář"", ""redaktor""]",,,,,,,,"""muž""","[""Moravská Ostrava"", ""Ostrava""]","[""Brno""]",1880.0,"""+1880-08-09T00:00:00Z""",1934.0,"""+1934-06-23T00:00:00Z""",,,
"""1""","""Beráková, Zora,""","""jk01011703""","[""aut""]","""1921-2020""",,,,,"""nkc20162833660""",""" nam a22 i 4500""","""160916s2017 xr g 0…","[""(vázáno) :""]","[""Kč 269,00""]","[""978-80-267-0688-5""]",,,,,,,"""1""","""0""","""Vraždy v hotelu""",,"""Zora Beráková""",,,,,,"[""278 stran ;""]",,"[""22 cm""]",,,,…,,,2017,278,"""Q55201729""",,,,,"[""cs""]","""Zora Beráková""","""Zora Beráková""",,"[""Československo"", ""Česko""]",,"""česká spisovatelka a překladat…","""Czech writer and translator""",,,"[""překladatel"", ""spisovatel"", … ""prozaik""]",,"[""Filozofická fakulta Univerzity Karlovy""]",,,,,,"""žena""","[""Vsetín""]",,1921.0,"""+1921-07-27T00:00:00Z""",2020.0,"""+2020-08-30T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Bor, D. Ž.,""","""jo2001008482""","[""aut""]","""1932-2010""",,,,,"""nkc20102152752""",""" nam a22 a 4500""","""101213s2010 xr cfh e 0…","[""(váz.)""]",,"[""978-80-86159-69-0""]",,,,,,,"""1""","""0""","""Časobraní, aneb, Můj život v s…",,"""D.Ž. Bor""",,,,,,"[""296 s., xvi s. obr. příl. :""]","[""portréty, faksim. ;""]","[""24 cm""]",,,,…,,,2010,296,"""Q11912064""",,,,,"[""cs""]","""Vladislav Zadrobílek""","""Vladislav Zadrobílek""",,"[""Česko"", ""Československo""]",,"""český básník, hermetik a hudeb…","""Czech poet, hermeticist and mu…",,,"[""spisovatel"", ""básník"", … ""výtvarník""]",,,,,,,,"""muž""","[""Praha"", ""Stodůlky""]","[""Praha""]",1932.0,"""+1932-11-14T00:00:00Z""",2010.0,"""+2010-12-11T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Matoušek, Karel,""","""jk01080803""","[""aut""]","""1914-1969""",,,,,"""bk194102191""",""" nam a22 1 4500""","""990308s1941 xr |…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Učebnice psaní strojem desetip…","""pro obchodní učiliště a praxi …","""Matoušek, Petrásek, Zářecký""",,,,,,"[""42 s. :""]","[""[XX] příl. ;""]","[""4°""]",,,,…,,,1941,42,"""Q95364149""",,,,,[],"""Karel Matoušek""","""Karel Matoušek""",,,,"""Narozen 6.8.1914, zemřel 3.9.1…",,,,"[""stenograf""]",,,,,,,,"""muž""",,,1914.0,"""+1914-08-06T00:00:00Z""",1969.0,"""+1969-09-03T00:00:00Z""",,,
"""1""","""Augustin, Hans""","""jn20001005230""","[""aut""]",,,,,,"""bk193900053""",""" nam a22 1 4500""","""990118s1939 xr a 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Učebnice němčiny""","""němčina hovorem i obrazem pro …","""napsal H. Augustin""",,,,,,"[""vii, 379 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,,1939,379,"""Q120573722""",,,,,[],"""Hans Augustin""","""Hans Augustin""",,,,"""autor učebnic němčiny""",,,,,,,,,,,,"""muž""",,,,,,,,,
"""1""","""Gladkij, Ivan,""","""jn20000400777""","[""aut""]","""1933-""",,,,,"""ck9100177""",""" nam a22 4500""","""910208s1990 xr u0…","[""(Brož.) :""]","[""2 Kčs""]",,,,,,,,"""1""","""0""","""Vybrané kapitoly ze sociálního…",,"""I. Gladkij""",,,,,,"[""34 s. ;""]",,"[""20 cm""]",,,,…,,,1990,34,"""Q95391844""",,,,,[],"""Ivan Gladkij""","""Ivan Gladkij""",,"[""Československo""]",,"""Narozen 17.5.1933 v Brně. Prof…",,,,"[""lékař""]",,,,,,,,"""muž""","[""Brno""]",,1933.0,"""+1933-05-17T00:00:00Z""",,,,,


In [11]:
# Row counts per value of the `strany` list column, most frequent first.
strany = (
    df.explode('strany')
    .group_by('strany')
    .len()
    .sort('len', descending=True)
)

In [12]:
# Display the `strany` counts computed in the previous cell.
strany

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [13]:
# Row counts per value of the `vezeni` list column, most frequent first.
vezeni = (
    df.explode('vezeni')
    .group_by('vezeni')
    .len()
    .sort('len', descending=True)
)
vezeni

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [14]:
# Same display as the earlier bare `df` cell; df has not been reassigned since.
df

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_9,700_g,rok,stran,024_a,ceny,druh_umrti,facebook,instagram,jazykove_verze,label_cs,label_en,manzelstvo,obcanstvi,partnerstvo,popis_cs,popis_en,potomstvo,pricina_umrti,profese,role,skoly,sourozenectvo,strany,twitter,udalosti,vezeni,w_gender,w_misto_narozeni,w_misto_umrti,w_narozeni,w_narozeni_presne,w_umrti,w_umrti_presne,web,wiki_cs,wiki_en
str,str,str,list[str],str,str,list[str],str,str,str,str,str,list[str],list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],i64,i64,str,list[str],str,str,str,list[str],str,str,list[str],list[str],list[str],str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],f64,str,f64,str,str,str,str
"""1""","""Placák, Bedřich,""","""jk01093315""","[""aut""]","""1914-1993""",,,,,"""bk196902149""",""" nam a22 1 4500""","""971004s1969 xr e | 0…",,,,,,,,,,"""1""","""0""","""Střelná poranění plic""",,"""Bedřich Placák""",,,,,,"[""125 s. :""]","[""il., tb. ;""]","[""8°""]",,,,…,,,1969,125,"""Q51000794""",,,,,"[""cs""]","""Bedřich Placák""","""Bedrich Placak""",,"[""Československo"", ""Česko""]",,"""český lékař""","""Surgeon and professor of medic…","[""Petr Placák""]",,"[""lékař"", ""autor memoárů"", … ""spisovatel""]",,,,"[""Komunistická strana Československa""]",,,,"""muž""","[""Vídeň""]","[""Praha""]",1914.0,"""+1914-02-16T00:00:00Z""",1993.0,"""+1993-03-01T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Mach, František Jiří,""","""jk01072887""","[""com""]","""1869-1952""",,,,,"""bk193402220""",""" nam a22 1 4500""","""990809s1934 xr …",,,,,,,,,,"""1""","""0""","""Zpěvoherní repertoire""","""Stručné obsahy českých a cizíc…","""Uspořádali: Fr. a M. Machovi ;…",,,,,,"[""431 - [I] s. ;""]",,"[""8°""]",,,,…,,,1934,431,"""Q12017127""",,,,,"[""cs""]","""František Jiří Mach""","""František Jiří Mach""",,"[""Československo""]",,"""český hudební pedagog a hudebn…","""Czech music educator and compo…",,,"[""hudební skladatel"", ""hudební pedagog"", ""učitel""]",,,,,,,,"""muž""","[""Kutná Hora""]","[""Pečky""]",1869.0,"""+1869-08-05T00:00:00Z""",1952.0,"""+1952-12-10T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Dostal, Adolf Bohuslav,""","""jk01022770""","[""aut""]","""1873-1939""",,,,,"""bkn20010988729""",""" nam a22 1 4500""","""010212s1925 xr c |…",,,,,,,,,,"""1""","""0""","""Studánka mudrců""","""pohádky a vyprávěnky /""","""A. Böhmová""",,,,,,"[""93 s. :""]","[""obr. příl. ;""]","[""8°""]",,,,…,,,1925,93,"""Q20565546""","[""Kříž nezávislosti""]",,,,"[""cs"", ""pl""]","""Adolf Bohuslav Dostal""","""Adolf Bohuslav Dostal""",,,,"""český spisovatel a režisér""",,,"""poprava zastřelením""","[""voják"", ""básník"", … ""redaktor""]",,"[""Právnická fakulta Univerzity Karlovy""]","[""Vilém Dostal"", ""Václav Dostal"", … ""Hana Dostalová""]",,,,,"""muž""","[""Veleslavín"", ""Praha""]","[""Katyň""]",1873.0,"""+1873-12-30T00:00:00Z""",1940.0,"""+1940-00-00T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Machálek, Jan,""","""xx0154711""","[""aut""]","""1939-""",,,,,"""nkc20193093548""",""" cam a22 i 4500""","""190424s2019 xr a e 0…","[""(brožováno) :""]","[""Kč 99,00""]","[""978-80-7364-091-0""]",,,,,,,"""1""","""0""","""Lékař vzpomíná, baví... i když…",,"""Jan Machálek, Marcela Losová""",,,,,,"[""54 stran :""]","[""ilustrace ;""]","[""21 cm""]",,,,…,,,2019,54,"""Q95093657""",,,,,[],"""Jan Machálek""","""Jan Machálek""",,,,"""Narozen 29. 1. 1939 v Moravský…",,,,"[""lékař"", ""mikrobiolog"", … ""učitel""]",,,,,,,,"""muž""","[""Moravské Málkovice""]",,1939.0,"""+1939-01-29T00:00:00Z""",,,,,
"""1""","""Boubela, Rudolf Václav,""","""jk01012763""","[""aut""]","""1880-1934""",,,,,"""bk193004338""",""" nam a22 1 4500""","""000101s1925 xr g 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Brno""","""informační příručka a stručný …","""napsal R. Boubela""",,,,,,"[""67 s. ;""]",,"[""19 cm""]",,,,…,,,1925,67,"""Q95361663""",,,,,[],"""Rudolf Václav Boubela""","""Rudolf Václav Boubela""",,"[""Československo""]",,"""Narozen 9.8.1880 v Moravské Os…","""Czechoslovak journalist (1880-…",,,"[""novinář"", ""redaktor""]",,,,,,,,"""muž""","[""Moravská Ostrava"", ""Ostrava""]","[""Brno""]",1880.0,"""+1880-08-09T00:00:00Z""",1934.0,"""+1934-06-23T00:00:00Z""",,,
"""1""","""Beráková, Zora,""","""jk01011703""","[""aut""]","""1921-2020""",,,,,"""nkc20162833660""",""" nam a22 i 4500""","""160916s2017 xr g 0…","[""(vázáno) :""]","[""Kč 269,00""]","[""978-80-267-0688-5""]",,,,,,,"""1""","""0""","""Vraždy v hotelu""",,"""Zora Beráková""",,,,,,"[""278 stran ;""]",,"[""22 cm""]",,,,…,,,2017,278,"""Q55201729""",,,,,"[""cs""]","""Zora Beráková""","""Zora Beráková""",,"[""Československo"", ""Česko""]",,"""česká spisovatelka a překladat…","""Czech writer and translator""",,,"[""překladatel"", ""spisovatel"", … ""prozaik""]",,"[""Filozofická fakulta Univerzity Karlovy""]",,,,,,"""žena""","[""Vsetín""]",,1921.0,"""+1921-07-27T00:00:00Z""",2020.0,"""+2020-08-30T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Bor, D. Ž.,""","""jo2001008482""","[""aut""]","""1932-2010""",,,,,"""nkc20102152752""",""" nam a22 a 4500""","""101213s2010 xr cfh e 0…","[""(váz.)""]",,"[""978-80-86159-69-0""]",,,,,,,"""1""","""0""","""Časobraní, aneb, Můj život v s…",,"""D.Ž. Bor""",,,,,,"[""296 s., xvi s. obr. příl. :""]","[""portréty, faksim. ;""]","[""24 cm""]",,,,…,,,2010,296,"""Q11912064""",,,,,"[""cs""]","""Vladislav Zadrobílek""","""Vladislav Zadrobílek""",,"[""Česko"", ""Československo""]",,"""český básník, hermetik a hudeb…","""Czech poet, hermeticist and mu…",,,"[""spisovatel"", ""básník"", … ""výtvarník""]",,,,,,,,"""muž""","[""Praha"", ""Stodůlky""]","[""Praha""]",1932.0,"""+1932-11-14T00:00:00Z""",2010.0,"""+2010-12-11T00:00:00Z""",,"""https://cs.wikipedia.org/wiki/…",
"""1""","""Matoušek, Karel,""","""jk01080803""","[""aut""]","""1914-1969""",,,,,"""bk194102191""",""" nam a22 1 4500""","""990308s1941 xr |…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Učebnice psaní strojem desetip…","""pro obchodní učiliště a praxi …","""Matoušek, Petrásek, Zářecký""",,,,,,"[""42 s. :""]","[""[XX] příl. ;""]","[""4°""]",,,,…,,,1941,42,"""Q95364149""",,,,,[],"""Karel Matoušek""","""Karel Matoušek""",,,,"""Narozen 6.8.1914, zemřel 3.9.1…",,,,"[""stenograf""]",,,,,,,,"""muž""",,,1914.0,"""+1914-08-06T00:00:00Z""",1969.0,"""+1969-09-03T00:00:00Z""",,,
"""1""","""Augustin, Hans""","""jn20001005230""","[""aut""]",,,,,,"""bk193900053""",""" nam a22 1 4500""","""990118s1939 xr a 0…","[""(Brož.)""]",,,,,,,,,"""1""","""0""","""Učebnice němčiny""","""němčina hovorem i obrazem pro …","""napsal H. Augustin""",,,,,,"[""vii, 379 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,,1939,379,"""Q120573722""",,,,,[],"""Hans Augustin""","""Hans Augustin""",,,,"""autor učebnic němčiny""",,,,,,,,,,,,"""muž""",,,,,,,,,
"""1""","""Gladkij, Ivan,""","""jn20000400777""","[""aut""]","""1933-""",,,,,"""ck9100177""",""" nam a22 4500""","""910208s1990 xr u0…","[""(Brož.) :""]","[""2 Kčs""]",,,,,,,,"""1""","""0""","""Vybrané kapitoly ze sociálního…",,"""I. Gladkij""",,,,,,"[""34 s. ;""]",,"[""20 cm""]",,,,…,,,1990,34,"""Q95391844""",,,,,[],"""Ivan Gladkij""","""Ivan Gladkij""",,"[""Československo""]",,"""Narozen 17.5.1933 v Brně. Prof…",,,,"[""lékař""]",,,,,,,,"""muž""","[""Brno""]",,1933.0,"""+1933-05-17T00:00:00Z""",,,,,


In [15]:
def zebricek(sloupec, pocet=10):
    """Return the most frequent values of a list column in the global ``df``.

    The column is exploded so that every list element is counted separately,
    then rows are grouped by value and ordered by descending count.
    Nulls are not filtered out, so rows without a value form their own bucket.

    Args:
        sloupec: Name of the column in ``df`` to rank.
        pocet: How many of the top rows to return (default 10, the value that
            used to be hard-coded).

    Returns:
        A polars DataFrame with the columns ``sloupec`` and ``len``.
    """
    return (
        df.explode(sloupec)
        .group_by(sloupec)
        .len()
        .sort(by="len", descending=True)
        .head(pocet)
    )

In [16]:
# Ten most frequent `vezeni` values (the null bucket included).
zebricek("vezeni")

vezeni,len
str,u32
,297291
"""Malá pevnost Terezín""",1512
"""Koncentrační tábor Dachau""",669
"""Židovské ghetto Terezín""",634
"""Koncentrační tábor Buchenwald""",459
"""Koncentrační tábor Osvětim""",458
"""Koncentrační tábor Mauthausen-…",376
"""Koncentrační tábor Sachsenhaus…",77
"""Internační tábor Svatobořice""",70
"""Koncentrační tábor Neuengamme""",36


In [17]:
# Ten most frequent `strany` values (the null bucket included).
zebricek("strany")

strany,len
str,u32
,289105
"""Komunistická strana Českoslove…",5171
"""Sociální demokracie""",1406
"""Národní strana svobodomyslná""",844
"""Česká strana národně sociální""",674
"""Občanská demokratická strana""",596
"""Národní strana""",480
"""KDU-ČSL""",460
"""Občanské fórum""",435
"""Komunistická strana Čech a Mor…",336


In [18]:
# Ten most frequent `profese` values (the null bucket included).
zebricek("profese")

profese,len
str,u32
,81272
"""spisovatel""",72968
"""učitel""",36484
"""vysokoškolský učitel""",32279
"""překladatel""",30444
"""básník""",29000
"""pedagog""",28848
"""publicista""",25746
"""redaktor""",24075
"""historik""",21399


In [19]:
# Ten most frequent `udalosti` values (the null bucket included).
zebricek("udalosti")

udalosti,len
str,u32
,297772
"""ordinace""",803
"""odvod branců""",442
"""svatba""",411
"""Transport Ek""",117
"""Transport L""",64
"""Transport Cc""",64
"""svěcení""",60
"""emigrace""",58
"""Transport Ds""",58


In [59]:
def zkusenost(sloupec="", nazev="", hodnota=None, rok=1900, okno=None):
    """Compute, per year, the share of rows in the global ``df`` with a given attribute.

    Args:
        sloupec: Column of ``df`` that carries the attribute.
        nazev: Label written into the ``co`` column of the result.
        hodnota: If given, ``sloupec`` is exploded and only elements equal to
            this value are counted; a row whose list repeats the value is
            counted once per occurrence. If ``None``, every row with a
            non-null ``sloupec`` is counted.
        rok: First year (inclusive) to report.
        okno: Optional window size; when set, ``podil`` is replaced by its
            rolling mean over that many years. Defaults to ``None`` (no smoothing).

    Returns:
        A polars DataFrame sorted by ``rok`` with the columns ``rok``, ``len``
        (all rows that year), ``len_right`` (matching rows), ``podil``
        (``len_right / len``) and ``co``. Years without any match keep null in
        ``len_right`` and ``podil``.
    """
    # Denominator: all rows per year from `rok` onwards.
    vsechny = df.filter(pl.col("rok") >= rok).group_by("rok").len()

    # Numerator: rows that carry the attribute (or the specific value).
    if hodnota is not None:
        odpovidajici = df.explode(sloupec).filter(pl.col(sloupec) == hodnota)
    else:
        odpovidajici = df.filter(~pl.col(sloupec).is_null())
    srovnani = odpovidajici.group_by("rok").len()

    vysledek = (
        vsechny.join(srovnani, on='rok', how='left')
        .with_columns((pl.col('len_right') / pl.col('len')).alias('podil'))
        .sort(by="rok")
        .with_columns(pl.lit(nazev).alias("co"))
    )
    if okno is not None:
        vysledek = vysledek.with_columns(pl.col("podil").rolling_mean(window_size=okno))
    return vysledek

In [75]:
# Yearly share of rows with any (non-null) `vezeni` entry.
kriminal = zkusenost(sloupec="vezeni", nazev="pobyt ve vězení nebo koncentračním táboře")

In [63]:
# Yearly share of rows whose `strany` list contains this exact party name.
ksc = zkusenost(sloupec="strany", hodnota="Komunistická strana Československa", nazev="členství v KSČ")

In [23]:
# Yearly share of rows whose `vezeni` list contains "Malá pevnost Terezín".
terezin = zkusenost(sloupec="vezeni", nazev="internace v Terezíně", hodnota="Malá pevnost Terezín")

In [24]:
# Yearly share of rows with a non-null `instagram` value.
instagram = zkusenost(sloupec="instagram",nazev="účet na Instagramu")

In [25]:
# Yearly share of rows with a non-null `facebook` value.
fb = zkusenost(sloupec="facebook", nazev="účet na Facebooku")

In [65]:
# Yearly share of rows with a non-null `web` value; displayed for a sanity check.
web=zkusenost(sloupec="web",nazev="webové stránky")
web

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1900,432,,,"""webové stránky"""
1901,602,,,"""webové stránky"""
1902,745,1,0.001342,"""webové stránky"""
1903,631,,,"""webové stránky"""
1904,648,1,0.001543,"""webové stránky"""
1905,613,1,0.001631,"""webové stránky"""
1906,665,1,0.001504,"""webové stránky"""
1907,645,1,0.00155,"""webové stránky"""
1908,788,2,0.002538,"""webové stránky"""
1909,727,,,"""webové stránky"""


In [27]:
# Display the Terezín series ordered by year.
# NOTE(review): in the recorded output `podil` does not equal len_right / len
# (e.g. 1 / 602 is shown as 0.001988), which suggests `terezin` was built by an
# earlier version of zkusenost with the rolling mean enabled - re-run to confirm.
terezin.sort(by="rok")

rok,len,len_right,podil,co
i64,u32,u32,f64,str
1900,432,1,,"""internace v Terezíně"""
1901,602,1,0.001988,"""internace v Terezíně"""
1902,745,1,0.001502,"""internace v Terezíně"""
1903,631,2,0.002256,"""internace v Terezíně"""
1904,648,3,0.0039,"""internace v Terezíně"""
1905,613,2,0.003946,"""internace v Terezíně"""
1906,665,2,0.003135,"""internace v Terezíně"""
1907,645,5,0.00538,"""internace v Terezíně"""
1908,788,5,0.007049,"""internace v Terezíně"""
1909,727,4,0.005924,"""internace v Terezíně"""


In [79]:
# Small-multiples bar chart: one row per experience ("co"), yearly share on y.
podily_faceted = (
    alt.Chart(
        alt_friendly(pl.concat([kriminal, ksc, web])),
        title={
            # Plain string rather than a one-item list: the list form ended up
            # verbatim (brackets and quotes included) in the exported alt text.
            'text': "Co měli čeští autoři za sebou nebo před sebou",
            "subtitle": [
                "Jak velkou část knih poprvé vydaných v daném roce napsali",
                "lidé s určitou životní zkušeností – bez ohledu na to, kdy tuto",
                "zkušenost udělali. Povšimněte si drobných zubů v roce 1990:",
                "po revoluci začali vycházet jak lidé dříve věznění, tak vyloučení",
                "členové KSČ. Data jsou neúplná, reálné podíly budou spíše vyšší;",
                "podstatné jsou zde trendy.",
            ],
        },
    )
    .mark_bar(width=2)
    .encode(
        alt.X(
            "rok:T",
            title=None,
            axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6'),
        ),
        alt.Y(
            'podil:Q',
            axis=alt.Axis(labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6'),
            title=None,
            scale=alt.Scale(domainMax=0.1),
        ),
        alt.Color(
            "co:N",
            title=None,
            legend=None,
            scale=alt.Scale(range=['#5E2D3A', '#D6534B', '#445B78']),
            # The sort lists must name the `co` labels that are actually in the
            # data (kriminal, ksc, web). The previous lists still referred to
            # "účet na Facebooku", which is not plotted here.
            # NOTE(review): order chosen to keep the colour assignment the old
            # list effectively produced - confirm against the rendered chart.
            sort=["pobyt ve vězení nebo koncentračním táboře", "členství v KSČ", "webové stránky"],
        ),
        row=alt.Row(
            "co:N",
            title=None,
            spacing=15,
            header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='start', labelFontWeight=500, labelFont='Asap', labelOrient="top"),
            # Full labels; the previous "pobyt ve vězení" matched no row.
            sort=["členství v KSČ", "pobyt ve vězení nebo koncentračním táboře", "webové stránky"],
        ),
    )
    # NOTE(review): the two resolve_scale calls disagree about x; the later one
    # appears to win. Kept as-is to avoid changing the rendering - confirm and
    # collapse into one call.
    .resolve_scale(x='independent', y='independent')
    .properties(height=60, width=280)
    .resolve_scale(x="shared")
    .resolve_axis(x="independent")
    .configure_view(stroke='transparent')
    .configure_axis(grid=False, domain=False)
)

podily_faceted

In [81]:
# Hand the chart to the project's export helper under the name "02_zkusenosti",
# with the 'wiki' credit line; the recorded output is the returned <figure> HTML.
me_to_neurazi(podily_faceted, soubor="02_zkusenosti", kredity=kredity['wiki'])

<figure><a href="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" target="_blank"><img src="https://data.irozhlas.cz/knihy-grafy/02_zkusenosti.svg" width="100%" alt="Graf s titulkem „['Co měli čeští autoři za sebou nebo před sebou']“. Další texty by měly být čitelné ze zdrojového souboru SVG." /></a></figure>
<figure><a href="https://michalkasparek.cz/sklad/02_zkusenosti.svg" target="_blank"><img src="https://michalkasparek.cz/sklad/02_zkusenosti.svg" width="100%" alt="Graf s titulkem „['Co měli čeští autoři za sebou nebo před sebou']“. Další texty by měly být čitelné ze zdrojového souboru SVG." /></a></figure>


In [30]:
# Number of rows per year - the denominator behind the shares above.
df.group_by("rok").len().sort(by="rok")

rok,len
i64,u32
1801,5
1802,3
1803,5
1804,7
1805,9
1806,4
1807,9
1808,5
1809,4
1810,4
