In [235]:
import os
import json
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.me_to_neurazi import me_to_neurazi

with open(os.path.join('src','kredity.json'), 'r', encoding='utf-8') as kredity:
    kredity = json.loads(kredity.read())
pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

In [236]:
aut = pl.read_parquet(
    os.path.join("data","aut_vyber.parquet")
).explode(
    "370_c"
).filter(
    pl.col("370_c").str.contains("Česk")
).explode(
    "100_7"
).select(
    ["100_a","100_7","370_c","374_a","375_a","678_a"]
)

In [237]:
cesi = aut.filter(pl.col("370_c").str.contains("Česk")).select(pl.col("100_7").unique()).to_series().to_list()

In [238]:
len(cesi)

267043

In [239]:
aut.sample(10)

100_a,100_7,370_c,374_a,375_a,678_a
list[str],str,str,list[str],list[str],list[str]
"[""Klusák, Hynek""]","""xx0129158""","""Česko""",,,"[""RNDr., CSc., vědecký a výzkumný pracovník v oboru kvality zrna obilovin.""]"
,,"""Česko""",,,
"[""Mazey, Mike""]","""mzk2016904810""","""Česko""","[""vinaři"", ""učitelé""]","[""muž""]","[""Australský vinař a učitel angličtiny. Žije v Česku.""]"
,,"""Česko""",,,
,,"""Česko""",,,
"[""Páluš, Peter,""]","""mzk2011643293""","""Česko""",,,"[""Narozen 1979. Mgr., divadelní teoretik a kritik, též publicista.""]"
,,"""Česko""",,,
"[""Ilkovič, Mikuláš,""]","""jx20050914024""","""Česko""",,,"[""Autor učebnic fyziky.""]"
,,"""Česko""",,,
"[""Šujan, František,""]","""jk01131633""","""Česko""",,,"[""Narozen 3.7.1859 v Kloboučkách u Bučovic, zemřel 19.7.1944 v Brně. PhDr., středoškolský profesor, publikace z oboru historie.""]"


In [240]:
len(cesi)

267043

In [241]:
cesi

['xx0058683',
 'ola2014806710',
 'xx0016755',
 'mzk2012719715',
 'jk01041277',
 'xx0016039',
 'ola2004253770',
 'mzk2009544209',
 'xx0119303',
 'ola2009488029',
 'jo20201074615',
 'nlk20000085245',
 'xx0134808',
 'jo20191049496',
 'jk01021549',
 'mzk2007401168',
 'xx0054617',
 'mzk2008448743',
 'xx0103706',
 'xx0152477',
 'pna2013759605',
 'ola2002110982',
 'jn20010310083',
 'xx0168181',
 'xx0005576',
 'mzk2010473803',
 'ola2018982229',
 'jo2008464733',
 'xx0126502',
 'xx0031304',
 'mzk2005271871',
 'jk01140362',
 'jo20000082647',
 'jx20051118014',
 'xx0055664',
 'jn20001005035',
 'js20020122031',
 'jo2002105418',
 'mzk2012694344',
 'jo2013755186',
 'ntka173873',
 'mzk2005269617',
 'jk01121169',
 'jk01101947',
 'jn19990216159',
 'jk01061930',
 'jn19981228010',
 'nlk19990074302',
 'jk01091701',
 'kpwa4025',
 'pna2006312004',
 'xx0105258',
 'xx0192844',
 'hka2015882393',
 'xx0276579',
 'jk01080672',
 'mzk2009544576',
 'xx0024583',
 'jk01080508',
 'ola200203980',
 'jk01031875',
 'ntka1732

In [242]:
df = pl.read_parquet(os.path.join("data","cnb_ceska_proza.parquet")).rename({'autorstvo_kod':'100_7'})
df = pl.concat([df, pl.read_parquet(os.path.join("data","cnb_ceska_poezie.parquet"))])
spisovatelstvo = list(set(df.select(pl.col('100_7')).drop_nulls().to_series().to_list()))

In [243]:
len(spisovatelstvo)

18992

In [317]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.filter(pl.col("100_7").is_in(cesi))
df = df.join(aut, left_on="100_7", right_on="100_7", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","072.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df.explode("655_a").group_by("655_a").len().top_k(100, by="len")

655_a,len
str,u32
,186758
"""učebnice vysokých škol""",42516
"""příručky""",34773
"""studie""",21628
"""publikace pro děti""",21145
"""monografie""",21065
"""textbooks (higher)""",20797
"""handbooks and manuals""",17960
"""místní pohlednice""",17932
"""česká poezie""",17034


In [319]:
df = df.explode("655_a").explode("072_x")

## 1. verze

In [321]:
poezie = df.filter(
    pl.col('655_a').str.contains('poez|poetr') | pl.col("072_x").str.contains('poez')
).with_columns(
    kategorie = pl.lit('poezie')
)

proza = df.filter(
    pl.col('655_a').str.contains('román|fiction|novel|příbě|povíd|próz') | pl.col('072_x').str.contains('román|fiction|novel|příbě|povíd|próz')
).with_columns(
    kategorie = pl.lit('próza')
)

vzpominky = df.filter(
    pl.col('655_a').str.contains('vzpomínky|autobiographical')
).with_columns(
    kategorie = pl.lit('vzpomínky')
)

In [323]:
do_grafu = pl.concat(
    [poezie, proza, vzpominky]
).with_columns(
    pl.col('245_a').map_elements(bez_bordelu, return_dtype=str)
).with_columns(
    pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok')
).sort(
    by="rok"
).unique(
    subset=['100_a','245_a'],keep='first'
)

In [325]:
vek = pl.read_parquet(os.path.join("data","narozeni-umrti-gender.parquet"))
do_grafu = do_grafu.join(vek, on="100_7", how="left").with_columns((pl.col("rok") - pl.col("narozeni")).alias("vek"))

In [327]:
do_grafu = do_grafu.drop_nulls("narozeni")

In [329]:
do_grafu = do_grafu.filter(pl.col("vek").is_between(15,100))

In [331]:
do_grafu.sort(by="vek").head(n=10)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,100_a_right,370_c,374_a,375_a,678_a,008,072_ind2,072_a,072_x,072_2,072_9,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,kategorie,rok,narozeni,umrti,gender,vek
str,str,str,list[str],str,str,list[str],str,str,str,list[str],str,list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],str,str,str,str,str,list[str],list[str],str,str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,i64,i64,i64,str,i64
"""1""","""Braunová, Vlasta,""","""jo2018980786""","[""aut""]","""2002-""",,,,,"""nkc20172954368""","[""Braunová, Vlasta,""]","""Česko""",,"[""žena""]","[""Narozena 19. 7. 2002. Autorka povídky ve sborníku, členka Základní umělecké školy ve Žďáru nad Sázavou.""]","""171109s2017 xr g 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""Šestero""","""hororové příběhy /""","""napsali pod pedagogickým veden…",,,,,,"[""7"", ""7"", … ""9""]","""české povídky""","[""fd133971"", ""fd132417"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""próza""",2017,2002,,"""f""",15
"""1""","""Kneřová, Bety,""","""mzk2009543980""","[""aut""]","""1994-""",,,,,"""nkc20092008589""","[""Kneřová, Bety,""]","""Česko""",,,"[""Narozena v květnu 1994 v Karlových Varech. Autorka vyprávění o dětech a pro děti.""]","""091030s2009 xr a c 0…","[""7"", ""7""]","[""821.162.3-3"", ""821-93""]","""Česká próza""","[""Konspekt"", ""Konspekt""]","[""25"", ""26""]","""1""","""0""","""Otazníková říše""",,"""Bety Kneřová ; ilustrace Lucie…",,,,,,"[""7"", ""7"", … ""9""]","""české příběhy""","[""fd133973"", ""fd184198"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""próza""",2009,1994,,"""f""",15
"""1""","""Brikciusová, Anna""","""xx0139008""","[""aut""]",,,,,,"""nkc20152765122""","[""Brikciusová, Anna""]","""Česko""","[""hudebnice"", ""violoncellistky"", … ""básnířky""]","[""žena""]","[""Narozena v Praze. Hudebnice, violoncellistka (klasická hudba), též spisovatelka a básnířka.""]","""160105s2015 xr g 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""Eutanazie""","""povídky /""","""Anna Brikciusová""",,,,,,"[""7"", ""9""]","""české povídky""","[""fd133971"", null]","[""czenas"", ""eczenas""]",,,,"""próza""",2015,2000,,"""f""",15
"""1""","""Černá, Barbora,""","""jn20010310308""","[""aut""]","""1986-""",,,,,"""cpk20010984975""","[""Černá, Barbora,""]","""Česko""",,,"[""Beletrie.""]","""010209s2001 xr c 0…","[""7"", ""7""]","[""821.162.3-3"", ""821-93""]","""Česká próza""","[""Konspekt"", ""Konspekt""]","[""25"", ""26""]","""1""","""0""","""Denisa""","""z deníku čtrnáctileté dívky /""","""Barbora Černá""",,,,,,"[""7"", ""7"", … ""9""]","""publikace pro mládež""","[""fd133157"", ""fd133974"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""próza""",2001,1986,,,15
"""1""","""Trojovská, Inesa,""","""xx0060696""","[""aut""]","""1993-""",,,,,"""nkc20081815474""","[""Trojovská, Inesa,""]","""Česko""","[""spisovatelky""]","[""žena""]","[""Narozena 18. 5. 1993 v Hradci Králové. Spisovatelka, autorka fantasy příběhů.""]","""090108s2008 xr c 0…","[""7"", ""7""]","[""821.162.3-3"", ""821-93""]","""Česká próza""","[""Konspekt"", ""Konspekt""]","[""25"", ""26""]","""1""","""0""","""Předtucha konce""",,"""Inesa Trojovská""",,,,,,"[""7"", ""7"", … ""9""]","""české romány""","[""fd133974"", ""fd184199"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""próza""",2008,1993,,"""f""",15
"""1""","""Paťhová, Magdaléna,""","""xx0302475""","[""aut""]","""2008-""",,,,,"""nkc20233524279""","[""Paťhová, Magdaléna,""]","""Česko""","[""spisovatelky"", ""studentky""]","[""žena""]","[""Narozena 2008. Autorka knih o historii Trhového Štěpánova a Prahy. Vítězka soutěže mladých talentů Zlatý oříšek 2022.""]","""230607s2023 xr a g 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""Svědek všeho dění""",,"""Magdaléna Paťhová""",,,,,,"[""7"", ""7"", … ""9""]","""české příběhy""","[""fd133973"", ""fd132413"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""próza""",2023,2008,,"""f""",15
"""1""","""Fidlerová, Lucie,""","""mzk2014819685""","[""aut""]","""1999-""",,,,,"""nkc20142588150""","[""Fidlerová, Lucie,""]","""Česko""",,,"[""Narozena 3. 5. 1999 v Brně. Autorka fantasy románu.""]","""140502s2014 xr g 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""Narozen o půlnoci""",,"""Lucie Fidlerová""",,"[""Dračí znamení /""]",,,,"[""7"", ""7"", … ""9""]","""české romány""","[""fd133974"", ""fd184199"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""próza""",2014,1999,,"""f""",15
"""1""","""Kott, Jindřich,""","""xx0006980""","[""cmp""]","""1902-1977""",,,,,"""hud03900247""","[""Kott, Jindřich,""]","""Česko""",,,"[""Narozen 17.2.1902 v Táboře, zemřel 21.2.1977 v Praze. Houslista.""]","""081130s1917 xr sga g …","[""7""]","[""784""]","""Vokální hudba""","[""Konspekt""]","[""9""]","""1""","""0""","""Olše""","""píseň pro jeden hlas s průvode…","""Jindřich Kott ; na slova A. So…",,,,,,"[""7"", ""7"", … ""9""]","""zhudebněná poezie""","[""fd1062826"", ""fd185981"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""poezie""",1917,1902,1977.0,"""m""",15
"""1""","""Grygar, Tomáš,""","""ola20221138391""","[""aut""]","""2006-""",,,,,"""nkc20223389862""","[""Grygar, Tomáš,""]","""Česko""","[""středoškolští studenti"", ""výtvarní umělci""]","[""muž""]","[""Narozen 28. 7. 2006. Student střední průmyslové školy, výtvarný umělec a autor knihy dětských hádanek.""]","""220105s2021 xr a b 0…","[""7"", ""7""]","[""59"", ""821-93""]","""Zoologie""","[""Konspekt"", ""Konspekt""]","[""2"", ""26""]","""1""","""0""","""Hádanky o zvířátkách""","""pro děti /""","""Tomáš Grygar""",,,,,,"[""7"", ""7"", … ""9""]","""children's poetry""","[""fd133992"", ""fd133156"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""poezie""",2021,2006,,"""m""",15
"""1""","""Tobiáš, Petr,""","""mzk2009491824""","[""aut""]","""1993-""",,,,,"""nkc20081835332""","[""Tobiáš, Petr,""]","""Česko""",,,"[""Narozen 1993. Autor povídek.""]","""081211s2008 xr c 0…","[""7"", ""7""]","[""821.162.3-3"", ""821-93""]","""Česká próza""","[""Konspekt"", ""Konspekt""]","[""25"", ""26""]","""1""","""0""","""Robin Kyber a Strana Nepřátel …",,"""Petr Tobiáš""",,,,,,"[""7"", ""7"", … ""9""]","""české romány""","[""fd133974"", ""fd133157"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""próza""",2008,1993,,"""m""",15


In [333]:
do_grafu = do_grafu.filter((pl.col("umrti") >= pl.col('rok')) | pl.col("umrti").is_null())

## Mediánový věk

In [335]:
do_grafu.group_by("kategorie").agg(pl.col("vek").median())

kategorie,vek
str,f64
"""próza""",50.0
"""vzpomínky""",67.0
"""poezie""",52.0


## Druhý pokus

In [337]:
do_grafu.sample(10)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,100_a_right,370_c,374_a,375_a,678_a,008,072_ind2,072_a,072_x,072_2,072_9,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,kategorie,rok,narozeni,umrti,gender,vek
str,str,str,list[str],str,str,list[str],str,str,str,list[str],str,list[str],list[str],list[str],str,list[str],list[str],str,list[str],list[str],str,str,str,str,str,list[str],list[str],str,str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],str,i64,i64,i64,str,i64
"""1""","""Roučková, Olga,""","""xx0236517""","[""aut""]","""1984-""",,,,,"""nkc20193100105""","[""Roučková, Olga,""]","""Česko""","[""automobilové závodnice""]","[""žena""]","[""Narozena 22. 10. 1984 v Děčíně. Závodnice týmu Moto Racing Group byla první Češkou, která v lednu 2018 dokončila Rallye Dakar v kategorii čtyřkolek.""]","""190530s2019 xr ac e 0…","[""7"", ""7""]","[""796"", ""929""]","""Sport. Hry. Tělesná cvičení""","[""Konspekt"", ""Konspekt""]","[""20"", ""8""]","""1""","""0""","""Rallye Dakar""","""peklo na zemi : o splněných sn…","""Olga Roučková, Monika Nikodemo…",,,,,,"[""7"", ""7"", … ""9""]","""autobiografické vzpomínky""","[""fd131854"", ""fd132276"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""vzpomínky""",2019,1984,,"""f""",35
"""1""","""Půlpán, Karel,""","""jk01101736""","[""aut""]","""1885-1914""",,,,,"""nos190238850""","[""Půlpán, Karel,""]","""Česko""",,,"[""Narozen 21.9.1885 ve Vídni, zemřel 5.6.1914 v Čerčanech. Novinář, beletrista, též pro mládež .""]","""000211s1908 xr g 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""Na tvrdých cestičkách""",,"""Karel Půlpán""",,,,,,"[""7""]","""české povídky""","[""fd133971""]","[""czenas""]",,,,"""próza""",1908,1885,1914.0,"""m""",23
"""1""","""Dragoni Křenovský, Jakub,""","""jk01022946""","[""aut""]","""asi 1808-1871""",,,,,"""nkc20112176035""","[""Dragoni Křenovský, Jakub,""]","""Česko""",,,"[""Narozen asi 1808 v Křenovicích u Kojetína, zemřel 28. 9. 1871 v Brně. Středoškolský profesor, prozaik, autor didaktických povídek pro mládež a novely z vesnického prostředí. .""]","""110318s1869 xr a j 0…","[""7"", ""7""]","[""821-93"", ""821.162.3-3""]","""Literatura pro děti a mládež (…","[""Konspekt"", ""Konspekt""]","[""26"", ""25""]","""1""","""0""","""Wenzel Böhm""","""Erzählung /""","""nach J. Drg. Křenovský ; von J…",,,,,,"[""7"", ""7""]","""české povídky""","[""fd133971"", ""fd133157""]","[""czenas"", ""czenas""]",,,,"""próza""",1869,1808,1871.0,"""m""",61
"""1""","""Valenta, Richard,""","""xx0059848""","[""aut""]","""1944-""",,,,,"""nkc20071783089""","[""Valenta, Richard,""]","""Česko""",,,"[""Narozen 11.1.1944. Filmový kameraman, beletrista.""]","""071212s2008 xr a g 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""O Blumovi, o Topolinu, o panu …","""a, ""--zajdem' se podívat"" /""","""Richard Valenta""",,,,,,"[""7"", ""9""]","""české prózy""","[""fd133972"", null]","[""czenas"", ""eczenas""]",,,,"""próza""",2008,1944,,"""m""",64
"""0""","""Shamaya,""","""jo2017940435""","[""aut""]","""1989-""",,,,,"""nkc20172955336""","[""Shamaya,""]","""Česko""",,"[""žena""]","[""Narozena v roce 1989 v Praze. Autorka publikace z oblasti alternativní zdravé výživy, zabývá se pránou, energetickým systémem člověka a bioenergetikou. Též blogerka a autorka esotericky zaměřeného deníku. Vystudovala obory Potravinářská a biochemická technologie a Kvalitu a bezpečnost potravin.""]","""171115s2017 xr a g 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""Darumi""",,"""Katie Shamaya""",,,,,,"[""7"", ""9""]","""české romány""","[""fd133974"", null]","[""czenas"", ""eczenas""]",,,,"""próza""",2017,1989,,"""f""",28
"""1""","""Lysý, Karel,""","""jk01072723""","[""aut""]","""1849-1925""",,,,,"""nkc20122360514""","[""Lysý, Karel,""]","""Česko""",,,"[""Narozen roku 1849 v Petrůvce, zemřel roku 1925 v Uherském Brodě. Katolický kněz, beletrista.""]","""120416s1888 xr e 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""Pamlsky z jalovcového kraje""",,"""sebrané humoresky Karla Lhotsk…",,,,,,"[""7"", ""7""]","""české povídky""","[""fd133971"", ""fd132428""]","[""czenas"", ""czenas""]",,,,"""próza""",1888,1849,1925.0,"""m""",39
"""1""","""Fontana, Ivan,""","""mzk2003171503""","[""aut""]","""1946-""",,,,,"""nkc20223417440""","[""Fontana, Ivan,""]","""Česko""","[""zemědělští inženýři"", ""spisovatelé"", ""básníci""]",,"[""Narozen 1. 6. 1946 v Jičíně. Ing., CSc., zemědělský inženýr, básník, autor aforismů a próz, práce o ochraně půdy.""]","""220420s2022 xr g 0…","[""7""]","[""821.162.3-1""]","""Česká poezie""","[""Konspekt""]","[""25""]","""1""","""0""","""Aforismy a básně kvetoucího sa…",,"""Ivan Fontana""",,,,,,"[""7"", ""7"", … ""9""]","""česká poezie""","[""fd133958"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""poezie""",2022,1946,,"""m""",76
"""1""","""Kostrhun, Jan,""","""jk01062020""","[""aut""]","""1942-2022""",,,,,"""nkc20102146105""","[""Kostrhun, Jan,""]","""Česko""","[""spisovatelé"", ""publicisté"", … ""poslanci""]","[""muž""]","[""Narozen 3. 7. 1942 v Podivíně u Břeclavi, zemřel 2. 5. 2022. Spisovatel, rozhlasový a televizní scenárista. Politik, poslanec Sněmovny lidu Federálního shromáždění a Poslanecké sněmovny PČR.""]","""101111s2010 xr g 0…","[""7""]","[""821.162.3-3""]","""Česká próza""","[""Konspekt""]","[""25""]","""1""","""0""","""Prázdniny""",,"""Jan Kostrhun""",,,,,,"[""7"", ""9""]","""české příběhy""","[""fd133973"", null]","[""czenas"", ""eczenas""]",,,,"""próza""",2010,1942,2022.0,"""m""",68
"""1""","""Jalovec, Stanislav,""","""mzk20221168890""","[""aut""]","""1992-""",,,,,"""nkc20223471628""","[""Jalovec, Stanislav,""]","""Česko""","[""blogeři""]","[""muž""]","[""Narozen 17. 11. 1992. Badatel a bloger v oboru historie, zejména období starověku a středověku, též zaměřený na vojenské dějiny, filosofii a poezii.""]","""221111s2022 xr ach e 0…","[""7"", ""7""]","[""94(100)"", ""929""]","""Světové dějiny""","[""Konspekt"", ""Konspekt""]","[""8"", ""8""]","""1""","""0""","""Hrdinové kontra padouši""",,"""Stanislav Jalovec""",,,,,,"[""7"", ""9""]","""non-fiction""","[""fd132773"", null]","[""czenas"", ""eczenas""]",,,,"""próza""",2022,1992,,"""m""",30
"""1""","""Plicková, Edita,""","""jk01093465""","[""ill""]","""1940-""",,,,,"""nkc20233520053""","[""Plicková, Edita,""]","""Česko""","[""malířky"", ""grafičky"", ""ilustrátorky""]","[""žena""]","[""Narozena 13. 10. 1940 v Praze. Malířka, grafička a ilustrátorka. Autorka ilustrací v knihách pro děti.""]","""230510s2023 xr a a 0…","[""7"", ""7""]","[""821.162.3-3"", ""821-93""]","""Česká próza""","[""Konspekt"", ""Konspekt""]","[""25"", ""26""]","""1""","""0""","""Čteme s obrázky""","""české pohádky /""","""Edita Plicková, Marie Adamovsk…",,,,,,"[""7"", ""7"", … ""9""]","""české pohádky""","[""fd133970"", ""fd182999"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"""próza""",2023,1940,,"""f""",83


In [261]:
do_grafu_drive = do_grafu.filter(pl.col("rok").is_between(1800,1938)).with_columns(obdobi = pl.lit('1800-1938'))
print(len(do_grafu_drive))
do_grafu_ted = do_grafu.filter(pl.col("rok").is_between(2000,2020)).with_columns(obdobi = pl.lit('2000-2020'))
print(len(do_grafu_ted))

7857
26501


In [262]:
do_grafu_basnici = pl.concat([do_grafu_drive.filter(pl.col("kategorie") == "poezie"), do_grafu_ted.filter(pl.col("kategorie") == "poezie")]).group_by(["obdobi","vek"]).len()
do_grafu_basnici

obdobi,vek,len
str,i64,u32
"""2000-2020""",53,150
"""2000-2020""",88,29
"""2000-2020""",83,70
"""2000-2020""",59,140
"""1800-1938""",37,56
"""1800-1938""",33,72
"""2000-2020""",39,95
"""2000-2020""",16,4
"""2000-2020""",78,102
"""2000-2020""",49,98


In [263]:
tvar1 = alt.Chart(do_grafu_basnici.to_pandas(), width=300, height=100).mark_line(interpolate="monotone").encode(
    alt.X('vek:Q').axis(domain=False, tickSize=0, title=None),
    alt.Y('len:Q').stack('zero').axis(None),
    alt.Color('obdobi:N', sort=['2000-2020','1800-1838'], scale=alt.Scale(range=['#D6534B', '#DB842F']), legend=alt.Legend(title=None, orient="top")),
    alt.StrokeDash('obdobi:N', sort=['2000-2020','1800-1838'], legend=None)
)

tvar1

In [264]:
do_grafu_prozaici = pl.concat([do_grafu_drive.filter(pl.col("kategorie") == "próza"), do_grafu_ted.filter(pl.col("kategorie") == "próza")]).group_by(["obdobi","vek"]).len()

In [265]:
tvar2 = alt.Chart(do_grafu_prozaici.to_pandas(), width=300, height=100, title="…beletrii…").mark_line(interpolate="monotone").encode(
    alt.X('vek:Q').axis(domain=False, tickSize=0, title=None),
    alt.Y('len:Q').stack('zero').axis(None),
    alt.Color('obdobi:N', sort=['2000-2020','1800-1838'], scale=alt.Scale(range=['#D6534B', '#DB842F']), legend=None),
    alt.StrokeDash('obdobi:N', sort=['2000-2020','1800-1838'], legend=None)
)

tvar2

In [266]:
do_grafu_vzpominky = pl.concat([do_grafu_drive.filter(pl.col("kategorie") == "vzpomínky"), do_grafu_ted.filter(pl.col("kategorie") == "vzpomínky")]).group_by(["obdobi","vek"]).len()

In [267]:
do_grafu_vzpominky

obdobi,vek,len
str,i64,u32
"""2000-2020""",26,15
"""1800-1938""",58,3
"""2000-2020""",24,1
"""2000-2020""",91,21
"""2000-2020""",22,8
"""2000-2020""",55,46
"""2000-2020""",34,25
"""1800-1938""",78,1
"""1800-1938""",36,4
"""2000-2020""",88,41


In [268]:
tvar3 = alt.Chart(do_grafu_vzpominky.to_pandas(), width=300, height=100, title="…a vzpomínky").mark_line(interpolate="monotone").encode(
    alt.X('vek:Q').axis(domain=False, tickSize=0, title=None),
    alt.Y('len:Q').stack('zero').axis(None),
    alt.Color('obdobi:N', sort=['2000-2020','1800-1838'], scale=alt.Scale(range=['#D6534B', '#DB842F']), legend=None),
    alt.StrokeDash('obdobi:N', sort=['2000-2020','1800-1838'], legend=None)
)

tvar3

In [269]:
alt.vconcat(tvar1, tvar2, tvar3, title=alt.Title("V jakém věku lidé vydávají básně…")).resolve_scale(x='shared').configure_view(stroke='transparent')

## Finální verze

In [339]:
kolik = len(do_grafu.select(pl.col("100_7").unique()))
kolik

15820

In [355]:
do_grafu.select(pl.col("rok").median())

rok
f64
2008.0


In [361]:
veky_autorstva = alt.Chart(do_grafu.with_columns(pl.col("kategorie").replace({'próza':'…prózu','poezie':'…poezii','vzpomínky':'…vzpomínky'})).group_by(["vek","kategorie"]).len().to_pandas(),
         title=alt.Title(
             'V jakém věku čeští autoři a autorky publikují…',
             subtitle=[f"Osamělé vrcholky odpovídají kulatým narozeninám.","(Převažují publikace z posledních dvou dekád. Grafy mají odlišná měřítka.)"]
         )).mark_bar(width=1).encode(
    alt.X("vek:Q", scale=alt.Scale(domainMax=90, domainMin=20), title=None, axis=alt.Axis(tickCount=4)),
    alt.Y("len:Q", title=None, scale=alt.Scale(domainMin=1), axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6', orient='right', format='~s')),
    alt.Color('kategorie:N', scale=alt.Scale(range=['#E09DA3','#70871E','#DB842F',]), title=None, legend=None),
    alt.Column('kategorie:N', spacing=8, title=None, sort=['…prózu','…poezii','…vzpomínky'],
           header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='start', labelFontWeight=500, labelFont='Asap', labelOrient="top"))
).properties(
    width=kredity['sirka'] / 4.1, 
    height=kredity['vyska_nizkych'], 
    autosize={'type': 'fit', 'contains': 'padding'}
).configure_view(stroke='transparent').configure_axis(grid=False, domain=False).resolve_axis(x="independent").resolve_scale(y="independent")

veky_autorstva

In [363]:
me_to_neurazi(veky_autorstva, soubor="02_tvar_kariery", kredity=kredity['default'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/02_tvar_kariery.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/02_tvar_kariery.svg" width="100%" alt="Graf s titulkem „V jakém věku čeští autoři a autorky publikují…“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>
