In [1]:
import os
import json
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.zjisti_vazbu import zjisti_vazbu
from src.me_to_neurazi import me_to_neurazi

pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

with open(os.path.join('src','kredity.json'), 'r', encoding='utf-8') as kredity:
    kredity = json.loads(kredity.read())

In [2]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode("020_q").with_columns(pl.col("020_q").map_elements(zjisti_vazbu, return_dtype=str).alias('vazba'))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
df = df.filter(pl.col("stran") > 15)
print(len(df))
df = df.filter(~pl.col('rok').is_null()).sort(by='rok')
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a"], keep="first")
print(len(df))

727498
522219


In [3]:
aut = pl.read_parquet(os.path.join("data","aut_vyber.parquet"))
cesi = aut.explode("370_c").filter(pl.col("370_c").str.contains("Česk")).explode("100_7").select(pl.col("100_7")).to_series().to_list()
len(cesi)

364420

In [4]:
df = df.filter(pl.col("100_7").is_in(cesi))

In [5]:
df = df.with_columns(pl.col('100_a').map_elements(hezke_jmeno, return_dtype=str).alias('jmeno'))

In [6]:
vek = pl.read_parquet(os.path.join("data","narozeni-umrti-gender.parquet"))
df = df.join(vek, on="100_7", how="left").with_columns((pl.col("rok") - pl.col("narozeni")).alias("vek"))

## První ženy

In [8]:
df.filter(pl.col("gender") == "f").group_by(["jmeno","100_7"]).agg(pl.col('rok').min()).sort(by='rok').head(100)

jmeno,100_7,rok
str,str,i64
"""Marie Antonie""","""jk01092270""",1819
"""Magdalena Dobromila Rettigová""","""jk01102378""",1825
"""Božena Němcová""","""jk01083016""",1854
"""Marie Stroupežnická""","""jk01121503""",1855
"""Marie Čacká""","""jk01020921""",1857
"""Anna Vlastimila Růžičková""","""jk01103217""",1859
"""Honorata Zapová z Wiśniowskich""","""jk01152148""",1859
"""Karolina Světlá""","""jk01121895""",1860
"""Marie Anna Lev""","""jk01071966""",1861
"""Jindřiška Ritterová z Rittersb…","""jk01102521""",1862


In [9]:
prvni_zeny = df.filter(pl.col("gender") == "f").group_by(["100_a","100_7"]).agg(pl.col('rok').min()).sort(by='rok').head(10).select(pl.col("100_7")).to_series().to_list()
prvni_zeny_poradi = df.filter(pl.col("gender") == "f").group_by(["jmeno","100_7"]).agg(pl.col('rok').min()).sort(by='rok').head(10).select(pl.col("jmeno")).to_series().to_list()

In [10]:
prvni_zeny_graf = alt.Chart(
    alt_friendly(df.filter(pl.col("100_7").is_in(prvni_zeny))), 
    width=kredity['sirka'] / 1.5,
         title=alt.Title(
             [f'Prvních deset českých spisovatelek'],
         subtitle="Co tečka, to kniha. Počítáme všechna vydání.")
).mark_circle(
    color='#D6534B',
    size=8,
    opacity=1
).transform_calculate(
    jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
).encode(
    alt.X("rok:T", title=None),
    alt.Y("jmeno", sort=prvni_zeny_poradi, title=None, axis=alt.Axis(tickColor="white")),
    yOffset=alt.YOffset("jitter:Q", scale=alt.Scale(range=[3, 15]))
).configure_view(stroke='transparent')

prvni_zeny_graf

In [11]:
me_to_neurazi(prvni_zeny_graf, soubor='05_prvni_zeny', kredity=kredity['default'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/05_prvni_zeny.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/05_prvni_zeny.svg" width="100%" alt="Graf s titulkem „['Prvních deset českých spisovatelek']“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


In [12]:
df.filter(pl.col("gender") == "f").group_by(["100_a","100_7"]).agg(pl.col('rok').min()).select(pl.col('rok')).median()

rok
f64
2009.0


In [13]:
df.filter(pl.col("gender") == "m").group_by(["100_a","100_7"]).agg(pl.col('rok').min()).select(pl.col('rok')).median()

rok
f64
1989.0


In [14]:
df.filter(pl.col("100_7") == "jk01092270").select(pl.col(["jmeno","245_a","rok","655_a"])).sort(by='rok')

jmeno,245_a,rok,655_a
str,str,i64,list[str]
"""Marie Antonie""","""Žiwot Swaté Alžběty, dcery krá…",1819,"[""biografie""]"
"""Marie Antonie""","""Chlebowé poswátnj, aneb, Swaté…",1820,"[""příručky""]"
"""Marie Antonie""","""Serafka""",1826,"[""české povídky"", ""didaktické povídky"", ""publikace pro mládež""]"
"""Marie Antonie""","""Myrrhowý wěneček, aneb, Utrpen…",1828,"[""české povídky""]"
"""Marie Antonie""","""Keř rozmarýnový, ze stínu do w…",1830,"[""české prózy""]"
"""Marie Antonie""","""Keř Rozmarínowý, ze stjnu do w…",1830,"[""české prózy""]"
"""Marie Antonie""","""Žiwot Swaté Alžběty, dcery krá…",1843,
"""Marie Antonie""","""Myrhový věneček""",1865,"[""didaktické povídky"", ""publikace pro mládež"", ""české povídky""]"


In [15]:
df.filter(pl.col("100_7") == "jk01102378").select(pl.col(["jmeno","245_a","rok","655_a"])).sort(by='rok')

jmeno,245_a,rok,655_a
str,str,i64,list[str]
"""Magdalena Dobromila Rettigová""","""Wěneček pro dcerky wlastenské""",1825,"[""české povídky"", ""didaktické povídky"", ""publikace pro mládež""]"
"""Magdalena Dobromila Rettigová""","""Domácý Kuchařka, aneb, Pogedná…",1826,"[""kuchařské recepty""]"
"""Magdalena Dobromila Rettigová""","""Křesťanka wzýwagjcý Boha, aneb…",1827,"[""příručky""]"
"""Magdalena Dobromila Rettigová""","""Bjlá růže""",1827,"[""česká dramata""]"
"""Magdalena Dobromila Rettigová""","""Narcisky""",1834,"[""české povídky""]"
"""Magdalena Dobromila Rettigová""","""Kwjtj Mágowé""",1835,"[""české povídky"", ""didaktické povídky"", ""publikace pro mládež""]"
"""Magdalena Dobromila Rettigová""","""Dobrá rada Slowanským wenkowan…",1838,"[""kuchařské recepty""]"
"""Magdalena Dobromila Rettigová""","""Mladá hospodyňka w domácnosti,…",1840,"[""příručky""]"
"""Magdalena Dobromila Rettigová""","""Jaroslaw a Terynka""",1841,"[""české povídky""]"
"""Magdalena Dobromila Rettigová""","""Pogednánj o Telecjm Mase""",1843,"[""kuchařské recepty""]"


In [16]:
df.filter(pl.col("100_7") == "jk01083016").select(pl.col(["245_a","rok","655_a"])).sort(by='rok')

245_a,rok,655_a
str,i64,list[str]
"""Národní Báchorky a Pověsti od …",1854,"[""české pověsti"", ""české pohádky""]"
"""Babička""",1855,"[""české prózy"", ""Czech prose""]"
"""Pohorská vesnice""",1856,"[""české prózy"", ""Czech prose""]"
"""Slovenské pohádky a pověsti""",1857,"[""slovenské pohádky"", ""slovenské pověsti""]"
"""Sebrané spisy Boženy Němcové""",1862,"[""české prózy""]"
"""Karla""",1862,"[""české prózy"", ""sebrané spisy"", … ""collected works""]"
"""Babička a jiné povídky""",1862,"[""české prózy"", ""sebrané spisy"", … ""collected works""]"
"""Drobné povídky""",1862,"[""české povídky""]"
"""Selská svatba""",1862,"[""české povídky"", ""črty""]"
"""Divá Bára""",1862,"[""české povídky"", ""sebrané spisy"", … ""collected works""]"


In [17]:
df.filter(pl.col("100_7") == "jk01102378").select(pl.col(["245_a","rok"])).sort(by='rok')

245_a,rok
str,i64
"""Wěneček pro dcerky wlastenské""",1825
"""Domácý Kuchařka, aneb, Pogedná…",1826
"""Křesťanka wzýwagjcý Boha, aneb…",1827
"""Bjlá růže""",1827
"""Narcisky""",1834
"""Kwjtj Mágowé""",1835
"""Dobrá rada Slowanským wenkowan…",1838
"""Mladá hospodyňka w domácnosti,…",1840
"""Jaroslaw a Terynka""",1841
"""Pogednánj o Telecjm Mase""",1843


In [18]:
df.filter(pl.col("gender") == "m").group_by(["100_a","100_7"]).agg(pl.col('rok').min()).sort(by='rok').head(100)

100_a,100_7,rok
str,str,i64
"""Poupě, František Ondřej,""","""jk01100828""",1801
"""Végh, Jan,""","""jk01141890""",1801
"""Pavlovský, Antonín,""","""jk01092171""",1801
"""Kramerius, Václav Matěj,""","""xx0011353""",1801
"""Karmášek, Josef Arnošt,""","""ola2010577165""",1802
"""Puchmajer, Antonín Jaroslav,""","""jk01101689""",1802
"""Dittrich, Josef Petr Václav,""","""jk01022308""",1803
"""Tomsa, František Jan,""","""jk01132744""",1803
"""Ryba, Jakub Jan,""","""jk01103232""",1803
"""Spieß, Christian Heinrich,""","""jn20000605105""",1803


## Historický podíl

In [20]:
len(df.filter(pl.col("rok").is_between(1801,1900)).filter(pl.col("gender") == "m").select(pl.col("100_7")).unique())

3388

In [21]:
len(df.filter(pl.col("rok").is_between(1801,1900)).filter(pl.col("gender") == "f").select(pl.col("100_7")).unique())

109

In [22]:
len(df.filter(pl.col("rok").is_between(1801,1900)).filter(pl.col("gender") == "m").select(pl.col("100_7")).unique()) / len(df.filter(pl.col("rok").is_between(1801,1900)).filter(pl.col("gender") == "f").select(pl.col("100_7")).unique())

31.08256880733945

In [23]:
historicky_podil = df.filter(
    pl.col('gender') == 'm').group_by("rok").len().rename({'len':'m'}).join(
    df.filter(pl.col('gender') == 'f').group_by("rok").len().rename({'len':'f'}), on='rok'
).with_columns(((pl.col('f') / (pl.col('m') + pl.col('f'))).alias('podil'))).sort(by='rok').filter(
    pl.col("rok").is_between(1800,2024)).with_columns(
    pl.col("podil").rolling_mean(window_size=2)
    )

In [24]:
historicky_podil.head(20)

rok,m,f,podil
i64,u32,u32,f64
1819,14,1,
1820,20,1,0.057143
1825,34,1,0.038095
1826,19,2,0.061905
1827,18,2,0.097619
1828,17,1,0.077778
1830,25,2,0.064815
1834,17,1,0.064815
1835,22,1,0.049517
1838,21,1,0.044466


In [25]:
historicky_podil.tail(20)

rok,m,f,podil
i64,u32,u32,f64
2005,3655,1360,0.264807
2006,3647,1508,0.281859
2007,3795,1563,0.292122
2008,3856,1609,0.293066
2009,3552,1571,0.300538
2010,3714,1614,0.304792
2011,3282,1466,0.305845
2012,3123,1465,0.314036
2013,3527,1694,0.321885
2014,3687,1819,0.327413


In [26]:
historicky_podil_graf = alt.Chart(
    alt_friendly(historicky_podil.filter(pl.col("rok") >= 1850)), width=kredity['sirka'], height=kredity['vyska_nizkych'] * 1.5,
    title=["Podíl ženských autorek na nových českých knihách všech žánrů"]
).mark_bar(width=1, color='#D6534B').encode(
    alt.X('rok:T', title=None, axis=alt.Axis(tickCount=6)),
    alt.Y('podil:Q', axis=alt.Axis(tickCount=5, labelExpr="datum.label * 100 + ' %'", orient='right'),
          title=None, scale=alt.Scale(domain=[0,0.45]))
).configure_view(stroke='transparent').configure_axisX(
    grid=False, domain=False
)

historicky_podil_graf

In [27]:
alt.Chart(
    alt_friendly(historicky_podil.filter(pl.col("rok") >= 1850)), width=300, height=100
).mark_line().encode(
    alt.X('rok:T'),
    alt.Y('f:Q')
).configure_view(stroke='transparent')

In [28]:
historicky_podil.filter(pl.col("rok") > 1940).sort(by="podil")

rok,m,f,podil
i64,u32,u32,f64
1952,904,50,0.052807
1953,1277,77,0.05464
1951,872,49,0.059896
1954,1367,107,0.06473
1950,799,57,0.067938
1946,1884,159,0.070405
1955,1343,104,0.072232
1949,900,67,0.072622
1956,1390,119,0.075367
1948,1618,133,0.078519


In [29]:
from src.me_to_neurazi import me_to_neurazi

In [30]:
me_to_neurazi(historicky_podil_graf, soubor="05_historicky_podil_zen", kredity=kredity['wiki'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/05_historicky_podil_zen.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/05_historicky_podil_zen.svg" width="100%" alt="Omlouváme se, ale alternativní text se nepodařilo vygenerovat. Texty v grafu by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


## Současnost

In [32]:
df_do_sta = df.filter(pl.col("vek") < 100)

In [33]:
ceska_poezie = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a") == "česká poezie"
)

In [34]:
ceske_romany = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a") == "české romány"
).sort(by='rok').unique(subset=['100_a','245_a'], keep='first')

In [35]:
ceske_kratke = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a").is_in(["české novely","české povídky","české příběhy","české prózy"])
).sort(by='rok').unique(subset=['100_a','245_a'], keep='first')

In [36]:
ceske_vsechny_prozy = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a").is_in(["české romány","české novely","české povídky","české příběhy","české prózy"])
).sort(by='rok').unique(subset=['100_a','245_a'], keep='first').sort(by='rok')

In [37]:
ceske_vsechny_prozy.tail(20).select(pl.col(["100_a","245_a","rok","gender"]))

100_a,245_a,rok,gender
str,str,i64,str
"""Valenová, Blanka,""","""Lenička a babička""",2025,"""f"""
"""Reviláková, Naďa,""","""Na kameni kámen""",2025,"""f"""
"""Valová, Nikola,""","""Prásknout do bot""",2025,"""f"""
"""Novotná, Martina,""","""Hříchy dávné minulosti""",2025,"""f"""
"""Krčmář, Milan""","""Vše je na dobré cestě""",2025,"""m"""
"""Valíková, Veronika,""","""Sesterské domino""",2025,"""f"""
"""Holub, Milan,""","""DušaN moja""",2025,"""m"""
"""Majchráková, Barbora,""","""Domek v malinovém údolí""",2025,"""f"""
"""Abrahamová, Jana,""","""Není vražda jako vražda""",2025,"""f"""
"""Vojtíšek, Honza,""","""Tam uvnitř""",2025,"""m"""


In [38]:
df_podil = ceske_romany.filter(
    pl.col("gender") == 'f'
).group_by("rok").len().sort(by="rok").rename({"len":'zeny'}).join(
    ceske_romany.filter(pl.col("gender") == 'm').group_by("rok").len().sort(by="rok").rename({"len":'muzi'}), on='rok'
).with_columns(
    (pl.col('zeny') + pl.col('muzi')).alias('celkem')
).with_columns(
    (pl.col('zeny') / pl.col('celkem')).alias('podil_zen')
).with_columns(pl.lit("romány").alias("zanr")).with_columns(pl.col("podil_zen").rolling_mean(window_size=2))

In [39]:
df_podil_kratke = ceske_kratke.filter(
    pl.col("gender") == 'f'
).group_by("rok").len().sort(by="rok").rename({"len":'zeny'}).join(
    ceske_kratke.filter(pl.col("gender") == 'm').group_by("rok").len().sort(by="rok").rename({"len":'muzi'}), on='rok'
).with_columns(
    (pl.col('zeny') + pl.col('muzi')).alias('celkem')
).with_columns(
    (pl.col('zeny') / pl.col('celkem')).alias('podil_zen')
).with_columns(pl.lit("kratší prózy").alias("zanr")).with_columns(pl.col("podil_zen").rolling_mean(window_size=2))

In [40]:
df_podil_poezie = ceska_poezie.filter(
    (pl.col("gender") == 'f')
).group_by("rok").len().sort(by="rok").rename({"len":'zeny'}).join(
    ceska_poezie.filter(pl.col("gender") == 'm').group_by("rok").len().sort(by="rok").rename({"len":'muzi'}), on='rok'
).with_columns(
    (pl.col('zeny') + pl.col('muzi')).alias('celkem')
).with_columns(
    (pl.col('zeny') / pl.col('celkem')).alias('podil_zen')
).with_columns(pl.lit("poezie").alias("zanr")).with_columns(pl.col("podil_zen").rolling_mean(window_size=2))

In [41]:
df_podil_vsechny_prozy = ceske_vsechny_prozy.filter(
    pl.col("gender") == 'f'
).group_by("rok").len().sort(by="rok").rename({"len":'zeny'}).join(
    ceske_vsechny_prozy.filter(pl.col("gender") == 'm').group_by("rok").len().sort(by="rok").rename({"len":'muzi'}), on='rok'
).with_columns(
    (pl.col('zeny') + pl.col('muzi')).alias('celkem')
).with_columns(
    (pl.col('zeny') / pl.col('celkem')).alias('podil_zen')
).with_columns(pl.lit("próza").alias("zanr")).with_columns(pl.col("podil_zen").rolling_mean(window_size=2))

In [42]:
podil_v_soucasnosti = alt.Chart(
    alt_friendly(pl.concat([df_podil_vsechny_prozy, df_podil_poezie]).filter(pl.col('rok').is_between(1990,2024))), 
    title=alt.Title(["Podíl autorek na nově vydaných českých knihách"], lineHeight=21), width=kredity['sirka'], height=kredity['vyska_nizkych']
         ).mark_bar(color = '#D6534B'
         ).encode(
    alt.X("rok:T", title=None, axis=alt.Axis(tickCount=5)),
    alt.Y('podil_zen:Q', title=None, axis=alt.Axis(
        tickCount=5, labelExpr="datum.label * 100 + ' %'", orient='right'), scale=alt.Scale(domain=[0,0.5])
         ),
    alt.Row("zanr:N",header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='middle', labelFont='Asap'), sort=["próza","poezie"], title=None)
).configure_view(stroke='transparent').resolve_scale(y='shared').resolve_axis(x="independent").configure_axisX(
    grid=False, domain=False
)

podil_v_soucasnosti

In [43]:
me_to_neurazi(podil_v_soucasnosti, soubor='05_soucasny_podil_zen', kredity=kredity['wiki'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/05_soucasny_podil_zen.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/05_soucasny_podil_zen.svg" width="100%" alt="Graf s titulkem „['Podíl autorek na nově vydaných českých knihách']“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


## Věková struktura

In [45]:
alt.Chart(
    pl.concat([ceske_romany, ceske_kratke]).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [46]:
pl.concat([ceske_romany, ceske_kratke]).sample(20)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba,jmeno,narozeni,umrti,gender,vek
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str,str,i64,i64,str,i64
"""1""","""Heller, Jiří,""","""xx0027260""","[""pht""]","""1957-""",,,,,"""nkc20051584410""",""" cam a22 a 4500""","""051005s2005 xr a g 0…","""(váz.) :""","[""Kč 149,00""]","[""80-7195-004-1""]",,,,,,,"""1""","""0""","""Hlas přírody""",,"""[fotografie] Jiří Heller, [tex…",,,,,,"[""47 s. :""]","[""barev. il. ;""]","[""17 cm""]",,,,…,,,,"[""1""]","[""Klimtová, Vítězslava,""]","[""aut""]","[""1941-2016""]","[""jk01060612""]",,,,,,,,,,,,,,,,,,,,,,2005,47,"""pevná""","""Jiří Heller""",1957,,"""m""",48
"""1""","""Zet, Bohumír,""","""mzk2008469224""","[""aut""]","""1931-2013""",,,,,"""nkc20091860961""",""" nam a22 a 4500""","""090311s2008 xr a c 0…","""(brož.)""",,"[""978-80-7399-476-1""]",,,,,,,"""1""","""0""","""Kouzelné údolí pana železničář…",,"""Bohumír Zet ; [ilustrovala Alž…",,,,,,"[""85 s. :""]","[""il. ;""]","[""30 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2008,85,"""brožovaná""","""Bohumír Zet""",1931,2013.0,"""m""",77
"""1""","""Vokolek, Václav,""","""jn19981002344""","[""aut""]","""1947-""",,,,,"""nkc20152694959""",""" cam a22 i 4500""","""150623s2015 xr a c 0…","""(vázáno)""",,"[""978-80-7407-260-4""]",,,,,,,"""1""","""0""","""Knížka na prázdniny""","""(za dobré i špatné vysvědčení)…","""Václav Vokolek ; ilustrace vyt…",,,,,,"[""235 stran :""]","[""barevné ilustrace ;""]","[""22 cm""]",,,,…,,,,"[""1""]","[""Krejčová, Zdeňka,""]","[""ill""]","[""1944-""]","[""jk01063265""]",,,,,,,,,,,,,,,,,,,,,,2015,235,"""pevná""","""Václav Vokolek""",1947,,"""m""",68
"""1""","""Štorkán, Karel,""","""jk01131496""","[""aut""]","""1923-2007""",,,,,"""nkc20051580854""",""" nam a22 a 4500""","""050905s2005 xr g 0…","""(váz.) :""","[""Kč 199,00""]","[""80-243-1984-5""]",,,,,,,"""1""","""0""","""Vražda kočky domácí""",,"""Karel Štorkán""",,,,,,"[""222 s. ;""]",,"[""19 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2005,222,"""pevná""","""Karel Štorkán""",1923,2007.0,"""m""",82
"""1""","""Petiška, Eduard,""","""jk01092720""","[""aut""]","""1924-1987""",,,,,"""bk196003641""",""" nam a22 1 4500""","""980904s1960 xr g | 0…",,,,,,,,,,"""1""","""0""","""Než uzrají muži""",,"""Eduard Petiška""",,,,,,"[""257 s. ;""]",,"[""8°""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1960,257,,"""Eduard Petiška""",1924,1987.0,"""m""",36
"""1""","""Šárková, Danka,""","""xx0134945""","[""aut""]","""1969-""",,,,,"""nkc20132434279""",""" cam a22 a 4500""","""121126s2012 xr e 0…","""(brož.)""",,"[""978-80-86919-19-5""]",,,,,,,"""1""","""0""","""Zašívaná panenka""",,"""Danka Šárková""",,,,,,"[""86 s. ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2012,86,"""brožovaná""","""Danka Šárková""",1969,,"""f""",43
"""1""","""Freiová, Kristýna,""","""jo2017972296""","[""aut""]","""1988-""",,,,,"""nkc20172954832""",""" nam a22 i 4500""","""171113t20172017xr g 0…","""(vázáno)""",,"[""978-80-7546-130-8""]",,,,,,,"""1""","""0""","""Galilejec""","""přišel čas procitnout! /""","""Kristýna Freiová""",,,,,,"[""206 stran ;""]",,"[""22 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017,206,"""pevná""","""Kristýna Freiová""",1988,,"""f""",29
"""1""","""Krolupperová, Daniela,""","""xx0012658""","[""aut""]","""1969-""",,,,,"""nkc20071761456""",""" nam a22 a 4500""","""071030s2007 xr a a 0…","""(váz.)""",,"[""978-80-7367-308-6""]",,,,,,,"""1""","""0""","""Putování za nejmocnějším kouzl…",,"""Daniela Krolupperová ; ilustra…",,,,,,"[""50 s. :""]","[""barev. il. ;""]","[""24 cm""]",,,,…,,,,"[""1""]","[""Plicková, Edita,""]","[""ill""]","[""1940-""]","[""jk01093465""]",,,,,,,,,,,,,,,,,,,,,,2007,50,"""pevná""","""Daniela Krolupperová""",1969,,"""f""",38
"""1""","""Čepelka, Miloň,""","""jk01021264""","[""aut""]","""1936-""",,,,,"""nkc20233548620""",""" nam a22 i 4500""","""230918s2023 xr g 0…","""(vázáno)""",,"[""978-80-7475-448-7""]",,,,,,,"""1""","""0""","""Letí to pamětí""","""vzpomínky, dojmy, glosy 2023 /""","""Miloň Čepelka""",,,,,,"[""141 stran ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023,141,"""pevná""","""Miloň Čepelka""",1936,,"""m""",87
"""1""","""Francková, Zuzana,""","""xx0000464""","[""aut""]","""1951-""",,,,,"""nkc20102121008""",""" nam a22 a 4500""","""100818s2010 xr d 0…","""(váz.)""",,"[""978-80-7190-915-6""]",,,,,,,"""1""","""0""","""Půjčka na oplátku""",,"""Zuzana Francková""",,,,,,"[""109 s. ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2010,109,"""pevná""","""Zuzana Francková""",1951,,"""f""",59


In [47]:
proza20x24 = pl.concat([ceske_romany, ceske_kratke]).filter((pl.col('rok') < pl.col('umrti')) | (pl.col('umrti').is_null())).filter(pl.col('rok').is_between(2001,2024)).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f']))
proza20x24m = proza20x24.filter(pl.col('gender') == 'm').with_columns(pl.col('len').rolling_mean(window_size=3))
proza20x24f = proza20x24.filter(pl.col('gender') == 'f').with_columns(pl.col('len').rolling_mean(window_size=3))
proza20x24 = pl.concat([proza20x24m, proza20x24f])

In [48]:
graf_zlom_final = alt.Chart(proza20x24.with_columns(pl.col("gender").replace_strict({"m":"muži","f":"ženy"})).filter(
    pl.col('vek').is_between(15,95)
).to_pandas(),
          title=alt.Title('Věková struktura českých spisovatelů a spisovatelek',
                          subtitle=["Graf zachycuje beletrii vydanou ve 21. století. Věková struktura zůstává podobná",
                                    "i po vyfiltrování generace mladých autorek nastupující právě v této době,",
                                    "kariérní zlom žen se tím pouze posune o několik let směrem k padesátce."]),
          width=kredity['sirka'],
          height=kredity['vyska_nizkych'] * 1.6
).mark_bar(opacity=0.5).encode(
    alt.X("vek", title=None, scale=alt.Scale(domain=[15,95])),
    alt.Y("len", stack=False, title=None, axis=alt.Axis(orient='right'), scale=alt.Scale(domainMin=1)),
    alt.Color("gender", title=None, legend=alt.Legend(orient='top'),
             scale=alt.Scale(range=['#81A9D5','#D6534B']))
).configure_view(
    stroke='transparent').configure_axis(grid=False, domain=False)

graf_zlom_final

In [49]:
do_grafu_veky = proza20x24.with_columns(pl.col("gender").replace_strict({"m":"muži","f":"ženy"})).filter(
    pl.col('vek').is_between(15,100)
).to_pandas()
do_grafu_veky['plot_vek'] = do_grafu_veky.apply(lambda row: row['vek'] + 0.5 if row['gender'] == 'ženy' else row['vek'], axis=1)

graf_zlom = alt.Chart(do_grafu_veky,
          title=alt.Title('Věková struktura českých spisovatelů a spisovatelek',
                          subtitle=["Graf zachycuje beletrii vydanou ve 21. století. Věková struktura zůstává",
                                    "podobná i po vyfiltrování generace mladých autorek nastupující právě v této",
                                    "době, kariérní zlom žen se tím pouze posune o několik let směrem k padesátce."]),
          width=kredity['sirka'],
          height=kredity['vyska_nizkych'] * 1.6
).mark_bar(width=1.5).encode(
    alt.X("plot_vek:Q", title=None, axis=alt.Axis(tickCount=5, values=list(range(20, 100, 10))), 
          scale=alt.Scale(domain=[15, 95])),
    alt.Y("len:Q", stack=False, title=None, axis=alt.Axis(orient='right')),
    alt.Color("gender:N", title=None, legend=alt.Legend(orient="top"),
             scale=alt.Scale(range=['#EED801','#D6534B']))
).configure_view(
    stroke='transparent').configure_axis(grid=False, domain=False)

graf_zlom

In [50]:
alt.Chart(proza20x24.to_pandas()).mark_bar().encode(
    x=alt.X("vek:N"),
    xOffset="gender:N",
    y=alt.Y("len:Q"),
    color=alt.Color("gender:N"),
)


In [51]:
proza20x24.filter(pl.col("gender") == "f").sort(by="len",descending=True).drop_nulls().head(1)

gender,vek,len
str,i64,f64
"""f""",42,196.0


In [52]:
proza20x24.filter(pl.col("gender") == "m").sort(by="len",descending=True).drop_nulls().head(1)

gender,vek,len
str,i64,f64
"""m""",70,195.333333


In [53]:
graf_zlom = alt.Chart(proza20x24.with_columns(pl.col("gender").replace_strict({"m":"muži","f":"ženy"})).filter(
    pl.col('vek').is_between(15,100)
).to_pandas(),
          title=alt.Title('Věková struktura českých spisovatelů a spisovatelek',
                          subtitle=["Graf zachycuje beletrii vydanou ve 21. století. Věková struktura zůstává",
                                    "podobná i po vyfiltrování generace mladých autorek nastupující právě v této",
                                    "době, kariérní zlom žen se tím pouze posune o několik let směrem k padesátce."]),
          width=kredity['sirka'] / 2.1,
          height=kredity['vyska_nizkych'] * 1.6
).mark_bar(width=1.3).encode(
    alt.X("vek", title=None),
    alt.Y("len", title=None, axis=alt.Axis(orient='right')),
    alt.Column("gender", title=None, spacing=0),
    alt.Color("gender", title=None, legend=None,
             scale=alt.Scale(range=['#DB842F','#D6534B','#DB842F']))
).configure_view(
    stroke='transparent').configure_axis(grid=False, domain=False)

graf_zlom

In [54]:
me_to_neurazi(graf_zlom_final, soubor="05_vekova_struktura", kredity=kredity['wiki'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/05_vekova_struktura.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/05_vekova_struktura.svg" width="100%" alt="Graf s titulkem „Věková struktura českých spisovatelů a spisovatelek“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


In [55]:
alt.Chart(
    pl.concat([ceske_romany, ceske_kratke]).filter(
        pl.col('rok').is_between(2000,2010)
    ).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [56]:
alt.Chart(
    pl.concat([ceske_romany, ceske_kratke]).filter(
        pl.col('rok').is_between(2022,2024)
    ).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [57]:
alt.Chart(
    pl.concat([ceske_romany, ceske_kratke]).filter(
        pl.col('rok').is_between(2022,2024)
    ).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [58]:
alt.Chart(
    pl.concat([ceska_poezie]).filter(
        pl.col('rok').is_between(2022,2024)
    ).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [59]:
alt.Chart(
    alt_friendly(pl.concat([ceske_romany, ceske_kratke]).group_by(["gender","rok"]).agg(pl.col('vek').median()).filter(pl.col('gender').is_in(['m','f'])))
).mark_line().encode(alt.X("rok"),alt.Y("vek"),alt.Row("gender"))

## Věková struktura, alternativní pohledy

### Pohled na debutanty a debutantky

In [62]:
df_debuty = df.explode("655_a").filter(
        pl.col("655_a").is_in(["české romány","české novely","české povídky","české příběhy","české prózy"])
).sort(by="rok").unique(subset=['100_7'], keep="first")

In [63]:
df_debuty.sample(20).select(pl.col(['100_a','245_a','rok','gender']))

100_a,245_a,rok,gender
str,str,i64,str
"""Lipner, Tomáš,""","""Hvězdolet Vltava""",2022,"""m"""
"""Štrobová, Alena,""","""Jak poštovní skřítci slavili n…",2018,"""f"""
"""Pekárková, Veronika,""","""16-03-20""",2021,"""f"""
"""Vodák, František,""","""Abendland, aneb, Legenda o pos…",2013,"""m"""
"""Sauer, Franta,""","""Franta Habán ze Žižkova""",1965,"""m"""
"""Waagnerová, Kristina,""","""Zlatá grai""",2021,"""f"""
"""Janova,""","""Skorkoviny""",2005,"""f"""
"""Kročková, Taťána,""","""Mimo prostor a čas""",2010,"""f"""
"""Musil, Vítězslav,""","""Román o Valdštejnovi""",2022,"""m"""
"""Mahlerová-Šustková, Simona,""","""Přes kočičí hřbet""",2009,"""f"""


In [64]:
debuty_vyvoj = df_debuty.group_by(["rok","gender"]).len().pivot(
    index="rok",         # Keep 655_a as the index
    columns="gender",      # Spread gender values to columns
    values="len",          # Use len values as the values
    aggregate_function="first"  # If there are duplicates, take the first value
).with_columns([
    pl.col("m").alias("len_m"),   # Rename "m" to "len_m"
    pl.col("f").alias("len_f")    # Rename "f" to "len_f"
]).drop(["m", "f"]).with_columns(
    (pl.col('len_f') / (pl.col('len_m') + pl.col('len_f'))).alias('podil')
).with_columns(
    (pl.col('len_f') + pl.col('len_m')).alias('celkem')
).sort(
    by="podil")

In [65]:
alt.Chart(alt_friendly(debuty_vyvoj.filter(pl.col('rok') >= 1990))).mark_line().encode(alt.X('rok:T'),alt.Y('podil:Q'))

### Zkusíme vyfiltrovat ženy, které již publikovaly dříve – pro odstínění možného nástupu mladé generace

In [67]:
pl.concat([ceske_romany, ceske_kratke]).group_by("rok").len().sort(by="rok")

rok,len
i64,u32
1804,1
1807,1
1810,1
1814,1
1815,1
1818,1
1819,1
1823,2
1824,4
1825,12


In [68]:
pl.concat([ceske_romany, ceske_kratke]).filter(pl.col('rok') >= 2000)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba,jmeno,narozeni,umrti,gender,vek
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str,str,i64,i64,str,i64
"""1""","""Štulcová, Renata,""","""mzk2003194949""","[""aut""]","""1969-""",,,,,"""nkc20102126438""",""" nam a22 a 4500""","""100914s2010 xr abf c 0…","""(váz.)""",,"[""978-80-00-02488-2""]",,,,,,,"""1""","""0""","""Mojmír""","""cesta pravého krále /""","""Renata Štulcová ; ilustrovala …",,,,,,"[""299 s., [32] s. barev. obr. příl. :""]","[""il., mapy ;""]","[""25 cm""]",,,,…,,,,"[""1""]","[""Fučíková, Renáta,""]","[""ill""]","[""1964-""]","[""jn20001005412""]",,,,,,,,,,,,,,,,,,,,,,2010,299,"""pevná""","""Renata Štulcová""",1969,,"""f""",41
"""1""","""Ludvíková, Jitka,""","""ola2015861614""","[""aut""]","""1981-""",,,,,"""nkc20243603349""",""" nam a22 i 4500""","""241010s2024 xr g 0…","""(vázáno)""",,"[""978-80-242-9951-8""]",,,,,,,"""1""","""0""","""Chcípneš""",,"""Jitka Ludvíková""",,,,,,"[""311 stran ;""]",,"[""19 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,311,"""pevná""","""Jitka Ludvíková""",1981,,"""f""",43
"""1""","""Stínil, Luděk,""","""xx0003294""","[""aut""]","""1975-""",,,,,"""nkc20051574817""",""" nam a22 a 4500""","""050711s2006 xr d 0…","""(váz.)""",,"[""80-7301-157-3""]",,,,,,,"""1""","""0""","""Duhová touha""",,"""Luděk Stínil""",,,,,,"[""125 s. ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2006,125,"""pevná""","""Luděk Stínil""",1975,,"""m""",31
"""1""","""Zvelebilová, Rosana,""","""xx0262137""","[""aut"", ""ill""]","""1987-""",,,,,"""nkc20213324562""",""" nam a22 i 4500""","""210714s2021 xr a g 0…","""(brožováno)""",,"[""978-80-88392-10-1""]",,,,,,,"""1""","""0""","""Bedny""","""zabít sráče! /""","""Rosana Zvelebilová""",,,,,,"[""367 stran :""]","[""ilustrace (převážně barevné) ;""]","[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021,367,"""brožovaná""","""Rosana Zvelebilová""",1987,,"""f""",34
"""1""","""Horáková, Naďa,""","""ola2002105147""","[""aut""]","""1962-""",,,,,"""nkc20253667254""",""" nam a22 i 4500""","""250214s2025 xr g 0…","""(vázáno)""",,"[""978-80-279-1775-4""]",,,,,,,"""1""","""0""","""Nebeská růže""",,"""Naďa Horáková""",,,,,,"[""237 stran ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2025,237,"""pevná""","""Naďa Horáková""",1962,,"""f""",63
"""1""","""Svobodová, Vlasta,""","""xx0000829""","[""aut""]","""1927-""",,,,,"""nkc20051499808""",""" nam a22 a 4500""","""050214s2005 xr d 0…","""(váz.)""",,"[""80-7301-140-9""]",,,,,,,"""1""","""0""","""Krása není všechno""",,"""Vlasta Svobodová""",,,,,,"[""125 s. ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2005,125,"""pevná""","""Vlasta Svobodová""",1927,,"""f""",78
"""1""","""Salichov, Arif,""","""jn19990007316""","[""aut""]","""1951-2018""",,,,,"""cpk20051492360""",""" cam a22 a 4500""","""041110s2004 xr a g 0…","""(váz.)""",,"[""80-86699-20-X""]",,,,,,,"""1""","""0""","""Když jdou kytky na popravu""",,"""Arif Salichov ; [ilustrace Vla…",,,,,,"[""183 s. :""]","[""il. ;""]","[""19 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2004,183,"""pevná""","""Arif Salichov""",1951,2018,"""m""",53
"""1""","""Kanakaredes, Victoria,""","""osa2011670834""","[""aut""]","""1982-""",,,,,"""nkc20112225095""",""" cam a22 a 4500""","""111115s2011 xr g 0…","""(váz.)""",,"[""978-80-7388-586-1""]",,,,,,,"""1""","""0""","""Eleonora ze Schwarzenbergu""","""skutečná upíří princezna /""","""Victoria Kanakaredes""",,,,,,"[""350 s. ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,350,"""pevná""","""Victoria Kanakaredes""",1982,,"""f""",29
"""1""","""Abrahamová, Jana,""","""xx0018456""","[""aut""]","""1959-""",,,,,"""nkc20061664269""",""" nam a22 a 4500""","""060724s2006 xr g 0…","""(váz.)""",,"[""80-7301-182-4""]",,,,,,,"""1""","""0""","""Šamanky z Maxova""",,"""Jana Abrahamová""",,,,,,"[""126 s. ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2006,126,"""pevná""","""Jana Abrahamová""",1959,,"""f""",47
"""1""","""Krečmer, Josef,""","""jn19990209421""","[""aut""]","""1930-2016""",,,,,"""nkc20010887719""",""" cam a22 a 4500""","""001206s2000 xr a g 0…","""(váz.)""",,"[""80-86355-55-1""]",,,,,,,"""1""","""0""","""Hříšný světec šumař Janák""",,"""Josef Krečmer ; [ilustrace Jiř…",,,,,,"[""139 s. :""]","[""il. ;""]","[""22 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2000,139,"""pevná""","""Josef Krečmer""",1930,2016,"""m""",70


In [69]:
hranicni_rok = 2005
zavedeni = df.filter(pl.col('rok').is_between(1980,hranicni_rok)).select(pl.col('100_7')).to_series().to_list()
print(len(zavedeni))
struktura_zavedenych = pl.concat([ceske_romany, ceske_kratke]).filter(pl.col('rok') > hranicni_rok).filter(pl.col('100_7').is_in(zavedeni)).group_by(['gender','vek']).len().sort(by='vek')
struktura_zavedenych_m = struktura_zavedenych.filter(pl.col('gender') == 'm').with_columns(pl.col('len').rolling_mean(window_size=3))
struktura_zavedenych_f = struktura_zavedenych.filter(pl.col('gender') == 'f').with_columns(pl.col('len').rolling_mean(window_size=3))
struktura_zavedenych = pl.concat([struktura_zavedenych_m, struktura_zavedenych_f])

alt.Chart(struktura_zavedenych.to_pandas()).mark_line().encode(alt.X('vek'),alt.Y('len'),alt.Color('gender'))

90865


In [70]:
struktura_zavedenych.filter(pl.col('vek').is_between(47,52)).sort(by='vek')

gender,vek,len
str,i64,f64
"""m""",47,56.666667
"""f""",47,64.333333
"""m""",48,60.333333
"""f""",48,61.333333
"""m""",49,66.0
"""f""",49,58.666667
"""m""",50,60.666667
"""f""",50,56.0
"""m""",51,68.0
"""f""",51,52.666667


In [71]:
pl.concat([ceske_romany, ceske_kratke]).filter(pl.col('rok') >= 2000).group_by('vek').len()

vek,len
i64,u32
12,2
15,18
18,29
24,91
21,73
36,333
30,211
45,348
39,327
42,336


## Poměry žánrů

In [73]:
pomery_2000 = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).filter(pl.col("rok") >= 2000).explode("655_a").group_by(['gender','655_a']).len().pivot(
    index="655_a",         # Keep 655_a as the index
    columns="gender",      # Spread gender values to columns
    values="len",          # Use len values as the values
    aggregate_function="first"  # If there are duplicates, take the first value
).with_columns([
    pl.col("m").alias("len_m"),   # Rename "m" to "len_m"
    pl.col("f").alias("len_f")    # Rename "f" to "len_f"
]).drop(["m", "f"]).with_columns(
    (pl.col('len_f') / (pl.col('len_m') + pl.col('len_f'))).alias('podil')
).with_columns(
    (pl.col('len_f') + pl.col('len_m')).alias('celkem')
).filter(pl.col("celkem") >= 100).sort(
    by="podil")

In [74]:
pomery_2000

655_a,null,len_m,len_f,podil,celkem
str,u32,u32,u32,f64,u32
"""kázání""",,133,7,0.05,140
"""sermons""",,126,7,0.052632,133
"""non-fiction""",10,1261,82,0.061057,1343
"""literatura faktu""",11,1324,88,0.062323,1412
"""kreslené vtipy""",1,191,13,0.063725,204
"""jokes""",1,207,18,0.08,225
"""chronologické přehledy""",2,119,12,0.091603,131
"""chronological surveys""",2,106,11,0.094017,117
"""eseje""",2,893,95,0.096154,988
"""papers by one author""",,165,19,0.103261,184


In [75]:
pomery_2000.sort(by="celkem",descending=True)

655_a,null,len_m,len_f,podil,celkem
str,u32,u32,u32,f64,u32
"""monografie""",125,8323,2951,0.261753,11274
"""monographs""",121,8061,2891,0.26397,10952
"""učebnice vysokých škol""",332,6477,2953,0.31315,9430
"""textbooks (higher)""",314,5961,2714,0.312853,8675
"""příručky""",225,5517,2915,0.345707,8432
"""česká poezie""",56,5707,2356,0.292199,8063
"""Czech poetry""",53,5589,2327,0.293962,7916
"""handbooks and manuals""",204,5055,2743,0.351757,7798
"""publikace pro děti""",29,2765,4010,0.591882,6775
"""Czech fiction""",16,3626,3115,0.462098,6741
