In [1]:
import os
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.zjisti_vazbu import zjisti_vazbu

pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

In [2]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode("020_q").with_columns(pl.col("020_q").map_elements(zjisti_vazbu, return_dtype=str).alias('vazba'))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
df = df.filter(pl.col("stran") > 15)
print(len(df))
df = df.filter(~pl.col('rok').is_null()).sort(by='rok')
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a"], keep="first")
print(len(df))

727498
522219


In [3]:
aut = pl.read_parquet(os.path.join("data","aut_vyber.parquet"))
cesi = aut.explode("370_c").filter(pl.col("370_c").str.contains("Česk")).explode("100_7").select(pl.col("100_7")).to_series().to_list()
len(cesi)

364420

In [4]:
df = df.filter(pl.col("100_7").is_in(cesi))

In [5]:
df.with_columns(pl.col('100_a').map_elements(hezke_jmeno, return_dtype=str).alias('jmeno')).select(pl.col("jmeno"))

jmeno
str
"""Anuše Mittenhubrová"""
"""Zdeněk Adla"""
"""Ján Rapoš"""
"""Jan Šejbl"""
"""Ludvík B. Böhm"""
"""Stanislav Musil"""
"""Ilona Schelleová"""
"""Alois Dostál"""
"""Josef Košťálek"""
"""Jiří Brdlík"""


In [6]:
df = df.with_columns(pl.col('100_a').map_elements(hezke_jmeno, return_dtype=str).alias('jmeno'))

In [7]:
vek = pl.read_parquet(os.path.join("data","narozeni-umrti-gender.parquet"))
df = df.join(vek, on="100_7", how="left").with_columns((pl.col("rok") - pl.col("narozeni")).alias("vek"))

## První ženy

In [9]:
df.filter(pl.col("gender") == "f").group_by(["jmeno","100_7"]).agg(pl.col('rok').min()).sort(by='rok').head(100)

jmeno,100_7,rok
str,str,i64
"""Marie Antonie""","""jk01092270""",1819
"""Magdalena Dobromila Rettigová""","""jk01102378""",1825
"""Božena Němcová""","""jk01083016""",1854
"""Marie Stroupežnická""","""jk01121503""",1855
"""Marie Čacká""","""jk01020921""",1857
"""Anna Vlastimila Růžičková""","""jk01103217""",1859
"""Honorata Zapová z Wiśniowskich""","""jk01152148""",1859
"""Karolina Světlá""","""jk01121895""",1860
"""Marie Anna Lev""","""jk01071966""",1861
"""Jindřiška Ritterová z Rittersb…","""jk01102521""",1862


In [10]:
prvni_zeny = df.filter(pl.col("gender") == "f").group_by(["100_a","100_7"]).agg(pl.col('rok').min()).sort(by='rok').head(10).select(pl.col("100_7")).to_series().to_list()
prvni_zeny_poradi = df.filter(pl.col("gender") == "f").group_by(["jmeno","100_7"]).agg(pl.col('rok').min()).sort(by='rok').head(10).select(pl.col("jmeno")).to_series().to_list()

In [11]:
alt.Chart(alt_friendly(df.filter(pl.col("100_7").is_in(prvni_zeny))), width=150,
         title=alt.Title(
             [f'Prvních {len(prvni_zeny_poradi)} českých spisovatelek'],
         subtitle="Co tečka, to kniha. Počítáme všechna vydání.")).mark_point().encode(
    alt.X("rok:T", title=None),
    alt.Y("jmeno", sort=prvni_zeny_poradi, title=None)
)

In [12]:
df.filter(pl.col("gender") == "f").group_by(["100_a","100_7"]).agg(pl.col('rok').min()).select(pl.col('rok')).median()

rok
f64
2009.0


In [13]:
df.filter(pl.col("gender") == "m").group_by(["100_a","100_7"]).agg(pl.col('rok').min()).select(pl.col('rok')).median()

rok
f64
1989.0


In [14]:
df.filter(pl.col("100_7") == "jk01092270").select(pl.col(["jmeno","245_a","rok","655_a"])).sort(by='rok')

jmeno,245_a,rok,655_a
str,str,i64,list[str]
"""Marie Antonie""","""Žiwot Swaté Alžběty, dcery krá…",1819,"[""biografie""]"
"""Marie Antonie""","""Chlebowé poswátnj, aneb, Swaté…",1820,"[""příručky""]"
"""Marie Antonie""","""Serafka""",1826,"[""české povídky"", ""didaktické povídky"", ""publikace pro mládež""]"
"""Marie Antonie""","""Myrrhowý wěneček, aneb, Utrpen…",1828,"[""české povídky""]"
"""Marie Antonie""","""Keř rozmarýnový, ze stínu do w…",1830,"[""české prózy""]"
"""Marie Antonie""","""Keř Rozmarínowý, ze stjnu do w…",1830,"[""české prózy""]"
"""Marie Antonie""","""Žiwot Swaté Alžběty, dcery krá…",1843,
"""Marie Antonie""","""Myrhový věneček""",1865,"[""didaktické povídky"", ""publikace pro mládež"", ""české povídky""]"


In [15]:
df.filter(pl.col("100_7") == "jk01102378").select(pl.col(["jmeno","245_a","rok","655_a"])).sort(by='rok')

jmeno,245_a,rok,655_a
str,str,i64,list[str]
"""Magdalena Dobromila Rettigová""","""Wěneček pro dcerky wlastenské""",1825,"[""české povídky"", ""didaktické povídky"", ""publikace pro mládež""]"
"""Magdalena Dobromila Rettigová""","""Domácý Kuchařka, aneb, Pogedná…",1826,"[""kuchařské recepty""]"
"""Magdalena Dobromila Rettigová""","""Křesťanka wzýwagjcý Boha, aneb…",1827,"[""příručky""]"
"""Magdalena Dobromila Rettigová""","""Bjlá růže""",1827,"[""česká dramata""]"
"""Magdalena Dobromila Rettigová""","""Narcisky""",1834,"[""české povídky""]"
"""Magdalena Dobromila Rettigová""","""Kwjtj Mágowé""",1835,"[""české povídky"", ""didaktické povídky"", ""publikace pro mládež""]"
"""Magdalena Dobromila Rettigová""","""Dobrá rada Slowanským wenkowan…",1838,"[""kuchařské recepty""]"
"""Magdalena Dobromila Rettigová""","""Mladá hospodyňka w domácnosti,…",1840,"[""příručky""]"
"""Magdalena Dobromila Rettigová""","""Jaroslaw a Terynka""",1841,"[""české povídky""]"
"""Magdalena Dobromila Rettigová""","""Pogednánj o Telecjm Mase""",1843,"[""kuchařské recepty""]"


In [16]:
df.filter(pl.col("100_7") == "jk01083016").select(pl.col(["245_a","rok","655_a"])).sort(by='rok')

245_a,rok,655_a
str,i64,list[str]
"""Národní Báchorky a Pověsti od …",1854,"[""české pověsti"", ""české pohádky""]"
"""Babička""",1855,"[""české prózy"", ""Czech prose""]"
"""Pohorská vesnice""",1856,"[""české prózy"", ""Czech prose""]"
"""Slovenské pohádky a pověsti""",1857,"[""slovenské pohádky"", ""slovenské pověsti""]"
"""Drobné povídky""",1862,"[""české povídky""]"
"""Divá Bára""",1862,"[""české povídky"", ""sebrané spisy"", … ""collected works""]"
"""Národní báchorky a pověsti""",1862,"[""české pohádky""]"
"""Babička a jiné povídky""",1862,"[""české prózy"", ""sebrané spisy"", … ""collected works""]"
"""Selská svatba""",1862,"[""české povídky"", ""črty""]"
"""Sebrané spisy Boženy Němcové""",1862,"[""české prózy""]"


In [17]:
df.filter(pl.col("100_7") == "jk01102378").select(pl.col(["245_a","rok"])).sort(by='rok')

245_a,rok
str,i64
"""Wěneček pro dcerky wlastenské""",1825
"""Domácý Kuchařka, aneb, Pogedná…",1826
"""Křesťanka wzýwagjcý Boha, aneb…",1827
"""Bjlá růže""",1827
"""Narcisky""",1834
"""Kwjtj Mágowé""",1835
"""Dobrá rada Slowanským wenkowan…",1838
"""Mladá hospodyňka w domácnosti,…",1840
"""Jaroslaw a Terynka""",1841
"""Pogednánj o Telecjm Mase""",1843


In [18]:
df.filter(pl.col("gender") == "m").group_by(["100_a","100_7"]).agg(pl.col('rok').min()).sort(by='rok').head(100)

100_a,100_7,rok
str,str,i64
"""Kramerius, Václav Matěj,""","""xx0011353""",1801
"""Pavlovský, Antonín,""","""jk01092171""",1801
"""Végh, Jan,""","""jk01141890""",1801
"""Poupě, František Ondřej,""","""jk01100828""",1801
"""Karmášek, Josef Arnošt,""","""ola2010577165""",1802
"""Puchmajer, Antonín Jaroslav,""","""jk01101689""",1802
"""Dittrich, Josef Petr Václav,""","""jk01022308""",1803
"""Ryba, Jakub Jan,""","""jk01103232""",1803
"""Tomsa, František Jan,""","""jk01132744""",1803
"""Spieß, Christian Heinrich,""","""jn20000605105""",1803


## Historický podíl

In [20]:
len(df.filter(pl.col("rok").is_between(1801,1900)).filter(pl.col("gender") == "m").select(pl.col("100_7")).unique())

3388

In [21]:
len(df.filter(pl.col("rok").is_between(1801,1900)).filter(pl.col("gender") == "f").select(pl.col("100_7")).unique())

109

In [22]:
len(df.filter(pl.col("rok").is_between(1801,1900)).filter(pl.col("gender") == "m").select(pl.col("100_7")).unique()) / len(df.filter(pl.col("rok").is_between(1801,1900)).filter(pl.col("gender") == "f").select(pl.col("100_7")).unique())

31.08256880733945

In [23]:
historicky_podil = df.filter(
    pl.col('gender') == 'm').group_by("rok").len().rename({'len':'m'}).join(
    df.filter(pl.col('gender') == 'f').group_by("rok").len().rename({'len':'f'}), on='rok'
).with_columns(((pl.col('f') / (pl.col('m') + pl.col('f'))).alias('podil'))).sort(by='rok').filter(
    pl.col("rok").is_between(1800,2024)).with_columns(
    pl.col("podil").rolling_mean(window_size=2)
    )

In [24]:
historicky_podil.head(20)

rok,m,f,podil
i64,u32,u32,f64
1819,14,1,
1820,20,1,0.057143
1825,34,1,0.038095
1826,19,2,0.061905
1827,18,2,0.097619
1828,17,1,0.077778
1830,25,2,0.064815
1834,17,1,0.064815
1835,22,1,0.049517
1838,21,1,0.044466


In [25]:
historicky_podil.tail(20)

rok,m,f,podil
i64,u32,u32,f64
2005,3655,1360,0.264807
2006,3647,1508,0.281859
2007,3795,1563,0.292122
2008,3856,1609,0.293066
2009,3552,1571,0.300538
2010,3714,1614,0.304792
2011,3282,1466,0.305845
2012,3123,1465,0.314036
2013,3527,1694,0.321885
2014,3687,1819,0.327413


In [26]:
historicky_podil_graf = alt.Chart(
    alt_friendly(historicky_podil.filter(pl.col("rok") >= 1850)), width=300, height=100,
    title=["Podíl ženských autorek na nově vydaných","původních českých knihách všech žánrů"]
).mark_line().encode(
    alt.X('rok:T', title=None, axis=alt.Axis(tickCount=6)),
    alt.Y('podil:Q', axis=alt.Axis(tickCount=5, labelExpr="datum.label * 100 + ' %'", orient='right'),
          title=None, scale=alt.Scale(domain=[0,0.45]))
).configure_view(stroke='transparent')

historicky_podil_graf

In [27]:
alt.Chart(
    alt_friendly(historicky_podil.filter(pl.col("rok") >= 1850)), width=300, height=100
).mark_line().encode(
    alt.X('rok:T'),
    alt.Y('f:Q')
).configure_view(stroke='transparent')

In [28]:
historicky_podil.filter(pl.col("rok") > 1940).sort(by="podil")

rok,m,f,podil
i64,u32,u32,f64
1952,904,50,0.052807
1953,1277,77,0.05464
1951,872,49,0.059896
1954,1367,107,0.06473
1950,799,57,0.067938
1946,1884,159,0.070405
1955,1343,104,0.072232
1949,900,67,0.072622
1956,1390,119,0.075367
1948,1618,133,0.078519


In [29]:
from src.me_to_neurazi import me_to_neurazi

In [30]:
me_to_neurazi(historicky_podil_graf, ["data: Národní knihovna, Wikidata ~ vizualizace: iROZHLAS.cz ~ 2025"], "historicky_podil_zen")

ahoj


## Současnost

In [164]:
df_do_sta = df.filter(pl.col("vek") < 100)

In [166]:
ceska_poezie = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a") == "česká poezie"
)

In [135]:
ceske_romany = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a") == "české romány"
).sort(by='rok').unique(subset=['100_a','245_a'], keep='first')

In [133]:
ceske_kratke = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a").is_in(["české novely","české povídky","české příběhy","české prózy"])
).sort(by='rok').unique(subset=['100_a','245_a'], keep='first')

In [168]:
ceske_vsechny_prozy = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a").is_in(["české romány","české novely","české povídky","české příběhy","české prózy"])
).sort(by='rok').unique(subset=['100_a','245_a'], keep='first').sort(by='rok')

In [172]:
ceske_vsechny_prozy.tail(20).select(pl.col(["100_a","245_a","rok","gender"]))

100_a,245_a,rok,gender
str,str,i64,str
"""Pospíšilová, Zuzana,""","""Kouzelná třída dostává vysvědč…",2025,"""f"""
"""Novotná, Martina,""","""Hříchy dávné minulosti""",2025,"""f"""
"""Valíková, Veronika,""","""Sesterské domino""",2025,"""f"""
"""Holub, Milan,""","""DušaN moja""",2025,"""m"""
"""Šulc, Petr,""","""Vlk, který měl hlad jako vlk""",2025,"""m"""
"""Horáková, Naďa,""","""Nebeská růže""",2025,"""f"""
"""Valová, Nikola,""","""Prásknout do bot""",2025,"""f"""
"""Turčany, Roman,""","""Šamanův odkaz""",2025,"""m"""
"""Nesvadbová, Barbara,""","""Karikatura muže""",2025,"""f"""
"""Sobotka, Richard,""","""Lovecké příběhy z Beskyd""",2025,"""m"""


In [137]:
df_podil = ceske_romany.filter(
    pl.col("gender") == 'f'
).group_by("rok").len().sort(by="rok").rename({"len":'zeny'}).join(
    ceske_romany.filter(pl.col("gender") == 'm').group_by("rok").len().sort(by="rok").rename({"len":'muzi'}), on='rok'
).with_columns(
    (pl.col('zeny') + pl.col('muzi')).alias('celkem')
).with_columns(
    (pl.col('zeny') / pl.col('celkem')).alias('podil_zen')
).with_columns(pl.lit("romány").alias("zanr")).with_columns(pl.col("podil_zen").rolling_mean(window_size=2))

In [139]:
df_podil_kratke = ceske_kratke.filter(
    pl.col("gender") == 'f'
).group_by("rok").len().sort(by="rok").rename({"len":'zeny'}).join(
    ceske_kratke.filter(pl.col("gender") == 'm').group_by("rok").len().sort(by="rok").rename({"len":'muzi'}), on='rok'
).with_columns(
    (pl.col('zeny') + pl.col('muzi')).alias('celkem')
).with_columns(
    (pl.col('zeny') / pl.col('celkem')).alias('podil_zen')
).with_columns(pl.lit("kratší prózy").alias("zanr")).with_columns(pl.col("podil_zen").rolling_mean(window_size=2))

In [141]:
df_podil_poezie = ceska_poezie.filter(
    (pl.col("gender") == 'f')
).group_by("rok").len().sort(by="rok").rename({"len":'zeny'}).join(
    ceska_poezie.filter(pl.col("gender") == 'm').group_by("rok").len().sort(by="rok").rename({"len":'muzi'}), on='rok'
).with_columns(
    (pl.col('zeny') + pl.col('muzi')).alias('celkem')
).with_columns(
    (pl.col('zeny') / pl.col('celkem')).alias('podil_zen')
).with_columns(pl.lit("poezie").alias("zanr")).with_columns(pl.col("podil_zen").rolling_mean(window_size=2))

In [174]:
df_podil_vsechny_prozy = ceske_vsechny_prozy.filter(
    pl.col("gender") == 'f'
).group_by("rok").len().sort(by="rok").rename({"len":'zeny'}).join(
    ceske_vsechny_prozy.filter(pl.col("gender") == 'm').group_by("rok").len().sort(by="rok").rename({"len":'muzi'}), on='rok'
).with_columns(
    (pl.col('zeny') + pl.col('muzi')).alias('celkem')
).with_columns(
    (pl.col('zeny') / pl.col('celkem')).alias('podil_zen')
).with_columns(pl.lit("próza").alias("zanr")).with_columns(pl.col("podil_zen").rolling_mean(window_size=2))

In [176]:
podil_v_soucasnosti = alt.Chart(
    alt_friendly(pl.concat([df_podil_vsechny_prozy, df_podil_poezie]).filter(pl.col('rok') >= 2000)), 
    title=alt.Title(["Podíl ženských autorek","na nově vydaných českých knihách"], lineHeight=21), width=250, height=80
         ).mark_line(
         ).encode(
    alt.X("rok:T", title=None, axis=alt.Axis(tickCount=5)),
    alt.Y('podil_zen:Q', title=None, axis=alt.Axis(
        tickCount=5, labelExpr="datum.label * 100 + ' %'", orient='right'), scale=alt.Scale(domain=[0,0.5])
         ),
    alt.Row("zanr:N",header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='middle', labelFont='Asap'), sort=["próza","poezie"], title=None)
).configure_view(stroke='transparent').resolve_scale(y='shared').resolve_axis(x="independent")
podil_v_soucasnosti

In [40]:
me_to_neurazi(podil_v_soucasnosti, ["data: Národní knihovna, Wikidata ~ vizualizace: iROZHLAS.cz ~ 2025"], "soucasny_podil_zen")

ahoj


## Věková struktura

In [42]:
alt.Chart(
    pl.concat([ceske_romany, ceske_kratke]).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [43]:
pl.concat([ceske_romany, ceske_kratke]).sample(20).select(pl.col(['100_a','245_a','rok','stran']))

100_a,245_a,rok,stran
str,str,i64,i64
"""Šlik, Petr Hugo,""","""Ztracený na Nevděku""",2021,125
"""Svobodová, Vlasta,""","""Když se máma s tátou berou""",2008,125
"""Válková, Veronika,""","""Egypt""",2013,170
"""Cooper, Katy,""","""Vražedné kafe""",2024,440
"""Syrovátka, Tomáš,""","""Haškoviny""",2023,117
"""Beran, Josef,""","""Cesta do žaláře""",1879,113
"""Čapková, Blanka,""","""Babi, ty máš nápady!""",2011,107
"""Pecháček, Ladislav,""","""Spolek osamělých srdcí""",2010,205
"""Lanczová, Lenka,""","""Dvakrát dospělá""",2013,349
"""Žamboch, Miroslav,""","""Megapolis""",2004,180


In [44]:
proza20x24 = pl.concat([ceske_romany, ceske_kratke]).filter(pl.col('rok').is_between(2000,2024)).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f']))
proza20x24m = proza20x24.filter(pl.col('gender') == 'm').with_columns(pl.col('len').rolling_mean(window_size=3))
proza20x24f = proza20x24.filter(pl.col('gender') == 'f').with_columns(pl.col('len').rolling_mean(window_size=3))
proza20x24 = pl.concat([proza20x24m, proza20x24f])

In [45]:
alt.Chart(
    proza20x24.to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [46]:
alt.Chart(
    pl.concat([ceske_romany, ceske_kratke]).filter(
        pl.col('rok').is_between(2000,2010)
    ).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [113]:
alt.Chart(
    pl.concat([ceske_romany, ceske_kratke]).filter(
        pl.col('rok').is_between(2022,2024)
    ).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [181]:
alt.Chart(
    pl.concat([ceske_vsechny]).filter(
        pl.col('rok').is_between(2022,2024)
    ).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

NameError: name 'ceske_vsechny' is not defined

In [117]:
alt.Chart(
    pl.concat([ceska_poezie]).filter(
        pl.col('rok').is_between(2022,2024)
    ).group_by(["gender","vek"]).len().sort(by="vek").filter(pl.col('gender').is_in(['m','f'])).to_pandas()
).mark_line().encode(alt.X("vek"),alt.Y("len"),alt.Color("gender"))

In [119]:
alt.Chart(
    alt_friendly(pl.concat([ceske_romany, ceske_kratke]).group_by(["gender","rok"]).agg(pl.col('vek').median()).filter(pl.col('gender').is_in(['m','f'])))
).mark_line().encode(alt.X("rok"),alt.Y("vek"),alt.Row("gender"))

## Věková struktura, alternativní pohledy

### Pohled na debutanty a debutantky

In [198]:
df_debuty = df.explode("655_a").filter(
        pl.col("655_a").is_in(["české romány","české novely","české povídky","české příběhy","české prózy"])
).sort(by="rok").unique(subset=['100_7'], keep="first")

In [200]:
df_debuty.sample(20).select(pl.col(['100_a','245_a','rok','gender']))

100_a,245_a,rok,gender
str,str,i64,str
"""Rampa, Miroslav,""","""Krabička s plachetnicí""",1964,"""m"""
"""Rejfová, Marie,""","""Čarověník""",2015,"""f"""
"""Ondráš,""","""Malé dějiny valašskéj dědiny""",2024,"""m"""
"""Frei, Robert,""","""Krvavá dálnice""",2012,"""m"""
"""Valentová, Monika,""","""Hafni!""",2011,"""f"""
"""Jebáčková-Lažanská, Iveta,""","""V cizím sedle""",2000,"""f"""
"""Stuchlík, Ivan""","""Příběhy hrůzy""",2016,"""m"""
"""Fučikovský, Jiří,""","""Dušebraní""",2018,"""m"""
"""Vacátková, Pavlína,""","""Pohádky ze dvora""",2012,
"""Janalík, Vincenc,""","""Katolické náboženstwj prawé ut…",1844,"""m"""


In [206]:
debuty_vyvoj = df_debuty.group_by(["rok","gender"]).len().pivot(
    index="rok",         # Keep 655_a as the index
    columns="gender",      # Spread gender values to columns
    values="len",          # Use len values as the values
    aggregate_function="first"  # If there are duplicates, take the first value
).with_columns([
    pl.col("m").alias("len_m"),   # Rename "m" to "len_m"
    pl.col("f").alias("len_f")    # Rename "f" to "len_f"
]).drop(["m", "f"]).with_columns(
    (pl.col('len_f') / (pl.col('len_m') + pl.col('len_f'))).alias('podil')
).with_columns(
    (pl.col('len_f') + pl.col('len_m')).alias('celkem')
).sort(
    by="podil")

In [212]:
alt.Chart(alt_friendly(debuty_vyvoj.filter(pl.col('rok') >= 1990))).mark_line().encode(alt.X('rok:T'),alt.Y('podil:Q'))

### Zkusíme vyfiltrovat ženy, které již publikovaly dříve – pro odstínění možného nástupu mladé generace

In [50]:
pl.concat([ceske_romany, ceske_kratke]).group_by("rok").len().sort(by="rok")

rok,len
i64,u32
1804,1
1807,1
1810,1
1814,1
1815,1
1818,1
1819,1
1823,2
1824,4
1825,12


In [51]:
pl.concat([ceske_romany, ceske_kratke]).filter(pl.col('rok') >= 2000)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba,jmeno,narozeni,umrti,gender,vek
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str,str,i64,i64,str,i64
"""1""","""Černá, Jaroslava,""","""xx0042840""","[""aut""]","""1959-""",,,,,"""nkc20193132419""",""" nam a22 i 4500""","""190911s2019 xr g 0…","""(vázáno)""",,"[""978-80-7543-999-4""]",,,,,,,"""1""","""0""","""Valdštejn""","""zkamenělé srdce v moci ďábla :…","""Jaroslava Černá""",,,,,,"[""247 stran ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019,247,"""pevná""","""Jaroslava Černá""",1959,,"""f""",60
"""1""","""Ríša, Vlado,""","""xx0004267""","[""aut""]","""1949-""",,,,,"""nkc20061689245""",""" cam a22 a 4500""","""060811s2006 xr g 0…","""(brož.) :""","[""Kč 189,00""]","[""80-85892-86-3""]",,,,,,,"""1""","""0""","""Conan a meč Yggrest""",,"""Vlado Ríša""",,,,,,"[""238 s. ;""]",,"[""18 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2006,238,"""brožovaná""","""Vlado Ríša""",1949,,"""m""",57
"""1""","""Keltnerová, Adelka,""","""jo20221158191""","[""aut""]","""2001-""",,,,,"""nkc20223431101""",""" nam a22 i 4500""","""220704s2022 xr a d 0…","""(vázáno)""",,"[""978-80-7568-470-7""]",,,,,,,"""1""","""0""","""Život ve snu""",,"""Adelka Keltnerová""",,,,,,"[""328 stran :""]","[""barevné ilustrace ;""]","[""22 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,328,"""pevná""","""Adelka Keltnerová""",2001,,"""f""",21
"""1""","""Horníček, Jan,""","""xx0251614""","[""aut""]","""1989-""",,,,,"""nkc20203221463""",""" nam a22 i 4500""","""200908s2020 xr g 0…","""(vázáno)""",,"[""978-80-242-6783-8""]",,,,,,,"""1""","""0""","""Čarostřelec""",,"""Jan Horníček""",,,,,,"[""261 stran ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2020,261,"""pevná""","""Jan Horníček""",1989,,"""m""",31
"""1""","""Andres, Adam,""","""xx0013106""","[""aut""]","""1970-""",,,,,"""cpk20051495841""",""" nam a22 a 4500""","""050318s2004 xr a g 0…","""(Netopejr ;""","[""Kč 229,00""]","[""80-86096-77-7""]",,,,,,,"""1""","""0""","""Sága o Halldorovi z Mortaluny""",,"""Adam Andres ; [ilustrace Lukáš…",,,,,,"[""409 s. :""]","[""il. ;""]","[""19 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2004,409,,"""Adam Andres""",1970,,"""f""",34
"""1""","""Petrovická, Velko,""","""xx0193780""","[""aut""]","""1980-""",,,,,"""nkc20152697919""",""" cam a22 a 4500""","""150407s2015 xr e 0…","""(váz.)""",,"[""978-80-7497-028-3""]",,,,,,,"""1""","""0""","""Ďáblův hřích""",,"""Velko Petrovická""",,,,,,"[""143 s. ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2015,143,"""pevná""","""Velko Petrovická""",1980,,"""f""",35
"""1""","""Čermáková, Miroslava,""","""mzk2013755170""","[""aut""]","""1954-""",,,,,"""nkc20162813383""",""" nam a22 i 4500""","""160727s2016 xr a e 0…","""(brožováno)""",,"[""978-80-87710-31-9""]",,,,,,,"""1""","""0""","""Nečekaný odkaz""",,"""Miroslava Čermáková""",,,,,,"[""171 stran :""]","[""ilustrace ;""]","[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2016,171,"""brožovaná""","""Miroslava Čermáková""",1954,,"""f""",62
"""1""","""Binar, Ivan,""","""jk01012120""","[""aut""]","""1942-""",,,,,"""nkc20132455327""",""" nam a22 a 4500""","""130412s2013 xr g 0…","""(brož.)""",,"[""978-80-7215-448-7""]",,,,,,,"""1""","""0""","""Jen šmouha po nebi""",,"""Ivan Binar""",,,,,,"[""212 s. ;""]",,"[""20 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2013,212,"""brožovaná""","""Ivan Binar""",1942,,"""m""",71
"""1""","""Žamboch, Miroslav,""","""xx0000241""","[""aut""]","""1972-""",,,,,"""cpk20030891958""",""" cam a22 a 4500""","""020724s2002 xr e 0…","""(brož.) :""","[""Kč 179,00""]","[""80-85892-63-4""]",,,,,,,"""1""","""0""","""Seržant""",,"""Miroslav Žamboch""",,,,,,"[""265 s. ;""]",,"[""18 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2002,265,"""brožovaná""","""Miroslav Žamboch""",1972,,"""m""",30
"""1""","""Hindráková, Hana,""","""jo2011652868""","[""aut""]","""1982-""",,,,,"""nkc20183057233""",""" nam a22 i 4500""","""181119s2018 xr ac g 0…","""(vázáno)""",,"[""978-80-7543-859-1""]",,,,,,,"""1""","""0""","""Nezlomný""","""pravdivý příběh plný bolesti a…","""Hana Hindráková""",,,,,,"[""182 stran :""]","[""barevné ilustrace, portréty ;""]","[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,182,"""pevná""","""Hana Hindráková""",1982,,"""f""",36


In [52]:
hranicni_rok = 2005
zavedeni = df.filter(pl.col('rok').is_between(1980,hranicni_rok)).select(pl.col('100_7')).to_series().to_list()
print(len(zavedeni))
struktura_zavedenych = pl.concat([ceske_romany, ceske_kratke]).filter(pl.col('rok') > hranicni_rok).filter(pl.col('100_7').is_in(zavedeni)).group_by(['gender','vek']).len().sort(by='vek')
struktura_zavedenych_m = struktura_zavedenych.filter(pl.col('gender') == 'm').with_columns(pl.col('len').rolling_mean(window_size=3))
struktura_zavedenych_f = struktura_zavedenych.filter(pl.col('gender') == 'f').with_columns(pl.col('len').rolling_mean(window_size=3))
struktura_zavedenych = pl.concat([struktura_zavedenych_m, struktura_zavedenych_f])

alt.Chart(struktura_zavedenych.to_pandas()).mark_line().encode(alt.X('vek'),alt.Y('len'),alt.Color('gender'))

90865


In [53]:
struktura_zavedenych.filter(pl.col('vek').is_between(47,52)).sort(by='vek')

gender,vek,len
str,i64,f64
"""m""",47,56.666667
"""f""",47,64.333333
"""m""",48,60.333333
"""f""",48,61.333333
"""m""",49,66.0
"""f""",49,58.666667
"""m""",50,60.666667
"""f""",50,56.0
"""m""",51,68.0
"""f""",51,52.666667


In [54]:
pl.concat([ceske_romany, ceske_kratke]).filter(pl.col('rok') >= 2000).group_by('vek').len()

vek,len
i64,u32
21,73
18,29
12,2
15,18
24,91
30,211
42,336
48,339
45,348
27,147


## Poměry žánrů

In [56]:
pomery_2000 = df_do_sta.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).filter(pl.col("rok") >= 2000).explode("655_a").group_by(['gender','655_a']).len().pivot(
    index="655_a",         # Keep 655_a as the index
    columns="gender",      # Spread gender values to columns
    values="len",          # Use len values as the values
    aggregate_function="first"  # If there are duplicates, take the first value
).with_columns([
    pl.col("m").alias("len_m"),   # Rename "m" to "len_m"
    pl.col("f").alias("len_f")    # Rename "f" to "len_f"
]).drop(["m", "f"]).with_columns(
    (pl.col('len_f') / (pl.col('len_m') + pl.col('len_f'))).alias('podil')
).with_columns(
    (pl.col('len_f') + pl.col('len_m')).alias('celkem')
).filter(pl.col("celkem") >= 100).sort(
    by="podil")

In [57]:
pomery_2000

655_a,null,len_m,len_f,podil,celkem
str,u32,u32,u32,f64,u32
"""kázání""",,133,7,0.05,140
"""sermons""",,126,7,0.052632,133
"""non-fiction""",10,1261,82,0.061057,1343
"""literatura faktu""",11,1324,88,0.062323,1412
"""kreslené vtipy""",1,191,13,0.063725,204
"""jokes""",1,207,18,0.08,225
"""chronologické přehledy""",2,119,12,0.091603,131
"""chronological surveys""",2,106,11,0.094017,117
"""eseje""",2,893,95,0.096154,988
"""papers by one author""",,165,19,0.103261,184


In [58]:
pomery_2000.sort(by="celkem",descending=True)

655_a,null,len_m,len_f,podil,celkem
str,u32,u32,u32,f64,u32
"""monografie""",125,8323,2951,0.261753,11274
"""monographs""",121,8061,2891,0.26397,10952
"""učebnice vysokých škol""",332,6477,2953,0.31315,9430
"""textbooks (higher)""",314,5961,2714,0.312853,8675
"""příručky""",225,5517,2915,0.345707,8432
"""česká poezie""",56,5707,2356,0.292199,8063
"""Czech poetry""",53,5589,2327,0.293962,7916
"""handbooks and manuals""",204,5055,2743,0.351757,7798
"""publikace pro děti""",29,2765,4010,0.591882,6775
"""Czech fiction""",16,3626,3115,0.462098,6741
