In [1]:
import os
import json
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.zjisti_vazbu import zjisti_vazbu
from src.me_to_neurazi import me_to_neurazi

pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

with open(os.path.join('src','kredity.json'), 'r', encoding='utf-8') as kredity:
    kredity = json.loads(kredity.read())

In [5]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode("020_q").with_columns(pl.col("020_q").map_elements(zjisti_vazbu, return_dtype=str).alias('vazba'))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))
df = df.filter(~pl.col('rok').is_null()).sort(by='rok')
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null())
print(len(df))

1001279
991612


In [7]:
df = df.filter(pl.col("rok") >= 1800)

In [6]:
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","041.parquet")), left_on="001", right_on="001", how="left")

In [216]:
predfiltr = df.filter(pl.col('stran') >= 30).unique(subset=['rok','245_a','100_a'])

In [218]:
preklady = predfiltr.drop_nulls(subset=["041_h","rok"]).explode("041_h").filter(pl.col("041_h").is_not_null()).filter(pl.col("041_h") != "cze").unique(subset=['rok','245_a','100_a'])

In [220]:
preklady_celkem = preklady.group_by(['rok']).len().rename({'len':'prekladu_celkem'}).sort(by="rok")
preklady_celkem

rok,prekladu_celkem
i64,u32
1801,4
1802,5
1803,6
1804,9
1805,2
1806,3
1807,4
1808,6
1809,7
1810,9


## Pokus č. 2, hezčejší

In [246]:
nahradit = {
    'eng': 'angličtina', 'ger': 'němčina', 'slo': 'slovenština', 'rus': 'ruština', 'fre': 'francouzština', 'lat': 'latina', 'pol': 'polština','ita':'italština','ostatní cizí jazyky':'ostatní cizí jazyky'
}

In [236]:
historicky_vyvoj = preklady.group_by(
    ["rok","041_h"]
).len().sort(by=['rok','len'],descending=[False,True]).unique(
    subset=['rok'],keep="first"
).join(
    preklady_celkem, on='rok', how='left'
).with_columns((pl.col('prekladu_celkem') - pl.col('len')).alias('ostatní jazyky')).rename(
    {'len':'nejzastoupenější jazyk'}
).join(predfiltr.group_by('rok').len().rename({'len':'všechny'}), on='rok', how='left').with_columns(
    pl.col('nejzastoupenější jazyk') / pl.col('všechny')
).with_columns(
    pl.col('ostatní jazyky') / pl.col('všechny')
) #.with_columns(
  #  pl.col('041_h').replace_strict(nahradit)
#)

historicky_vyvoj.filter(pl.col('prekladu_celkem') < 80).tail()

rok,041_h,nejzastoupenější jazyk,prekladu_celkem,ostatní jazyky,všechny
i64,str,f64,u32,f64,u32
1870,"""ger""",0.139535,75,0.109635,301
1871,"""ger""",0.109023,68,0.146617,266
1877,"""ger""",0.098214,77,0.130952,336
1878,"""ger""",0.101124,67,0.087079,356
1879,"""ger""",0.076023,68,0.122807,342


In [238]:
historicky_vyvoj.filter(pl.col("rok").is_between(1935,1950))

rok,041_h,nejzastoupenější jazyk,prekladu_celkem,ostatní jazyky,všechny
i64,str,f64,u32,f64,u32
1935,"""eng""",0.045066,501,0.102504,3395
1936,"""eng""",0.051386,641,0.111676,3931
1937,"""eng""",0.048477,663,0.119797,3940
1938,"""eng""",0.052239,478,0.08496,3484
1939,"""eng""",0.051177,390,0.094509,2677
1940,"""ger""",0.053405,505,0.089291,3539
1941,"""ger""",0.076259,531,0.081918,3357
1942,"""ger""",0.123358,350,0.076528,1751
1943,"""ger""",0.130137,218,0.035769,1314
1944,"""ger""",0.100979,207,0.025704,1634


In [248]:
vyvoj_top = historicky_vyvoj.select(pl.col(["rok","041_h","nejzastoupenější jazyk"])).rename({"nejzastoupenější jazyk":"podil"})
vyvoj_rest = historicky_vyvoj.select(pl.col(['rok','ostatní jazyky'])).rename({'ostatní jazyky':'podil'}).with_columns(pl.lit('ostatní cizí jazyky').alias("041_h"))
historicky_vyvoj2 = pl.concat([vyvoj_rest, vyvoj_top], how="diagonal").sort(by='rok').with_columns(
  pl.col('041_h').replace_strict(nahradit)
)

In [260]:
list(nahradit.values())

['angličtina',
 'němčina',
 'slovenština',
 'ruština',
 'francouzština',
 'latina',
 'polština',
 'italština',
 'ostatní cizí jazyky']

In [406]:
alt.Chart(alt_friendly(historicky_vyvoj2.filter(pl.col('rok').is_between(1900,2024))),
         title="Jak velká část publikací byly překlady a z čeho", width=350
         ).mark_bar(width=2).encode(
    alt.X('rok:T'),
    alt.Y('podil:Q'),
    alt.Color('041_h:N', sort=alt.Sort(list(nahradit.values()))),
    alt.Order('041_h:N')
)

In [43]:
preklady.filter((pl.col('041_h') == 'ger') & (pl.col('rok') == 1993))

leader,001,100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba,041_ind1,041_a,041_h,041_b,041_k,041_g,041_f,041_d,041_e,041_j,041_n,041_m
str,str,str,str,str,list[str],str,str,list[str],str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str,str,list[str],str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,str
""" nam a22 4500""","""np9305970""","""1""","""Vandenberg, Patricia,""","""jn20000810141""","[""aut""]","""1921-2007""",,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kčs 9,90 (doporučená cena)""]","[""80-7116-699-5""]",,,,,,,"""1""","""0""","""Dítě z panského domu""",,"""Patricia Vandenbergová ; Z něm…",,,,,,"[""59 s. ;""]",,"[""20 cm""]",,,,…,"[""jk01063267""]",,,,,,,,,,,,,,,,,,,,,,1993,59,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 4500""","""np9306086""","""1""","""Behrendt, Leni,""","""xx0022089""","[""aut""]","""1894-1968""",,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kč 9,90 (doporučená cena)""]","[""80-7116-648-0""]",,,,,,,"""1""","""0""","""Zámeček v lese""",,"""Leni Behrendtová ; Z něm. přel…",,,,,,"[""62 s. ;""]",,"[""20 cm""]",,,,…,"[""jk01132003""]",,,,,,,,,,,,,,,,,,,,,,1993,62,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 4500""","""np9306090""","""1""","""Brink, Karina""","""jx20041210018""","[""aut""]",,,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kč 9,90 (doporučená cena)""]","[""80-7116-649-9""]",,,,,,,"""1""","""0""","""Ztracená minulost""",,"""Karina Brinková ; Z něm. přel.…",,,,,,"[""60 s. ;""]",,"[""20 cm""]",,,,…,"[""jx20031205009""]",,,,,,,,,,,,,,,,,,,,,,1993,60,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 a 4500""","""np9306136""","""1""","""Simmel, Johannes Mario,""","""jn20000605033""","[""aut""]","""1924-2009""",,,,,"""940120s1993 xr 0…","""(brož.)""",,"[""80-208-0261-4""]",,,,,,,"""1""","""0""","""Na jaře zazpívá skřivan naposl…",,"""Johannes Mario Simmel ; [z něm…",,,,,,"[""500 s. ;""]",,"[""20 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,1993,500,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 4500""","""np9306186""","""1""","""Myrenburg, Myra""","""jx20040720097""","[""aut""]",,,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kč 9,90 (doporučená cena)""]","[""80-7116-476-3""]",,,,,,,"""1""","""0""","""Místo ve svém srdci""",,"""Myra Myrenburgová ; Z něm. pře…",,,,,,"[""54 s. ;""]",,"[""20 cm""]",,,,…,"[""jx20040906007"", ""jx20051202064""]",,,,,,,,,,,,,,,,,,,,,,1993,54,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 4500""","""np9306187""","""1""","""Vandenberg, Patricia,""","""jn20000810141""","[""aut""]","""1921-2007""",,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kč 9,90 (doporučená cena)""]","[""80-7116-700-2""]",,,,,,,"""1""","""0""","""Den začal tak krásně""",,"""Patricia Vandenbergová ; Z něm…",,,,,,"[""60 s. ;""]",,"[""20 cm""]",,,,…,"[""jx20031130175""]",,,,,,,,,,,,,,,,,,,,,,1993,60,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 4500""","""np9306188""","""1""","""Jensen, Manuela""","""ola2007404864""","[""aut""]",,,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kč 9,90 (doporučená cena)""]","[""80-7116-537-9""]",,,,,,,"""1""","""0""","""Smrtí to nekončí""",,"""Manuela Jensenová ; Z něm. pře…",,,,,,"[""62 s. ;""]",,"[""20 cm""]",,,,…,"[""jx20040721038""]",,,,,,,,,,,,,,,,,,,,,,1993,62,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 4500""","""np9306189""","""1""","""Sommer, Erika""","""jx20040721062""","[""aut""]",,,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kč 9,90 (doporučená cena)""]","[""80-7116-538-7""]",,,,,,,"""1""","""0""","""Podivný dům""",,"""Erika Sommerová ; Z něm. přel.…",,,,,,"[""62 s. ;""]",,"[""20 cm""]",,,,…,"[""xx0113454""]",,,,,,,,,,,,,,,,,,,,,,1993,62,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 4500""","""np9306190""","""1""","""Vandenberg, Patricia,""","""jn20000810141""","[""aut""]","""1921-2007""",,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kč 9,90 (doporučená cena)""]","[""80-7116-590-5""]",,,,,,,"""1""","""0""","""Jsem bez viny""",,"""Patricia Vandenbergová ; Z něm…",,,,,,"[""56 s. ;""]",,"[""20 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,,,1993,56,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,
""" nam a22 4500""","""np9306411""","""1""","""Barner, Gerhard F.""","""jx20040908007""","[""aut""]",,,,,,"""940120s1993 xr u0…","""(brož.) :""","[""Kč 9,90 (doporučená cena)""]","[""80-7116-576-X""]",,,,,,,"""1""","""0""","""Na horké štrece""",,"""G.F. Barner ; z něm. přel. Mil…",,,,,,"[""61 s. ;""]",,"[""20 cm""]",,,,…,"[""ola364130""]",,,,,,,,,,,,,,,,,,,,,,1993,61,"""brožovaná""","""1""","[""cze""]","""ger""",,,,,,,,,


## Pokus č. 1, to jsem to ještě neuměl tak báječně

In [5]:
nahradit = {
    'eng': 'angličtina', 'ger': 'němčina', 'slo': 'slovenština', 'rus': 'ruština', 'fre': 'francouzština', 'lat': 'latina', 'pol': 'polština','ita':'italština'
}

In [6]:
nejzastoupenejsi_jazyky = preklady.group_by('041_h').len().sort('len', descending=True)
nejzastoupenejsi_jazyky = nejzastoupenejsi_jazyky.get_column('041_h').to_list()
nejzastoupenejsi_jazyky = nejzastoupenejsi_jazyky[0:7]
print(nejzastoupenejsi_jazyky)
nejzastoupenejsi_jazyky = ['eng','ger','rus','fre','slo']
preklady_po_jazycich = preklady.filter(pl.col("rok").is_between(1900,2020)).group_by(['rok','041_h']).len().rename({'len':'prekladu'})
preklady_po_jazycich = preklady_po_jazycich.join(preklady_celkem, on='rok', how='full')
preklady_po_jazycich = preklady_po_jazycich.with_columns((pl.col('prekladu') / pl.col('prekladu_celkem')).alias('podíl'))
preklady_po_jazycich = preklady_po_jazycich.with_columns(pl.col('041_h').cast(str))
preklady_po_jazycich = preklady_po_jazycich.filter(pl.col("041_h").is_in(nejzastoupenejsi_jazyky)).sort('rok').rename({'041_h':'jazyk'}).with_columns(pl.col("rok").map_elements(lambda x: datetime.date(year=int(x), month=1, day=1), return_dtype=pl.Date).cast(pl.Datetime))
preklady_po_jazycich = preklady_po_jazycich.with_columns(pl.col('jazyk').replace_strict(nahradit))
preklady_po_jazycich

['eng', 'ger', 'rus', 'fre', 'pol', 'ita', 'slo']


rok,jazyk,prekladu,rok_right,prekladu_celkem,podíl
datetime[μs],str,u32,i64,u32,f64
1900-01-01 00:00:00,"""slovenština""",1,1900,178,0.005618
1900-01-01 00:00:00,"""angličtina""",41,1900,178,0.230337
1900-01-01 00:00:00,"""francouzština""",42,1900,178,0.235955
1900-01-01 00:00:00,"""němčina""",29,1900,178,0.162921
1900-01-01 00:00:00,"""ruština""",17,1900,178,0.095506
1901-01-01 00:00:00,"""němčina""",23,1901,211,0.109005
1901-01-01 00:00:00,"""ruština""",21,1901,211,0.099526
1901-01-01 00:00:00,"""slovenština""",1,1901,211,0.004739
1901-01-01 00:00:00,"""angličtina""",34,1901,211,0.161137
1901-01-01 00:00:00,"""francouzština""",37,1901,211,0.175355


In [7]:
titulek = "Ze kterých jazyků se kdy překládaly knihy do češtiny"
podtitulek = ["Podíl jazyků na překladové literatuře – krásné, naučné i odborné."] #,"Angličtina a francouzština (téměř) zmizely jen během 2. světové války,","kdy dominovaly překlady z němčiny. Po osvobození raketově narostl podíl ruštiny."]
kredit = "zdroj dat: Česká národní bibliografie · vizualizace: iROZHLAS.cz · 2025"

In [8]:
podily_faceted = alt.Chart(preklady_po_jazycich.to_pandas(), title={'text': titulek}, width=300).mark_area().encode(
    alt.X("rok:T", 
          title=None,
          axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')
    ),
    alt.Y('podíl:Q', 
          axis=alt.Axis(labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6'), 
          title=None
    ),
    alt.Color("jazyk:N", 
              scale=alt.Scale(range=['#687fa9','#84c0e4','#e3d83b','#b45058','#789256','#E0DAB5',]), ## '# ['#a7ddd6','#001f80','#e6e6e4','#e5db00','#e49996','#9cc002']), 
              title=None, 
              legend=None
    ),
    row=alt.Row(
        "jazyk:N", 
        title=None, 
        spacing=15, 
        header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='middle', labelFont='Asap')
    )
).resolve_axis(
    x='independent'
).properties(height=60, width=300).configure_view(stroke='transparent')

podily_faceted

## Počty jednotlivých jazyků

In [46]:
rocni_pocty = preklady.filter(pl.col("rok") >= 1800).group_by("rok").agg(pl.col("041_h").unique().len()).sort(by="rok")
rocni_pocty

rok,041_h
i64,u32
1801,2
1802,2
1803,1
1804,3
1805,2
1806,2
1807,3
1808,2
1809,2
1810,1


In [48]:
alt_friendly(rocni_pocty)

Unnamed: 0,rok,041_h
0,1801-01-01,2
1,1802-01-01,2
2,1803-01-01,1
3,1804-01-01,3
4,1805-01-01,2
...,...,...
220,2021-01-01,46
221,2022-01-01,46
222,2023-01-01,47
223,2024-01-01,48


In [70]:
alt.Chart(alt_friendly(rocni_pocty), width=300,
         title=["Z kolika jednotlivých jazyků se překládaly knihy"]
         ).mark_area().encode(
    alt.X("rok:T", axis=alt.Axis(domainOpacity=0, tickCount=4, tickColor='#DCDDD6'), title=None),
    alt.Y("041_h", axis=alt.Axis(orient='right', tickCount=4, domainOpacity=0, tickColor='#DCDDD6'), title=None)
)