In [1]:
import os
import datetime
import json
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.zjisti_vazbu import zjisti_vazbu
from src.me_to_neurazi import me_to_neurazi

pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

with open(os.path.join('src','kredity.json'), 'r', encoding='utf-8') as kredity:
    kredity = json.loads(kredity.read())

In [2]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","650.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","653.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode("020_q").with_columns(pl.col("020_q").map_elements(zjisti_vazbu, return_dtype=str).alias('vazba'))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))
df = df.filter(pl.col("stran") >= 30)

df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a","245_p"], keep="first")
print(len(df))

795736
506565


## Kdy se vyplnovalo pole 655_a?

In [4]:
df.filter(~pl.col("655_a").is_null()).group_by("rok").len().sort(by="rok").join(
    df.group_by('rok').len(), on='rok', how='left'
).with_columns((pl.col("len") / pl.col("len_right")).alias('vyplnenost')).tail(50)

rok,len,len_right,vyplnenost
i64,u32,u32,f64
1976,623,2825,0.220531
1977,669,2892,0.231328
1978,710,2935,0.241908
1979,901,3218,0.279988
1980,1037,3321,0.312255
1981,1036,3164,0.327434
1982,1449,3380,0.428698
1983,2483,3884,0.639289
1984,2523,3989,0.632489
1985,2534,3813,0.664569


In [5]:
df = df.filter(pl.col("rok").is_between(1989,2024))

In [6]:
df.filter(pl.col("245_a").str.contains("Každý den je nový")).explode("655_a")

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,653_a,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""autobiografické komiksy""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""deníky""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""autobiographical comics""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""diaries""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""citáty""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""aforismy""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""quotations""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""aphorisms""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""


In [7]:
df.filter(pl.col("245_a").str.contains("Pod dekou"))

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,653_a,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Thompson, Craig,""","""xx0037934""","[""aut"", ""ill""]","""1975-""",,,,,"""nkc20051632284""",""" nam a22 a 4500""","""051123s2005 xr a g 0…","""(váz.) :""","[""Kč 590,00""]","[""80-7341-603-4""]",,,,,,,"""1""","""0""","""Pod dekou""","""ilustrovaný román /""","""Craig Thompson ; [z anglického…",,,,,,"[""582 s. :""]","[""il. ;""]","[""25 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","[""americké romány"", ""komiksy"", … ""comics""]","[""fd131796"", ""fd131978"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2005,582,"""pevná"""


In [8]:
zebricek = df.explode("655_a").group_by("655_a").len().sort(by="len",descending=True)
zebricek.filter(pl.col('len') >= 200)

655_a,len
str,u32
"""příručky""",32562
"""handbooks and manuals""",24458
"""učebnice vysokých škol""",21487
"""publikace pro děti""",19853
"""monografie""",18122
"""children's literature""",16902
"""monographs""",16491
"""textbooks (higher)""",13582
"""populárně-naučné publikace""",13514
"""studie""",12703


In [9]:
zebricek.filter(pl.col("655_a").str.contains("dívč"))

655_a,len
str,u32
"""dívčí romány""",210


In [10]:
df.sample(20)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,653_a,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Linda, Bohdan""","""jn20001005135""","[""aut""]",,,,,,"""cpk20000964237""",""" nam a22 a 4500""","""000517s1999 xr a e 0…","""(brož.)""",,"[""80-7194-239-1""]",,,,,,,"""1""","""0""","""Výroční zpráva""","""dílčí část II výzkumného záměr…","""zodpovědný řešitel Bohdan Lind…",,,,,,"[""161 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,"[""7"", ""7""]","[""sborníky"", ""výzkumné zprávy""]","[""fd163935"", ""fd131869""]","[""czenas"", ""czenas""]",,,,"[""1"", ""1""]","[""Čapek, Jan,"", ""Janovec, Jan""]","[""aut"", ""aut""]","[""1946-"", null]","[""jn20001005134"", ""jn20001005133""]",,,,,,,,,,,,,,,,,,,,,,1999,161,"""brožovaná"""
"""1""","""Thompson, Jim,""","""mzk2003186901""","[""aut""]","""1906-1977""",,,,,"""np9541749""",""" nam a22 4500""","""950622s1995 xr g u0…","""(váz.) :""","[""Kč 79,00""]","[""80-85794-31-4""]",,,,,,,"""1""","""0""","""Vrah ve mně""",,"""Jim Thompson ; z angličtiny př…",,,,,,"[""158 s. ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","[""americké romány"", ""detektivní romány"", … ""detective novels""]","[""fd131796"", ""fd132010"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"[""1"", ""1"", ""1""]","[""Batrla, Libor"", ""Harrison, Russell"", ""Hubáčková, Alexandra,""]","[""art"", ""aui"", ""trl""]","[null, null, ""1964-""]","[""jx20040610001"", null, ""mzk2008469130""]",,,,,,,,,,,,,,,,,,,,,,1995,158,"""pevná"""
"""1""","""Haensel, Hubert,""","""jn19990003052""","[""aut""]","""1952-""",,,,,"""nkc20152692014""",""" nam a22 i 4500""","""150409t20152015xr g 0…","""(brožováno)""",,"[""978-80-243-6604-3""]",,,,,,,"""1""","""0""","""Život jezerního Charana""",,"""Hubert Haensel ; z německého o…",,,,,,"[""64 strany ;""]",,"[""22 cm""]",,,,…,,"[""7"", ""9""]","[""vědecko-fantastické povídky"", ""science fiction short stories""]","[""fd133845"", null]","[""czenas"", ""eczenas""]",,,,"[""1""]","[""Ryčl, František,""]","[""trl""]","[""1961-""]","[""js20030317007""]",,,,,,,,,,,,,,,,,,,,,,2015,64,"""brožovaná"""
"""1""","""Paolini, Christopher,""","""xx0025816""","[""aut""]","""1983-""",,,,,"""nkc20122360173""",""" nam a22 a 4500""","""120412s2012 xr a g 0…","""(váz.)""",,"[""978-80-253-1366-4""]",,,,,,,"""1""","""0""","""Odkaz Dračích jezdců""",,"""Christopher Paolini ; [z angli…","[""Čtvrtý díl,""]","""Inheritance : pevnost duší""",,,,"[""672 s. :""]","[""il. ;""]","[""24 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","[""americké romány"", ""fantasy romány"", … ""fantasy novels""]","[""fd131796"", ""fd184199"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2012,672,"""pevná"""
"""1""","""Jackson, Lisa,""","""xx0053579""","[""aut""]","""1952-""",,,,,"""nkc20112219401""",""" nam a22 a 4500""","""110803s2011 xr g 0…","""(váz.)""",,"[""978-80-7303-658-4""]",,,,,,,"""1""","""0""","""Chladná krev""",,"""Lisa Jackson ; [z anglického o…",,,,,,"[""403 s. ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","[""americké romány"", ""thrillery (romány)"", … ""thrillers (novels)""]","[""fd131796"", ""fd184206"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,403,"""pevná"""
"""1""","""Čapounová, Dana""","""mzk2003195701""","[""aut""]",,,,,,"""cpk20031252065""",""" cam a22 a 4500""","""030804s2003 xr a f 0…","""(brož.)""",,"[""80-214-2410-9""]",,,,,,,"""1""","""0""","""Využití pektolytických enzymů …","""Use of pectic enzymes during p…","""Dana Čapounová""",,,,,,"[""32 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,"[""7"", ""9""]","[""teze"", ""abstracts""]","[""fd133701"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2003,32,"""brožovaná"""
"""1""","""Hynek, Rudolf Maria,""","""jk01043443""","[""aut""]","""1883-1952""",,,,,"""nkc20071718277""",""" nam a22 a 4500""","""070517s2007 xr ac e 0…","""(v knize neuvedeno ;""",,"[""978-80-239-9995-2""]",,,,,,,"""1""","""0""","""Živý obraz Ukřižovaného ""Stigm…",,"""R.W. Hynek""",,,,,,"[""143 s. :""]","[""il., portréty ;""]","[""21 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","[""pojednání"", ""citáty"", … ""reminiscences""]","[""fd133056"", ""fd131832"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"[""1""]","[""Sýkorová, Jarmila""]","[""ctb""]",,"[""jx20070518015""]",,,,,,,,,,,,,,,,,,,,,,2007,143,
"""1""","""Budinský, Petr,""","""jk01020160""","[""aut""]","""1938-""",,,,,"""np9319171""",""" nam a22 4500""","""940418s1992 xr abf 0…","""(brož.)""",,"[""80-85321-05-X""]",,,,,,,"""1""","""0""","""Mladohalštatský až časnělaténs…","""[publikace připravena v rámci …","""Peter Budinský""",,,,,,"[""152 s., lx s. obr. příl :""]","[""il., mapy ;""]","[""30 cm""]",,,,…,"[""archeologické nálezy"", ""archeologické sbírky"", … ""laténská doba""]","[""7"", ""7""]","[""soupisy"", ""přehledy""]","[""fd133560"", ""fd133202""]","[""czenas"", ""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1992,152,"""brožovaná"""
"""1""","""Bayerová, Vladimíra,""","""mzk2006342899""","[""aut""]","""1976-""",,,,,"""nkc20132509335""",""" nam a22 a 4500""","""131025s2013 xr a e p 0…","""(Vysoké učení technické ;""",,"[""978-80-214-4798-1""]",,,,,,,"""1""","""0""","""Makroekonomie 1""","""studijní text pro denní a komb…","""Vladimíra Kučerová""",,,,,,"[""126 s. :""]","[""il. ;""]","[""30 cm""]",,,,…,,"[""7"", ""9""]","[""učebnice vysokých škol"", ""textbooks (higher)""]","[""fd133772"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2013,126,
"""1""","""Czesaný, Slavoj,""","""jn20000620044""","[""aut""]","""1940-""",,,,,"""nkc20070534207""",""" nam a22 4500""","""910704s1991 xr f 0…","""(brož.)""",,"[""80-7076-408-2""]",,,,,,,"""1""","""0""","""Analýza vývoje československé …",,"""zpracovali Slavoj Czesaný ... …",,,,,,"[""64 s. ;""]",,"[""21 cm""]",,,,…,,"[""7""]","[""studie""]","[""fd133597""]","[""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1991,64,"""brožovaná"""


In [11]:
vyvoj = df.filter(pl.col("rok").is_between(2000,2003)).explode("655_a").group_by("655_a").len().join(
    df.filter(pl.col("rok").is_between(2021,2024)).explode("655_a").group_by("655_a").len(), on="655_a", how="full"
).with_columns((pl.col("len") / pl.col("len_right")).alias("rozdil")).with_columns((pl.col("len") + pl.col("len_right")).alias("celkem")).sort(by="rozdil")

In [12]:
vyvoj.filter(pl.col("celkem") >= 100).head(150)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""romány pro ženy""",1,"""romány pro ženy""",596,0.001678,597
"""women's novels""",1,"""women's novels""",596,0.001678,597
"""fantasy comics""",1,"""fantasy comics""",259,0.003861,260
"""fantasy komiksy""",1,"""fantasy komiksy""",259,0.003861,260
"""akční a dobrodružné komiksy""",1,"""akční a dobrodružné komiksy""",255,0.003922,256
"""action and adventure comics""",1,"""action and adventure comics""",252,0.003968,253
"""superhrdinské komiksy""",1,"""superhrdinské komiksy""",221,0.004525,222
"""superhero comics""",1,"""superhero comics""",219,0.004566,220
"""autorské knihy""",1,"""autorské knihy""",165,0.006061,166
"""authors' books""",1,"""authors' books""",163,0.006135,164


In [13]:
vyvoj.filter(pl.col("celkem") >= 100).tail(150)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""Czech prose""",147,"""Czech prose""",429,0.342657,576
"""detective stories""",27,"""detective stories""",78,0.346154,105
"""obrazové publikace""",372,"""obrazové publikace""",1064,0.349624,1436
"""české romány""",751,"""české romány""",2097,0.358131,2848
"""texts""",38,"""texts""",101,0.376238,139
"""catalogs""",66,"""catalogs""",175,0.377143,241
"""popular works""",742,"""popular works""",1966,0.377416,2708
"""fotografické publikace""",556,"""fotografické publikace""",1459,0.381083,2015
"""autobiografické prózy""",44,"""autobiografické prózy""",114,0.385965,158
"""Czech fairy tales""",43,"""Czech fairy tales""",109,0.394495,152


In [14]:
df.select(pl.col("653_a")).drop_nulls().sample(50)

653_a
list[str]
"[""dějiny"", ""humor"", ""satira""]"
"[""taneční věda""]"
"[""filozofie ducha"", ""mravní pojmy"", … ""speciální metafyzika""]"
"[""zeměpis""]"
"[""česko-anglická konverzace""]"
"[""MS-DOS"", ""Norton Commander"", … ""osobní počítače""]"
"[""právní stát"", ""státní moc"", … ""státní zřízení""]"
"[""stol. 20., léta 40."", ""česká poezie""]"
"[""fyzika"", ""učivo základních škol""]"
"[""stol. 20., léta 60.-90."", ""česká próza""]"


In [15]:
vyvoj2 = df.filter(pl.col("rok").is_between(2000,2003)).explode("653_a").group_by("653_a").len().join(
    df.filter(pl.col("rok").is_between(2021,2024)).explode("653_a").group_by("653_a").len(), on="653_a", how="full"
).with_columns((pl.col("len") / pl.col("len_right")).alias("rozdil")).fill_null(0).with_columns((pl.col("len") + pl.col("len_right")).alias("celkem")).sort(by="rozdil")
vyvoj2.filter(pl.col("celkem") > 5)

653_a,len,653_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
,0,"""jazyková úroveň B2""",7,0.0,7
,0,"""jazyková úroveň B1""",14,0.0,14
,0,"""jazyková úroveň A2""",14,0.0,14
,0,,32189,0.0,32189
,0,"""jazyková úroveň A1""",15,0.0,15
,0,"""literatura new adult""",50,0.0,50
,31258,,0,0.0,31258
"""denní četba""",13,"""denní četba""",44,0.295455,57


In [16]:
vyvoj3 = df.filter(pl.col("rok").is_between(2000,2003)).explode("655_a").group_by("655_a").len().join(
    df.filter(pl.col("rok").is_between(2021,2024)).explode("655_a").group_by("655_a").len(), on="655_a", how="full"
).with_columns((pl.col("len") / pl.col("len_right")).alias("rozdil")).fill_null(0).with_columns((pl.col("len") + pl.col("len_right")).alias("celkem")).sort(by="rozdil")

In [17]:
vyvoj3.filter((pl.col("len") > 1) & (pl.col('len_right') > 1) & (pl.col("celkem") > 100)).sort(by='rozdil',descending=True)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""povídky""",112,"""povídky""",6,18.666667,118
"""milostné povídky""",492,"""milostné povídky""",37,13.297297,529
"""lyrická poezie""",187,"""lyrická poezie""",20,9.35,207
"""lyric poetry""",148,"""lyric poetry""",20,7.4,168
"""právní předpisy""",225,"""právní předpisy""",32,7.03125,257
"""učebnice vysokých škol""",3224,"""učebnice vysokých škol""",536,6.014925,3760
"""legal regulations""",153,"""legal regulations""",29,5.275862,182
"""studie""",2336,"""studie""",484,4.826446,2820
"""vědecko-fantastické povídky""",229,"""vědecko-fantastické povídky""",50,4.58,279
"""dvojjazyčné překladové slovník…",145,"""dvojjazyčné překladové slovník…",32,4.53125,177


In [18]:
vyvoj3.filter(pl.col("celkem") > 200).filter(pl.col('rozdil') > 1).sort(by='rozdil',descending=True)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""milostné povídky""",492,"""milostné povídky""",37,13.297297,529
"""lyrická poezie""",187,"""lyrická poezie""",20,9.35,207
"""právní předpisy""",225,"""právní předpisy""",32,7.03125,257
"""učebnice vysokých škol""",3224,"""učebnice vysokých škol""",536,6.014925,3760
"""studie""",2336,"""studie""",484,4.826446,2820
"""vědecko-fantastické povídky""",229,"""vědecko-fantastické povídky""",50,4.58,279
"""textbooks (higher)""",2249,"""textbooks (higher)""",533,4.219512,2782
"""učebnice""",392,"""učebnice""",113,3.469027,505
"""dobrodružné povídky""",315,"""dobrodružné povídky""",101,3.118812,416
"""studies""",1454,"""studies""",470,3.093617,1924


In [19]:
top = ['komiksy','young adult',"erotické romány", "rozhovory"]
flop = ["učebnice","slovníky","právní předpisy","encyklopedie"]

In [20]:
def podil_zanru(zanr, rok=1989):
    return df.filter(pl.col("rok") >= rok).explode("655_a").filter(
        pl.col("655_a").str.contains("(?i)" + zanr)
    ).group_by(
        "rok"
    ).len(
    ).join(df.filter(pl.col("rok") >= rok).drop_nulls(subset=["655_a"]).group_by("rok").len(), on="rok", how="right").with_columns((pl.col("len") / pl.col("len_right")).alias("podil")).sort(by="rok").rename({'len':'pocet'}).with_columns(pl.lit(zanr).alias("zanr"))

In [21]:
df.explode("655_a").filter(pl.col('655_a') == "erotické romány").group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""Jezdci apokalypsy""",4
"""Twisted""",4
"""Vlci zvěrokruhu""",4
"""Barvy lásky""",4
"""Královská sága plná sexu""",4
"""Hříšné odstíny vášně""",4
"""Savage Trilogy""",3
"""Sin trilogy""",3
"""Hotel v Paříži""",3
"""Crossfire""",3


In [22]:
def co_to_je(nazev):
    return df.filter(pl.col("245_a").str.contains(nazev)).select("655_a").explode("655_a").to_series().to_list()

In [23]:
co_to_je("Neztratit se sama sobě")

['rozhovory', 'interviews']

In [24]:
co_to_je("Otevřené hranice")

['naučné komiksy', 'nonfiction comics']

In [25]:
podil_zanru("nonfiction comics")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
,1989,3115,,"""nonfiction comics"""
,1990,3220,,"""nonfiction comics"""
,1991,3598,,"""nonfiction comics"""
,1992,4452,,"""nonfiction comics"""
,1993,5949,,"""nonfiction comics"""
,1994,6067,,"""nonfiction comics"""
,1995,6784,,"""nonfiction comics"""
,1996,7014,,"""nonfiction comics"""
,1997,7395,,"""nonfiction comics"""
,1998,7210,,"""nonfiction comics"""


In [26]:
podil_zanru("komiks")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
4,1989,3115,0.001284,"""komiks"""
6,1990,3220,0.001863,"""komiks"""
8,1991,3598,0.002223,"""komiks"""
8,1992,4452,0.001797,"""komiks"""
5,1993,5949,0.00084,"""komiks"""
11,1994,6067,0.001813,"""komiks"""
8,1995,6784,0.001179,"""komiks"""
6,1996,7014,0.000855,"""komiks"""
10,1997,7395,0.001352,"""komiks"""
10,1998,7210,0.001387,"""komiks"""


In [27]:
top_data = pl.concat([podil_zanru(z, rok=2000) for z in top])
flop_data = pl.concat([podil_zanru(z, rok=2000) for z in flop])

In [28]:
# flop_data = flop_data.with_columns(pl.col("zanr").replace({"právní předpisy":"práv. předp.","encyklopedie":"encykl."}))

In [133]:
top_graf = alt.Chart(
    alt_friendly(top_data),title="Čeho vychází víc než v roce 2000…", width=55, height=kredity['vyska_nizkych']
).mark_bar(color='#70871E').encode(
    alt.X("rok:T", title=None, axis=alt.Axis(tickCount=1)),
    alt.Y("pocet", title=None, axis=alt.Axis(orient='right', domainOpacity=0, tickColor='#DCDDD6'), scale=alt.Scale(domainMin=0), ),
    alt.Column("zanr", title=None, sort=top,
           header=alt.Header(labelFont='Asap'))
).resolve_axis(y="independent",x="independent").resolve_scale(y="independent")

flop_graf = alt.Chart(
    alt_friendly(flop_data),title="…a čeho míň", width=48, height=55
).mark_bar(color='#DB842F').encode(
    alt.X("rok:T", title=None, axis=alt.Axis(tickCount=1)),
    alt.Y("pocet", title=None, axis=alt.Axis(orient='right', domainOpacity=0, tickColor='#DCDDD6')),
    alt.Column("zanr", title=None,
           header=alt.Header(labelFont='Asap'), sort=flop)
).resolve_axis(y="independent",x="independent").resolve_scale(y="independent")

rozdily_21_stoleti = alt.vconcat(top_graf, flop_graf).configure_view(stroke='transparent').configure_axis(grid=False, domain=False)
rozdily_21_stoleti

In [30]:
me_to_neurazi(rozdily_21_stoleti, soubor="03_trendy", kredity=kredity['default'])

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/03_trendy.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/03_trendy.svg" width="100%" alt="Omlouváme se, ale alternativní text se nepodařilo vygenerovat. Texty v grafu by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


In [31]:
podil_zanru("pohádky")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
6,1989,3115,0.001926,"""pohádky"""
10,1990,3220,0.003106,"""pohádky"""
15,1991,3598,0.004169,"""pohádky"""
52,1992,4452,0.01168,"""pohádky"""
58,1993,5949,0.00975,"""pohádky"""
42,1994,6067,0.006923,"""pohádky"""
56,1995,6784,0.008255,"""pohádky"""
38,1996,7014,0.005418,"""pohádky"""
68,1997,7395,0.009195,"""pohádky"""
52,1998,7210,0.007212,"""pohádky"""


In [32]:
podil_zanru("erotick")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
,1989,3115,,"""erotick"""
9.0,1990,3220,0.002795,"""erotick"""
9.0,1991,3598,0.002501,"""erotick"""
4.0,1992,4452,0.000898,"""erotick"""
6.0,1993,5949,0.001009,"""erotick"""
9.0,1994,6067,0.001483,"""erotick"""
,1995,6784,,"""erotick"""
10.0,1996,7014,0.001426,"""erotick"""
14.0,1997,7395,0.001893,"""erotick"""
3.0,1998,7210,0.000416,"""erotick"""


In [33]:
podil_zanru("dívčí romány")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
2.0,1989,3115,0.000642,"""dívčí romány"""
1.0,1990,3220,0.000311,"""dívčí romány"""
4.0,1991,3598,0.001112,"""dívčí romány"""
2.0,1992,4452,0.000449,"""dívčí romány"""
1.0,1993,5949,0.000168,"""dívčí romány"""
,1994,6067,,"""dívčí romány"""
,1995,6784,,"""dívčí romány"""
,1996,7014,,"""dívčí romány"""
1.0,1997,7395,0.000135,"""dívčí romány"""
,1998,7210,,"""dívčí romány"""


In [34]:
podil_zanru("populárně-naučné publikace")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
41,1989,3115,0.013162,"""populárně-naučné publikace"""
40,1990,3220,0.012422,"""populárně-naučné publikace"""
52,1991,3598,0.014452,"""populárně-naučné publikace"""
65,1992,4452,0.0146,"""populárně-naučné publikace"""
119,1993,5949,0.020003,"""populárně-naučné publikace"""
131,1994,6067,0.021592,"""populárně-naučné publikace"""
203,1995,6784,0.029923,"""populárně-naučné publikace"""
164,1996,7014,0.023382,"""populárně-naučné publikace"""
209,1997,7395,0.028262,"""populárně-naučné publikace"""
253,1998,7210,0.03509,"""populárně-naučné publikace"""


In [35]:
def grafik(z, funkce=podil_zanru):
    return alt.Chart(alt_friendly(funkce(z))).mark_line().encode(
        alt.X("rok:T",axis=alt.Axis(title=None)),
        alt.Y('podil:Q',axis=alt.Axis(
            labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6',title=None
        ))
    ).configure_view(stroke='transparent')

In [36]:
grafik('erotické')

In [37]:
grafik('CD-ROM')

In [38]:
grafik("dvojjazyčná vydání")

In [39]:
grafik("fantasy romány"	)

In [40]:
grafik("učebnice základních škol")

In [41]:
grafik("aforismy")

In [42]:
grafik("rusk")

In [43]:
grafik("young adult")

In [44]:
grafik("příručky")

In [45]:
grafik("dívčí romány")

In [46]:
grafik("komiks")

In [47]:
grafik("detektiv")

In [48]:
grafik("rozhovory")

In [49]:
grafik("deníky")

In [50]:
grafik("autobiogr")

In [51]:
grafik("encyclopedias")

In [52]:
grafik("učebnice vysokých škol")

In [53]:
grafik("kuchař")

## Kuchařky

In [55]:
kucharky = df.explode("655_a").filter(
        pl.col("655_a").str.contains("kuchař")
    )

In [56]:
kucharky.sample(10)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,653_a,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Fúsková, Veronika""","""ola20221138630""","[""aut"", ""pbl"", ""pht""]",,,,,,"""nkc20213375763""",""" nam a22 i 4500""","""211122s2021 xr a e f 0…","""(brožováno)""",,"[""978-80-11-00781-2""]",,,,,,,"""1""","""0""","""Kuchařka pro líné rodiče""",,"""Veronika Fúsková""",,,,,,"[""105 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021,105,"""brožovaná"""
"""1""","""Růžičková, Helena,""","""jn19990209735""","[""aut""]","""1936-2004""",,,,,"""np9428278""",""" nam a22 4500""","""940824s1994 xr acf 0…","""(váz.)""",,"[""80-900707-8-7""]",,,,,,,"""1""","""0""","""Kuchařkou proti své vůli""","""recepty, rady, nápady, vzpomín…","""Helena Růžičková""",,,,,,"[""149 s., [8] s. barev. příl. :""]","[""il., portréty ;""]","[""20 cm""]",,,,…,"[""kuchařské speciality"", ""tradiční jídla"", ""zdravá jídla""]","[""7"", ""7""]","""kuchařské recepty""","[""fd133209"", ""fd132687""]","[""czenas"", ""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1994,149,"""pevná"""
"""1""","""Rademacker, Birgit""","""mzk2007401315""","[""aut""]",,,,,,"""nkc20071728414""",""" nam a22 a 4500""","""070718s2007 xr a e f 0…","""(brož.)""",,"[""978-80-247-2291-7""]",,,,,,,"""1""","""0""","""50 receptů pro 1 pánev""",,"""Birgit Rademacker ; fotografie…",,,,,,"[""62 s. :""]","[""barev. il. ;""]","[""20 cm""]",,,,…,,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,"[""1""]","[""Jessen, Maike""]","[""pht""]",,"[""ola2007404533""]",,,,,,,,,,,,,,,,,,,,,,2007,62,"""brožovaná"""
"""1""","""Seitz, Paul""","""ola2009507950""","[""aut""]",,,,,,"""np9418493""",""" nam a22 4500""","""940824s1994 xr a 0…","""(brož.)""",,"[""80-85805-13-8""]",,,,,,,"""1""","""0""","""Bylinky na zahrádce a v kuchyn…","""pěstování, sklizeň, použití /""","""Paul Seitz ; [z německého orig…",,,,,,"[""71 s. :""]","[""il. (některé barev.) ;""]","[""21 cm""]",,,,…,"[""bylinky"", ""kořenné byliny""]","[""7"", ""7""]","""kuchařské recepty""","[""fd133209"", ""fd132687""]","[""czenas"", ""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1994,71,"""brožovaná"""
"""1""","""Weigel, Jaroslav,""","""jk01151466""","[""aut"", ""ill""]","""1931-2019""",,,,,"""np9312448""",""" nam a22 4500""","""940120s1992 xr a e u0…","""(váz.)""",,"[""80-85192-49-7""]",,,,,,,"""1""","""0""","""Malá psí kuchařka""","""pro dospělé psy malé, střední …","""[text], ilustr. a graf. úprava…",,,,,,"[""79 s. :""]","[""obr., tab. ;""]","[""17 cm""]",,,,…,,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1992,79,"""pevná"""
"""1""","""Katz, Sandor Ellix,""","""xx0192312""","[""aut""]","""1962-""",,,,,"""nkc20142650100""",""" nam a22 a 4500""","""141211s2015 xr a e f 0…","""(brož.)""",,"[""978-80-247-5214-3""]",,,,,,,"""1""","""0""","""Síla přírodní fermentace""","""jedinečná chuť & léčivá síla ž…","""Sandor Ellix Katz ; [z anglick…",,,,,,"[""255 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""kuchařské recepty""","[""fd131864"", ""fd132687"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2015,255,"""brožovaná"""
"""1""","""Doležalová, Alena,""","""mzk2003175510""","[""aut""]","""1958-""",,,,,"""nkc20182987537""",""" nam a22 i 4500""","""180403s2018 xr af e f 0…","""(brožováno)""",,"[""978-80-7322-206-2""]",,,,,,,"""1""","""0""","""Domácí krabičková dieta a „tuk…",,"""Alena Doležalová""",,,,,,"[""205 stran, 8 nečíslovaných stran obrazových příloh :""]","[""barevné ilustrace ;""]","[""23 cm""]",,,,…,,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,205,"""brožovaná"""
"""1""","""Horsáková, Monika,""","""xx0075600""","[""aut""]","""1970-""",,,,,"""nkc20091961844""",""" nam a22 a 4500""","""090604s2009 xr a e f 0…","""(váz.)""",,"[""978-80-7404-024-5""]",,,,,,,"""1""","""0""","""Pod pokličkou 4""","""aby tělo netrpělo /""","""Monika Horsáková, Kamila Teslí…",,,,,,"[""220 s. :""]","[""barev. il. ;""]","[""25 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""kuchařské recepty""","[""fd131864"", ""fd132687"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,"[""1"", ""1""]","[""Teslíková, Kamila"", ""Strýčková, Jana""]","[""aut"", ""aut""]",,"[""mzk2005306703"", ""mzk2005304469""]",,,,,,,,,,,,,,,,,,,,,,2009,220,"""pevná"""
"""1""","""Devátá, Ivanka,""","""jn19990209138""","[""aut""]","""1935-""",,,,,"""nkc20193148185""",""" cam a22 i 4500""","""191018s2019 xr g 0…","""(vázáno)""",,"[""978-80-267-1640-2""]",,,,,,,"""1""","""0""","""Džungle v kuchyni""",,"""Ivanka Devátá""",,,,,,"[""149 stran ;""]",,"[""20 cm""]",,,,…,,"[""7"", ""7"", … ""9""]","""kuchařské recepty""","[""fd133971"", ""fd132428"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019,149,"""pevná"""
"""1""","""Brenn Horová, Anna""",,"[""aut""]",,,,,,"""nkc20172867214""",""" nam a22 i 4500""","""170109t20162016xr a e 0…","""(vázáno)""",,"[""978-80-7459-131-0""]",,,,,,,"""1""","""0""","""Smoothies a ovocné šťávy""",,"""Anna Brenn Horová""",,,,,,"[""123 stran :""]","[""barevné ilustrace ;""]","[""22 cm""]",,,,…,,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2016,123,"""pevná"""


In [57]:
def kuchyne(slovo):
    return kucharky.filter(
        pl.col("245_a").str.contains("(?i)" + slovo) | pl.col("245_a").str.contains("(?i)" + slovo)
    ).group_by(
        "rok"
    ).len(
    ).join(kucharky.group_by("rok").len(), on="rok", how="right").with_columns((pl.col("len") / pl.col("len_right")).alias("podil")).sort(by="rok").rename({'len':'pocet'}).fill_null(0)

In [58]:
kuchyne('barbe')

pocet,rok,len_right,podil
u32,i64,u32,f64
0,1989,16,0.0
0,1990,20,0.0
0,1991,39,0.0
0,1992,57,0.0
0,1993,78,0.0
0,1994,80,0.0
0,1995,94,0.0
0,1996,91,0.0
0,1997,95,0.0
0,1998,103,0.0


In [59]:
grafik('barbe', funkce=kuchyne)

In [60]:
grafik('vegan', funkce=kuchyne)

In [61]:
grafik('sous ', funkce=kuchyne)

In [62]:
grafik('svačin', funkce=kuchyne)

In [63]:
grafik('pečiv', funkce=kuchyne)

In [64]:
grafik('chleb', funkce=kuchyne)

In [65]:
grafik('peče', funkce=kuchyne)

In [66]:
grafik('diabet', funkce=kuchyne)

In [67]:
grafik('gril', funkce=kuchyne)

In [68]:
grafik('cukrov', funkce=kuchyne)

In [69]:
grafik('zdrav', funkce=kuchyne)

In [70]:
grafik('bez ', funkce=kuchyne)

In [71]:
kucharky.filter(pl.col("245_a").str.contains("pro")).with_columns(pl.col("245_a").map_elements(lambda x: x.split("pro")[1].strip())).group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""diabetiky""",25
"""děti""",14
"""každou příležitost""",12
"""každého""",10
"""labužníky""",9
"""každý den""",8
"""začátečníky""",8
"""moderní ženu""",8
"""cukrářskou výrobu""",8
"""zdraví""",7


In [72]:
kucharky.filter(pl.col("245_a").str.contains(" s ")).with_columns(pl.col("245_a").map_elements(lambda x: x.split(" s ")[1].split(" ")[0].strip())).group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""láskou""",10
"""Měsícem""",6
"""Ellou""",5
"""dětmi""",4
"""Habadějem""",4
"""fantazií""",4
"""bylinkami""",4
"""konopím""",4
"""potěšením""",3
"""masem""",3


In [73]:
kucharky.filter(pl.col("245_a").str.contains(" bez ")).with_columns(pl.col("245_a").map_elements(lambda x: x.split(" bez ")[1].split(" ")[0].strip())).group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""lepku""",24
"""cukru""",6
"""cholesterolu""",6
"""mléka""",6
"""vážení""",5
"""hladovění""",3
"""zkušeností""",2
"""soli""",2
"""lepku,""",2
"""servítků""",2


In [74]:
grafik(' s ', funkce=kuchyne)

In [75]:
grafik(' bez ', funkce=kuchyne)

In [76]:
grafik('frit', funkce=kuchyne)

In [77]:
grafik('indi', funkce=kuchyne)

In [78]:
grafik('japon', funkce=kuchyne)

In [79]:
grafik('vietn', funkce=kuchyne)

In [80]:
grafik('pán[ev]', funkce=kuchyne)

In [81]:
grafik('mikrov', funkce=kuchyne)

In [82]:
grafik('gril', funkce=kuchyne)

In [83]:
grafik('sex', funkce=kuchyne)

In [84]:
grafik('babi', funkce=kuchyne)

In [85]:
grafik('pomaz', funkce=kuchyne)

In [86]:
grafik('cukr', funkce=kuchyne)

In [87]:
grafik('hrní', funkce=kuchyne)

In [88]:
grafik('hubn', funkce=kuchyne)

In [89]:
grafik('omáč', funkce=kuchyne)

In [90]:
grafik('pol[íé]v', funkce=kuchyne)

In [91]:
grafik('pomaz', funkce=kuchyne)

In [92]:
grafik('keto', funkce=kuchyne)

In [93]:
grafik('paleo', funkce=kuchyne)

In [94]:
grafik('houb', funkce=kuchyne)

In [95]:
grafik('(kvas|kvaš|ferment)', funkce=kuchyne)

In [96]:
grafik('makrobiot', funkce=kuchyne)

In [97]:
grafik('bílkov', funkce=kuchyne)

In [98]:
kucharky.select(pl.col("245_a")).to_series().to_list()

['Čokoláda',
 'Lesní kuchařka',
 'Kuchařka pro chlapy',
 'Dva italští gurmáni',
 'Velká kniha grilování',
 'Moučníky',
 'Francouzská venkovská kuchyně',
 'Polévková mánie',
 'Vánoční cukroví pro diabetiky',
 'Itálie očima labužníků',
 'Mami, pusť mě k vaření',
 'Česká kuchařka pro váš typ',
 'Děti v kuchyni vítány',
 'Makrobiotické těhotenství a péče o novorozeně',
 'Každý den něco pro zdraví a dobrou náladu',
 'Zanzibar, aneb, První světový průvodce Haliny Pawlowské',
 'Makrobiotická kuchyně doktora Jonáše 1',
 'Kuchařka pro kluky',
 'Lahůdky od Jadranu',
 'Příjemné chvíle s kávou',
 'Recepty z Valašska',
 'Snídaně u Florentýny',
 'Kuchařka veganská pralesní',
 'Čínská kuchyně',
 'Jídlem k radosti, aneb, Jednou týdně s Ivou',
 'Léčivá síla Bali',
 'Velká barevná hrníčková kuchařka',
 'Alergie na bílkoviny kravského mléka u novorozenců a malých kojenců',
 'Liga destilatérů a kurzy hladovění',
 'Dieta u jaterních onemocnění',
 'Rychlovky v kuchyni',
 'Afrodiziakální kuchařka',
 'Nanuky'