In [110]:
import os
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.zjisti_vazbu import zjisti_vazbu

pl.Config(tbl_rows=1000)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

In [111]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","650.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","653.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode("020_q").with_columns(pl.col("020_q").map_elements(zjisti_vazbu, return_dtype=str).alias('vazba'))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))
df = df.filter(pl.col("stran") >= 30)

df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a","245_p"], keep="first")
print(len(df))

794838
506079


## Kdy se vyplnovalo pole 655_a?

In [113]:
df.filter(~pl.col("655_a").is_null()).group_by("rok").len().sort(by="rok").join(
    df.group_by('rok').len(), on='rok', how='left'
).with_columns((pl.col("len") / pl.col("len_right")).alias('vyplnenost')).tail(50)

rok,len,len_right,vyplnenost
i64,u32,u32,f64
1976,621,2823,0.219979
1977,667,2891,0.230716
1978,709,2932,0.241814
1979,900,3220,0.279503
1980,1036,3324,0.311673
1981,1035,3164,0.327118
1982,1445,3380,0.427515
1983,2483,3885,0.639125
1984,2522,3989,0.632239
1985,2534,3813,0.664569


In [114]:
df = df.filter(pl.col("rok").is_between(1989,2024))

In [115]:
df.filter(pl.col("245_a").str.contains("Každý den je nový")).explode("655_a")

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind2,655_a,655_7,655_2,655_ind1,655_x,655_z,655_y,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""autobiografické komiksy""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""deníky""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""autobiographical comics""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""diaries""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""citáty""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""aforismy""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""quotations""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""aphorisms""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""


In [116]:
df.filter(pl.col("245_a").str.contains("Pod dekou"))

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind2,655_a,655_7,655_2,655_ind1,655_x,655_z,655_y,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Thompson, Craig,""","""xx0037934""","[""aut"", ""ill""]","""1975-""",,,,,"""nkc20051632284""",""" nam a22 a 4500""","""051123s2005 xr a g 0…","""(váz.) :""","[""Kč 590,00""]","[""80-7341-603-4""]",,,,,,,"""1""","""0""","""Pod dekou""","""ilustrovaný román /""","""Craig Thompson ; [z anglického…",,,,,,"[""582 s. :""]","[""il. ;""]","[""25 cm""]",,,,…,"[""7"", ""7"", … ""9""]","[""americké romány"", ""komiksy"", … ""comics""]","[""fd131796"", ""fd131978"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2005,582,"""pevná"""


In [117]:
zebricek = df.explode("655_a").group_by("655_a").len().sort(by="len",descending=True)
zebricek.filter(pl.col('len') >= 200)

655_a,len
str,u32
"""příručky""",32543
"""handbooks and manuals""",24429
"""učebnice vysokých škol""",21484
"""publikace pro děti""",19832
"""monografie""",18092
"""children's literature""",16878
"""monographs""",16462
"""textbooks (higher)""",13578
"""populárně-naučné publikace""",13502
"""studie""",12687


In [118]:
zebricek.filter(pl.col("655_a").str.contains("dívč"))

655_a,len
str,u32
"""dívčí romány""",209


In [119]:
df.sample(20)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind2,655_a,655_7,655_2,655_ind1,655_x,655_z,655_y,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Molnár, Josef,""","""jn20001005583""","[""aut""]","""1953-""",,,,,"""np9534039""",""" nam a22 4500""","""950323s1994 xr a u0…","""(brož.)""",,"[""80-85806-20-7""]",,,,,,,"""1""","""0""","""Matematika ve 4. ročníku""","""Příručka pro učitele /""","""Josef Molnár, Hana Mikulenková…",,,,,,"[""188 s. :""]","[""obr. ;""]","[""20 x 29 cm""]",,,,…,"[""7""]","[""metodické příručky""]","[""fd132826""]","[""czenas""]",,,,,"[""1"", ""1""]","[""Grepl, Tomáš"", ""Mikulenková, Hana,""]","[""art"", ""aut""]","[null, ""1954-""]","[""jx20040624010"", ""kup19950000065353""]",,,,,,,,,,,,,,,,,,,,,,1994,188,"""brožovaná"""
"""1""","""Winterson, Jeanette,""","""jn19990009203""","[""aut""]","""1959-""",,,,,"""cpk20041302740""",""" cam a22 a 4500""","""040216s2004 xr a e 0…","""(váz.)""",,"[""80-7203-542-8""]",,,,,,,"""1""","""0""","""Jak naštěpit třešeň""",,"""Jeanette Wintersonová ; [zčešt…",,,,,,"[""137 s. :""]","[""il. ;""]","[""21 cm""]",,,,…,"[""7"", ""9""]","[""anglické prózy"", ""English prose""]","[""fd131812"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2004,137,"""pevná"""
"""1""","""Krhut, Daniel,""","""ola2006323440""","[""aut""]","""1982-""",,,,,"""nkc20051630914""",""" nam a22 a 4500""","""051110s2005 xr g 0…","""(brož.)""",,"[""80-239-6131-4""]",,,,,,,"""1""","""0""","""Modrej soumrak nad městem""",,"""Daniel Krhut""",,,,,,"[""133 s. ;""]",,"[""21 cm""]",,,,…,"[""7"", ""9""]","[""české povídky"", ""Czech short stories""]","[""fd133971"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2005,133,"""brožovaná"""
"""1""","""Saniga, Miroslav,""","""jx20081110010""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20142610463""",""" nam a22 a 4500""","""140724s2014 xr a e 0…","""(brož.)""",,"[""978-80-7195-779-9""]",,,,,,,"""1""","""0""","""Boží stopy v přírodě""",,"""Miroslav Saniga ; [ze slovensk…",,,,,,"[""98 s. :""]","[""il. ;""]","[""17 cm""]",,,,…,"[""7"", ""9""]","[""úvahy"", ""essays""]","[""fd134000"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2014,98,"""brožovaná"""
"""1""","""Varadzin, František,""","""ola2003186243""","[""aut""]","""1952-""",,,,,"""nkc20061637859""",""" nam a22 a 4500""","""060112s2005 xr a f 0…","""(brož.)""",,"[""80-248-0968-0""]",,,,,,,"""1""","""0""","""Regiony a vnější ekonomické vz…",,"""[František Varadzin a kol.]""",,,,,,"[""195 s. :""]","[""il. ;""]","[""25 cm""]",,,,…,"[""7"", ""9""]","[""monografie"", ""monographs""]","[""fd132842"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2005,195,"""brožovaná"""
"""1""","""Žákovec, Jan,""","""xx0000113""","[""aut""]","""1958-""",,,,,"""nkc20092025255""",""" nam a22 a 4500""","""091210s2009 xr ah f 0…","""(váz.)""",,"[""978-80-7328-216-5""]",,,,,,,"""1""","""0""","""Plynové lampy""",,"""Jan Žákovec""",,,,,,"[""107 s. :""]","[""il. (převážně barev.), faksim. ;""]","[""25 cm""]",,,,…,"[""7"", ""9""]","[""monografie"", ""monographs""]","[""fd132842"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2009,107,"""pevná"""
"""1""","""Cílek, Roman,""","""jk01020709""","[""aut""]","""1937-""",,,,,"""nkc20132438866""",""" nam a22 a 4500""","""130131s2013 xr acfhe 0…","""(Pražská vydavatelská společno…",,"[""978-80-7250-634-7"", ""978-80-7425-162-7""]",,,,,,,"""1""","""0""","""Noc dlouhých nožů""","""osudový zlom v hitlerovské éře…","""Roman Cílek""",,,,,,"[""110 s., [8] s. obr. příl. :""]","[""il., portréty, 1 faksim. ;""]","[""21 cm""]",,,,…,"[""7"", ""9""]","[""literatura faktu"", ""non-fiction""]","[""fd132773"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2013,110,
"""1""","""Schönfeld, Petr,""","""jn20000710507""","[""aut""]","""1949-""",,,,,"""cpk20253669097""",""" cam a22 i 4500""","""190110s2018 xr achje 0…","""(Brožováno)""",,,,,,,,,"""1""","""0""","""10 generací rodu Schönfeldů""",,"""Petr Schönfeld""",,,,,,"[""244 stran :""]","[""ilustrace, portréty, faksimile, genealogické tabulky ;""]","[""30 cm""]",,,,…,"[""7"", ""7"", … ""7""]","[""biografie"", ""studie"", … ""obrazové publikace""]","[""fd131909"", ""fd133597"", … ""fd132947""]","[""czenas"", ""czenas"", … ""czenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,244,"""brožovaná"""
"""1""","""Fejtová, Olga,""","""ola2004212142""","[""aut""]","""1962-""",,,,,"""nkc20142588119""",""" nam a22 a 4500""","""140501s2014 xr ah f 0…","""(Archiv hl. m. Prahy ;""",,"[""978-80-86852-56-0"", ""978-80-7414-625-1"", ""978-80-87271-93-3""]",,,,,,,"""1""","""0""","""Jednota bratrská v městech pra…",,"""Olga Fejtová""",,,,,,"[""207 s. :""]","[""il. (převážně barev.), faksim. ;""]","[""22 cm""]",,,,…,"[""7"", ""7"", … ""9""]","[""monografie"", ""soupisy"", … ""reproductions""]","[""fd132842"", ""fd133560"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2014,207,
"""1""","""Marek Litvová, Zuzana,""","""jn20031215018""","[""aut""]","""1977-""",,,,,"""nkc20112234508""",""" cam a22 a 4500""","""110916s2011 xr acf e 0…","""(brož.)""",,"[""978-80-904771-2-4""]",,,,,,,"""1""","""0""","""Holka s tajemstvím""","""[skutečný příběh o lásce, nadě…","""Zuzana Marek Litvová""",,,,,,"[""152 s., [20] s. obr. příl. :""]","[""il. (některé barev.), portréty ;""]","[""21 cm""]",,,,…,"[""7"", ""9""]","[""autobiografické vzpomínky"", ""autobiographical reminiscences""]","[""fd131854"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,152,"""brožovaná"""


In [120]:
vyvoj = df.filter(pl.col("rok").is_between(2000,2003)).explode("655_a").group_by("655_a").len().join(
    df.filter(pl.col("rok").is_between(2021,2024)).explode("655_a").group_by("655_a").len(), on="655_a", how="full"
).with_columns((pl.col("len") / pl.col("len_right")).alias("rozdil")).with_columns((pl.col("len") + pl.col("len_right")).alias("celkem")).sort(by="rozdil")

In [121]:
vyvoj.filter(pl.col("celkem") >= 100).head(150)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""romány pro ženy""",1,"""romány pro ženy""",595,0.001681,596
"""women's novels""",1,"""women's novels""",595,0.001681,596
"""fantasy comics""",1,"""fantasy comics""",259,0.003861,260
"""fantasy komiksy""",1,"""fantasy komiksy""",259,0.003861,260
"""akční a dobrodružné komiksy""",1,"""akční a dobrodružné komiksy""",255,0.003922,256
"""action and adventure comics""",1,"""action and adventure comics""",252,0.003968,253
"""superhrdinské komiksy""",1,"""superhrdinské komiksy""",221,0.004525,222
"""superhero comics""",1,"""superhero comics""",219,0.004566,220
"""autorské knihy""",1,"""autorské knihy""",164,0.006098,165
"""authors' books""",1,"""authors' books""",162,0.006173,163


In [122]:
vyvoj.filter(pl.col("celkem") >= 100).tail(150)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""humoristické příběhy""",44,"""humoristické příběhy""",128,0.34375,172
"""detective stories""",27,"""detective stories""",78,0.346154,105
"""Czech prose""",147,"""Czech prose""",420,0.35,567
"""obrazové publikace""",372,"""obrazové publikace""",1048,0.354962,1420
"""české romány""",751,"""české romány""",2083,0.360538,2834
"""texts""",38,"""texts""",101,0.376238,139
"""popular works""",742,"""popular works""",1949,0.380708,2691
"""fotografické publikace""",556,"""fotografické publikace""",1453,0.382657,2009
"""autobiografické prózy""",44,"""autobiografické prózy""",113,0.389381,157
"""catalogs""",66,"""catalogs""",168,0.392857,234


In [123]:
df.select(pl.col("653_a")).drop_nulls().sample(50)

653_a
list[str]
"[""chladné zbraně"", ""historická zbroj"", … ""palné zbraně""]"
"[""ekonomický vývoj"", ""hospodářské dějiny"", … ""české země""]"
"[""střevní mikrobiom""]"
"[""Kázání na hoře"", ""aktuální myšlenky"", ""evangelium Matoušovo""]"
"[""söpönlové""]"
"[""biocentrismus""]"
"[""absorpční kalkulace"", ""analýza bodu zvratu"", … ""transformace nákladů""]"
"[""psychosociální pomoc""]"
"[""duchovní hudba"", ""evangelizace"", … ""tělesná uzdravení""]"
"[""drobné portréty""]"


In [124]:
vyvoj2 = df.filter(pl.col("rok").is_between(2000,2003)).explode("653_a").group_by("653_a").len().join(
    df.filter(pl.col("rok").is_between(2021,2024)).explode("653_a").group_by("653_a").len(), on="653_a", how="full"
).with_columns((pl.col("len") / pl.col("len_right")).alias("rozdil")).fill_null(0).with_columns((pl.col("len") + pl.col("len_right")).alias("celkem")).sort(by="rozdil")
vyvoj2.filter(pl.col("celkem") > 5)

653_a,len,653_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
,0,"""jazyková úroveň B2""",7,0.0,7
,0,"""jazyková úroveň A1""",15,0.0,15
,0,"""jazyková úroveň A2""",14,0.0,14
,0,,31951,0.0,31951
,0,"""literatura new adult""",50,0.0,50
,0,"""jazyková úroveň B1""",14,0.0,14
,31255,,0,0.0,31255
"""denní četba""",13,"""denní četba""",44,0.295455,57


In [125]:
vyvoj3 = df.filter(pl.col("rok").is_between(2000,2003)).explode("655_a").group_by("655_a").len().join(
    df.filter(pl.col("rok").is_between(2021,2024)).explode("655_a").group_by("655_a").len(), on="655_a", how="full"
).with_columns((pl.col("len") / pl.col("len_right")).alias("rozdil")).fill_null(0).with_columns((pl.col("len") + pl.col("len_right")).alias("celkem")).sort(by="rozdil")

In [126]:
vyvoj3.filter(pl.col("celkem") > 100).filter(pl.col('rozdil') == 0).sort(by='celkem',descending=True)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""Love stories""",1258,,0,0.0,1258
"""Textbooks""",1125,,0,0.0,1125
"""Detective and mystery stories""",491,,0,0.0,491
"""Short stories, Czech""",429,,0,0.0,429
"""Science fiction""",393,,0,0.0,393
,0,"""manga""",339,0.0,339
"""Adventure stories""",333,,0,0.0,333
,307,,0,0.0,307
,0,"""young adult literature""",281,0.0,281
,0,"""literatura young adult""",280,0.0,280


In [127]:
vyvoj3.filter(pl.col("celkem") > 200).filter(pl.col('rozdil') > 1).sort(by='rozdil',descending=True)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""milostné povídky""",492,"""milostné povídky""",36,13.666667,528
"""lyrická poezie""",187,"""lyrická poezie""",20,9.35,207
"""právní předpisy""",225,"""právní předpisy""",32,7.03125,257
"""učebnice vysokých škol""",3224,"""učebnice vysokých škol""",533,6.04878,3757
"""studie""",2336,"""studie""",471,4.95966,2807
"""vědecko-fantastické povídky""",229,"""vědecko-fantastické povídky""",50,4.58,279
"""textbooks (higher)""",2249,"""textbooks (higher)""",529,4.251418,2778
"""učebnice""",392,"""učebnice""",113,3.469027,505
"""studies""",1454,"""studies""",456,3.188596,1910
"""dobrodružné povídky""",314,"""dobrodružné povídky""",101,3.108911,415


In [128]:
top = ['komiksy','young adult',"erotické romány"]
flop = ["učebnice","slovníky","encyklopedie"]

In [129]:
def podil_zanru(zanr):
    return df.explode("655_a").filter(
        pl.col("655_a").str.contains("(?i)" + zanr)
    ).group_by(
        "rok"
    ).len(
    ).join(df.drop_nulls(subset=["655_a"]).group_by("rok").len(), on="rok", how="right").with_columns((pl.col("len") / pl.col("len_right")).alias("podil")).sort(by="rok").rename({'len':'pocet'}).with_columns(pl.lit(zanr).alias("zanr"))

In [130]:
df.explode("655_a").filter(pl.col('655_a') == "erotické romány").group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""Vlci zvěrokruhu""",4
"""Královská sága plná sexu""",4
"""Twisted""",4
"""Barvy lásky""",4
"""Hříšné odstíny vášně""",4
"""Jezdci apokalypsy""",4
"""After hours""",3
"""Sin trilogy""",3
"""Mount trilogy""",3
"""Bratři Steelové""",3


In [131]:
podil_zanru("komiks")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
4,1989,3114,0.001285,"""komiks"""
6,1990,3219,0.001864,"""komiks"""
8,1991,3599,0.002223,"""komiks"""
8,1992,4450,0.001798,"""komiks"""
5,1993,5949,0.00084,"""komiks"""
11,1994,6066,0.001813,"""komiks"""
8,1995,6783,0.001179,"""komiks"""
6,1996,7011,0.000856,"""komiks"""
10,1997,7394,0.001352,"""komiks"""
10,1998,7207,0.001388,"""komiks"""


In [132]:
top_data = pl.concat([podil_zanru(z) for z in top])
flop_data = pl.concat([podil_zanru(z) for z in flop])

In [133]:
top_graf = alt.Chart(
    alt_friendly(top_data),title="Rostoucí kategorie…", width=300, height=80
).mark_area().encode(
    alt.X("rok:T", title=None),
    alt.Y("pocet", title=None, axis=alt.Axis(orient='right', domainOpacity=0, tickColor='#DCDDD6'), ),
    alt.Row("zanr", title=None, sort=top,
           header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='middle', labelFont='Asap'))
).resolve_axis(y="independent").resolve_scale(y="independent")

flop_graf = alt.Chart(
    alt_friendly(flop_data),title="…upadající kategorie", width=300, height=80
).mark_area().encode(
    alt.X("rok:T", title=None),
    alt.Y("pocet", title=None, axis=alt.Axis(orient='right', domainOpacity=0, tickColor='#DCDDD6'), ),
    alt.Row("zanr", title=None, sort=flop,
           header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='middle', labelFont='Asap'))
).resolve_axis(y="independent").resolve_scale(y="independent")

alt.vconcat(top_graf, flop_graf).configure_view(stroke='transparent')

In [134]:
podil_zanru("pohádky")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
6,1989,3114,0.001927,"""pohádky"""
10,1990,3219,0.003107,"""pohádky"""
15,1991,3599,0.004168,"""pohádky"""
52,1992,4450,0.011685,"""pohádky"""
58,1993,5949,0.00975,"""pohádky"""
42,1994,6066,0.006924,"""pohádky"""
56,1995,6783,0.008256,"""pohádky"""
38,1996,7011,0.00542,"""pohádky"""
68,1997,7394,0.009197,"""pohádky"""
52,1998,7207,0.007215,"""pohádky"""


In [135]:
podil_zanru("erotick")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
,1989,3114,,"""erotick"""
9.0,1990,3219,0.002796,"""erotick"""
9.0,1991,3599,0.002501,"""erotick"""
4.0,1992,4450,0.000899,"""erotick"""
6.0,1993,5949,0.001009,"""erotick"""
9.0,1994,6066,0.001484,"""erotick"""
,1995,6783,,"""erotick"""
10.0,1996,7011,0.001426,"""erotick"""
14.0,1997,7394,0.001893,"""erotick"""
3.0,1998,7207,0.000416,"""erotick"""


In [136]:
podil_zanru("dívčí romány")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
2.0,1989,3114,0.000642,"""dívčí romány"""
1.0,1990,3219,0.000311,"""dívčí romány"""
4.0,1991,3599,0.001111,"""dívčí romány"""
2.0,1992,4450,0.000449,"""dívčí romány"""
1.0,1993,5949,0.000168,"""dívčí romány"""
,1994,6066,,"""dívčí romány"""
,1995,6783,,"""dívčí romány"""
,1996,7011,,"""dívčí romány"""
1.0,1997,7394,0.000135,"""dívčí romány"""
,1998,7207,,"""dívčí romány"""


In [137]:
podil_zanru("populárně-naučné publikace")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
41,1989,3114,0.013166,"""populárně-naučné publikace"""
40,1990,3219,0.012426,"""populárně-naučné publikace"""
52,1991,3599,0.014448,"""populárně-naučné publikace"""
65,1992,4450,0.014607,"""populárně-naučné publikace"""
119,1993,5949,0.020003,"""populárně-naučné publikace"""
131,1994,6066,0.021596,"""populárně-naučné publikace"""
203,1995,6783,0.029928,"""populárně-naučné publikace"""
164,1996,7011,0.023392,"""populárně-naučné publikace"""
209,1997,7394,0.028266,"""populárně-naučné publikace"""
253,1998,7207,0.035105,"""populárně-naučné publikace"""


In [138]:
def grafik(z, funkce=podil_zanru):
    return alt.Chart(alt_friendly(funkce(z))).mark_line().encode(
        alt.X("rok:T",axis=alt.Axis(title=None)),
        alt.Y('podil:Q',axis=alt.Axis(
            labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6',title=None
        ))
    ).configure_view(stroke='transparent')

In [139]:
grafik('erotické')

In [140]:
grafik('CD-ROM')

In [141]:
grafik("dvojjazyčná vydání")

In [142]:
grafik("fantasy romány"	)

In [143]:
grafik("učebnice základních škol")

In [144]:
grafik("aforismy")

In [145]:
grafik("rusk")

In [146]:
grafik("young adult")

In [147]:
grafik("příručky")

In [148]:
grafik("dívčí romány")

In [149]:
grafik("komiks")

In [150]:
grafik("detektiv")

In [151]:
grafik("rozhovory")

In [152]:
grafik("deníky")

In [153]:
grafik("autobiogr")

In [154]:
grafik("encyclopedias")

In [155]:
grafik("učebnice vysokých škol")

In [156]:
grafik("kuchař")

## Kuchařky

In [158]:
kucharky = df.explode("655_a").filter(
        pl.col("655_a").str.contains("kuchař")
    )

In [159]:
kucharky.sample(10)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind2,655_a,655_7,655_2,655_ind1,655_x,655_z,655_y,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""McFadden, Christine""","""jn20000401765""","[""aut""]",,,,,,"""cpk19990770006""",""" nam a22 a 4500""","""991112s1999 xr a f f 0…","""(váz.)""",,"[""80-7234-056-5""]",,,,,,,"""1""","""0""","""Velká encyklopedie čokolády""","""více než 200 receptů /""","""Christine McFaddenová & Christ…",,,,,,"[""256 s. :""]","[""barev. il. ;""]","[""31 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""kuchařské recepty""","[""fd132842"", ""fd132687"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,"[""1""]","[""France, Christine""]","[""aut""]",,"[""xx0072816""]",,,,,,,,,,,,,,,,,,,,,,1999,256,"""pevná"""
"""1""","""Kocábová, Marsha,""","""xx0095594""","[""aut""]","""1954-""",,,,,"""nkc20203175105""",""" nam a22 i 4500""","""200207s2020 xr a e f 0…","""(vázáno) :""","[""Kč 299,00""]","[""978-80-267-1710-2""]",,,,,,,"""1""","""0""","""Jižanská kuchařka""",,"""Marsha Kocábová ; přeložila Al…",,,,,,"[""105 stran :""]","[""barevné ilustrace ;""]","[""24 cm""]",,,,…,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,"[""1""]","[""Belánová, Alžběta""]","[""trl""]",,"[""mzk20201066215""]",,,,,,,,,,,,,,,,,,,,,,2020,105,"""pevná"""
"""1""","""Trnková, Klára,""","""jo2008427188""","[""aut""]","""1949-""",,,,,"""nkc20122268239""",""" cam a22 a 4500""","""111010s2011 xr a e f 0…","""(brož.)""",,"[""978-80-87209-78-3""]",,,,,,,"""1""","""0""","""Tajné narozeninové recepty naš…",,"""Klára Trnková""",,,,,,"[""39 s. :""]","[""barev. il. ;""]","[""15 cm""]",,,,…,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,39,"""brožovaná"""
"""1""","""Keithová, Jana,""","""mzk2018980142""","[""aut"", ""pht""]","""1969-""",,,,,"""nkc20182972059""",""" nam a22 i 4500""","""180124s2018 xr a e f 0…","""(kroužková vazba) :""","[""Kč 169,00""]","[""978-80-253-3561-1""]",,,,,,,"""1""","""0""","""Zdravé svačiny do školy i do p…",,"""Jana Keithová""",,,,,,"[""70 stran :""]","[""barevné ilustrace ;""]","[""25 cm""]",,,,…,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,70,
"""1""","""Toufar, Pavel,""","""jk01132822""","[""aut""]","""1948-2018""",,,,,"""cpk20000655765""",""" cam a22 a 4500""","""990421s1999 xr af e f 0…","""(brož.) :""","[""Kč 69,00""]","[""80-86136-31-0""]",,,,,,,"""1""","""0""","""Kuchařka pro krkonošského medv…",,"""Pavel Toufar ; fotografie Vlad…",,,,,,"[""149 s., [4] s. barev. obr. příl. :""]","[""il. (některé barev.) ;""]","[""20 cm""]",,,,…,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,"[""1"", ""1""]","[""Martenek, Miloslav,"", ""Doležal, Vladimír,""]","[""ill"", ""pht""]","[""1933-"", ""1952-""]","[""jn20000401735"", ""jn19981000720""]",,,,,,,,,,,,,,,,,,,,,,1999,149,"""brožovaná"""
"""1""","""Živsová, Jitka""","""mzk2008468968""","[""aut""]",,,,,,"""nkc20102130376""",""" nam a22 a 4500""","""101222s2010 xr e f 0…","""(váz.) :""","[""Kč 199,00""]","[""978-80-87089-30-9""]",,,,,,,"""1""","""0""","""Minutky a zákusky""","""nové recepty na rychlou přípra…","""Jitka Živsová""",,,,,,"[""120 s. ;""]",,"[""25 cm""]",,,,…,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2010,120,"""pevná"""
"""1""","""Hrabětová, Jana,""","""jk01042597""","[""aut"", ""edt""]","""1943-""",,,,,"""nkc20243627485""",""" nam a22 i 4500""","""240822s1996 xr a e f 0…","""(Brožováno)""",,,,,,,,,"""1""","""0""","""Koření v kuchyni našich babiče…",,"""zpracování a příprava k tisku …",,,,,,"[""39 stran :""]","[""ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""kuchařské recepty""","[""fd133209"", ""fd132687"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1996,39,"""brožovaná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20112186290""",""" nam a22 a 4500""","""110502s2011 xr af e f 0…","""(brož.)""",,"[""978-80-87156-60-5""]",,,,,,,"""1""","""0""","""Vaříme, šetříme (kapsu i čas)""",,"""Jarmila Mandžuková""",,,,,,"[""150 s., [16] s. obr. příl. :""]","[""barev. il. ;""]","[""21 cm""]",,,,…,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,150,"""brožovaná"""
"""1""","""Dahlke, Rüdiger,""","""jo20000080475""","[""aut""]","""1951-""",,,,,"""nkc20152730281""",""" nam a22 i 4500""","""150910s2015 xr a e 0…","""(vázáno)""",,"[""978-80-264-0844-4""]",,,,,,,"""1""","""0""","""Peace food""","""italská veganská kuchařka /""","""Ruediger Dahlke ; překlad: Dag…",,,,,,"[""191 stran :""]","[""barevné ilustrace ;""]","[""23 cm""]",,,,…,"[""7"", ""9""]","""kuchařské recepty""","[""fd132687"", null]","[""czenas"", ""eczenas""]",,,,,"[""1""]","[""Sklenářová, Dagmar,""]","[""trl""]","[""1973-""]","[""js20060526018""]",,,,,,,,,,,,,,,,,,,,,,2015,191,"""pevná"""
"""1""","""Milatová, Růžena""","""mzk2011644878""","[""aut""]",,,,,,"""nkc20182977916""",""" nam a22 i 4500""","""180205s2018 xr af e f 0…","""(brožováno) :""","[""Kč 168,00""]","[""978-80-7429-992-6""]",,,,,,,"""1""","""0""","""Bezezbytková dieta při onemocn…","""112 receptů /""","""Růžena Milatová, Pavel Wohl""",,,,,,"[""76 stran, 8 nečíslovaných stran obrazových příloh :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""kuchařské recepty""","[""fd132687"", ""fd133209"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,"[""1""]","[""Wohl, Pavel,""]","[""aut""]","[""1970-""]","[""xx0054427""]",,,,,,,,,,,,,,,,,,,,,,2018,76,"""brožovaná"""


In [160]:
def kuchyne(slovo):
    return kucharky.filter(
        pl.col("245_a").str.contains("(?i)" + slovo) | pl.col("245_a").str.contains("(?i)" + slovo)
    ).group_by(
        "rok"
    ).len(
    ).join(kucharky.group_by("rok").len(), on="rok", how="right").with_columns((pl.col("len") / pl.col("len_right")).alias("podil")).sort(by="rok").rename({'len':'pocet'}).fill_null(0)

In [161]:
kuchyne('barbe')

pocet,rok,len_right,podil
u32,i64,u32,f64
0,1989,16,0.0
0,1990,20,0.0
0,1991,39,0.0
0,1992,57,0.0
0,1993,78,0.0
0,1994,80,0.0
0,1995,94,0.0
0,1996,91,0.0
0,1997,95,0.0
0,1998,103,0.0


In [162]:
grafik('barbe', funkce=kuchyne)

In [163]:
grafik('vegan', funkce=kuchyne)

In [164]:
grafik('sous ', funkce=kuchyne)

In [165]:
grafik('svačin', funkce=kuchyne)

In [166]:
grafik('pečiv', funkce=kuchyne)

In [167]:
grafik('chleb', funkce=kuchyne)

In [168]:
grafik('peče', funkce=kuchyne)

In [169]:
grafik('diabet', funkce=kuchyne)

In [170]:
grafik('gril', funkce=kuchyne)

In [171]:
grafik('cukrov', funkce=kuchyne)

In [172]:
grafik('zdrav', funkce=kuchyne)

In [173]:
grafik('bez ', funkce=kuchyne)

In [174]:
kucharky.filter(pl.col("245_a").str.contains("pro")).with_columns(pl.col("245_a").map_elements(lambda x: x.split("pro")[1].strip())).group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""diabetiky""",25
"""děti""",14
"""každou příležitost""",12
"""každého""",10
"""labužníky""",9
"""cukrářskou výrobu""",8
"""moderní ženu""",8
"""začátečníky""",8
"""každý den""",8
"""zdraví""",7


In [175]:
kucharky.filter(pl.col("245_a").str.contains(" s ")).with_columns(pl.col("245_a").map_elements(lambda x: x.split(" s ")[1].split(" ")[0].strip())).group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""láskou""",10
"""Měsícem""",6
"""Ellou""",5
"""fantazií""",4
"""Habadějem""",4
"""dětmi""",4
"""bylinkami""",4
"""konopím""",4
"""pivem""",3
"""dělenou""",3


In [176]:
kucharky.filter(pl.col("245_a").str.contains(" bez ")).with_columns(pl.col("245_a").map_elements(lambda x: x.split(" bez ")[1].split(" ")[0].strip())).group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""lepku""",24
"""mléka""",6
"""cholesterolu""",6
"""cukru""",6
"""vážení""",5
"""hladovění""",3
"""servítků""",2
"""soli""",2
"""zkušeností""",2
"""lepku,""",2


In [177]:
grafik(' s ', funkce=kuchyne)

In [178]:
grafik(' bez ', funkce=kuchyne)

In [179]:
grafik('frit', funkce=kuchyne)

In [180]:
grafik('indi', funkce=kuchyne)

In [181]:
grafik('japon', funkce=kuchyne)

In [182]:
grafik('vietn', funkce=kuchyne)

In [183]:
grafik('pán[ev]', funkce=kuchyne)

In [184]:
grafik('mikrov', funkce=kuchyne)

In [185]:
grafik('gril', funkce=kuchyne)

In [186]:
grafik('sex', funkce=kuchyne)

In [187]:
grafik('babi', funkce=kuchyne)

In [188]:
grafik('pomaz', funkce=kuchyne)

In [189]:
grafik('cukr', funkce=kuchyne)

In [190]:
grafik('hrní', funkce=kuchyne)

In [191]:
grafik('hubn', funkce=kuchyne)

In [192]:
grafik('omáč', funkce=kuchyne)

In [193]:
grafik('pol[íé]v', funkce=kuchyne)

In [194]:
grafik('pomaz', funkce=kuchyne)

In [195]:
grafik('keto', funkce=kuchyne)

In [196]:
grafik('paleo', funkce=kuchyne)

In [197]:
grafik('houb', funkce=kuchyne)

In [198]:
grafik('(kvas|kvaš|ferment)', funkce=kuchyne)

In [199]:
grafik('makrobiot', funkce=kuchyne)

In [200]:
grafik('bílkov', funkce=kuchyne)

In [201]:
kucharky.select(pl.col("245_a")).to_series().to_list()

['Kuchařka podle jógy',
 '100 úžasných chlebů',
 'Česká kuchařka pro váš typ',
 'Recept na štěstí?',
 'Zoufalé manželky',
 'Hrníčková kuchařka',
 'Sendviče a toasty',
 'Kincugi pro šťastný život',
 'Scarlatina, zesnulá kuchařka',
 'Vánoční kuchařka',
 'Míchané nápoje',
 'V hlavní roli chřest',
 'Kluci v akci',
 'Hádej, kam půjdem na oběd?',
 'Moderní osvěžující nápoje',
 'Kuchařka',
 'Velikonoční kuchařka',
 'Kulinářské cesty po České republice',
 'To nejlepší z levné kuchyně',
 'Fenylketonurie a mateřství',
 'KU-DO-KR',
 'Milion menu',
 'Sladkosti, po kterých se netloustne',
 'Božská kuchařka',
 'Jednoduchá kuchyně',
 'Bleskové likéry',
 'Škola vaření pro celou rodinu',
 'Chytré jídlo',
 'Tajemství australské kuchyně',
 'Kulinářská velmoc Británie',
 'Řízky mnoha druhů',
 'Vánoční kuchařka',
 'Mamachef',
 'Speciality ze zvěřiny',
 'Dělená strava, aneb, Hubneme zdravě',
 'Maminčiny dobroty',
 'Pečeme zdravě bez lepku',
 'Bylinky, plody a houby',
 'Zpívající kuchařka z Čech, Moravy, Sle