In [1]:
import os
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.zjisti_vazbu import zjisti_vazbu

# Notebook-wide display and chart configuration.
pl.Config(tbl_rows=1000)  # allow Polars to render up to 1000 table rows
alt.data_transformers.disable_max_rows()  # lift Altair's row limit on chart data
# Register the project theme function (kristi_promin from src) under the name
# 'irozhlas' and make it the active Altair theme.
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and Polars performance warnings.
warnings.filterwarnings('ignore')

In [2]:
# Build the working frame `df`: start from the field-100 export and left-join the
# other per-field parquet exports on the record id column "001".
df = pl.read_parquet(os.path.join("data/cnb_sloupce", "100.parquet"))
for _pole in ("leader", "008"):
    df = df.join(
        pl.read_parquet(os.path.join("data/cnb_sloupce", f"{_pole}.parquet")),
        left_on="001",
        right_on="001",
        how="left",
    )

# The fixed-position character tests are evaluated in pandas; the surviving rows
# are converted back to Polars afterwards.
# NOTE(review): leader[6]/leader[7] and 008[15:17]/008[35:38] look like MARC record
# type / level and country / language codes — confirm against the cataloguing rules.
df = df.to_pandas()
_maska = (
    df["leader"].str[6].isin(["a", "t"])
    & ~df["leader"].str[7].isin(["b", "i", "s", " "])
    & (df["008"].str[15:17] == "xr")
    & (df["008"].str[35:38] == "cze")
)
df = pl.from_pandas(df[_maska])

for _pole in ("020", "022", "245", "300", "650", "653", "655", "700"):
    df = df.join(
        pl.read_parquet(os.path.join("data/cnb_sloupce", f"{_pole}.parquet")),
        left_on="001",
        right_on="001",
        how="left",
    )

# Keep only rows without a 022_a value, then derive year ("rok"), page count
# ("stran") and binding ("vazba") and clean the title columns with the src helpers.
df = (
    df.explode("022_a")
    .filter(pl.col("022_a").is_null())
    .with_columns(pl.col("008").map_elements(najdi_rok, return_dtype=int).alias("rok"))
    .with_columns(pl.col("300_a").map_elements(pocet_stran, return_dtype=int).alias("stran"))
    .with_columns(pl.col("245_a").map_elements(bez_bordelu, return_dtype=str))
    .explode("020_q")
    .with_columns(pl.col("020_q").map_elements(zjisti_vazbu, return_dtype=str).alias("vazba"))
    .explode("245_p")
    .with_columns(pl.col("245_p").map_elements(bez_bordelu, return_dtype=str))
)
print(df.height)

# Drop items under 30 pages and anything whose 245_h mentions "grafika", then
# keep one row per (author, title, part title) combination.
df = (
    df.filter(pl.col("stran") >= 30)
    .filter(pl.col("245_h").is_null() | ~pl.col("245_h").str.contains("grafika"))
    .unique(subset=["100_a", "245_a", "245_p"], keep="first")
)
print(df.height)

794838
506079


## Kdy se vyplňovalo pole 655_a?

In [4]:
# Share of rows per year that have field 655_a filled in ("vyplnenost"):
# len = rows with a non-null 655_a, len_right = all rows of that year.
# The sort runs AFTER the join: Polars only guarantees the row order of a join
# result when maintain_order is set, so sorting beforehand could let .tail(50)
# return an arbitrary set of years.
(
    df.filter(pl.col("655_a").is_not_null())
    .group_by("rok")
    .len()
    .join(df.group_by("rok").len(), on="rok", how="left")
    .with_columns((pl.col("len") / pl.col("len_right")).alias("vyplnenost"))
    .sort(by="rok")
    .tail(50)
)

rok,len,len_right,vyplnenost
i64,u32,u32,f64
1976,621,2823,0.219979
1977,667,2891,0.230716
1978,709,2932,0.241814
1979,900,3220,0.279503
1980,1036,3324,0.311673
1981,1035,3164,0.327118
1982,1445,3380,0.427515
1983,2483,3885,0.639125
1984,2522,3989,0.632239
1985,2534,3813,0.664569


In [5]:
# Restrict the analysis to years 1989 through 2024.
# NOTE(review): this overwrites `df`, so the fill-rate table above was computed on
# the unfiltered frame while everything below sees only these years.
df = df.filter(pl.col("rok").is_between(1989,2024))

In [6]:
df.filter(pl.col("245_a").str.contains("Každý den je nový")).explode("655_a")

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind2,655_a,655_7,655_2,655_ind1,655_x,655_z,655_y,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""autobiografické komiksy""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""deníky""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""autobiographical comics""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Lomová, Lucie,""","""xx0026705""","[""aut"", ""ill""]","""1964-""",,,,,"""nkc20223471706""",""" nam a22 i 4500""","""221110s2022 xr a g 6 0…","""(vázáno)""",,"[""978-80-88378-17-4""]",,,,,,,"""1""","""0""","""Každý den je nový""","""komiksový deník /""","""Lucie Lomová""",,,,,,"[""106, 98 stran :""]","[""barevné ilustrace ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""diaries""","[""fd1058616"", ""fd132006"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,106,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""citáty""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""aforismy""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""quotations""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""
"""1""","""Mandžuková, Jarmila,""","""jn20001103350""","[""aut""]","""1959-""",,,,,"""nkc20183060546""",""" nam a22 i 4500""","""181206s2018 xr g 0…","""(vázáno) :""","[""Kč 229,00""]","[""978-80-7601-047-5""]",,,,,,,"""1""","""0""","""Každý den je nový začátek""","""každý nový den je zázrak, aneb…","""Jarmila Mandžuková""",,,,,,"[""119 stran ;""]",,"[""14 cm""]",,,,…,"[""7"", ""7"", … ""9""]","""aphorisms""","[""fd131832"", ""fd131784"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018,119,"""pevná"""


In [7]:
df.filter(pl.col("245_a").str.contains("Pod dekou"))

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind2,655_a,655_7,655_2,655_ind1,655_x,655_z,655_y,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Thompson, Craig,""","""xx0037934""","[""aut"", ""ill""]","""1975-""",,,,,"""nkc20051632284""",""" nam a22 a 4500""","""051123s2005 xr a g 0…","""(váz.) :""","[""Kč 590,00""]","[""80-7341-603-4""]",,,,,,,"""1""","""0""","""Pod dekou""","""ilustrovaný román /""","""Craig Thompson ; [z anglického…",,,,,,"[""582 s. :""]","[""il. ;""]","[""25 cm""]",,,,…,"[""7"", ""7"", … ""9""]","[""americké romány"", ""komiksy"", … ""comics""]","[""fd131796"", ""fd131978"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2005,582,"""pevná"""


In [8]:
# Ranking of 655_a terms by number of rows carrying them (one row per book/term
# pair after the explode), most frequent first.
zebricek = (
    df.explode("655_a")
    .group_by("655_a")
    .len()
    .sort(by="len", descending=True)
)
# Show only terms that occur at least 200 times.
zebricek.filter(pl.col("len") >= 200)

655_a,len
str,u32
"""příručky""",32543
"""handbooks and manuals""",24429
"""učebnice vysokých škol""",21484
"""publikace pro děti""",19832
"""monografie""",18092
"""children's literature""",16878
"""monographs""",16462
"""textbooks (higher)""",13578
"""populárně-naučné publikace""",13502
"""studie""",12687


In [9]:
zebricek.filter(pl.col("655_a").str.contains("dívč"))

655_a,len
str,u32
"""dívčí romány""",209


In [10]:
# Spot-check 20 random rows; the fixed seed keeps the displayed sample stable
# across Restart & Run All.
df.sample(20, seed=42)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,655_ind2,655_a,655_7,655_2,655_ind1,655_x,655_z,655_y,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Gardner, Erle Stanley,""","""jn19981001320""","[""aut""]","""1889-1970""",,,,,"""cpk20000886431""",""" cam a22 a 4500""","""000817s2000 xr g 0…","""(váz.)""",,"[""80-85776-89-8""]",,,,,,,"""1""","""0""","""Záhada bílých gardénií""",,"""E.S. Gardner ; [z anglického o…",,,,,,"[""174 s. ;""]",,"[""20 cm""]",,,,…,"[""7"", ""7"", … ""9""]","[""americké romány"", ""detektivní romány"", … ""detective novels""]","[""fd131796"", ""fd132010"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2000,174,"""pevná"""
"""1""","""Jonas, Nina""",,"[""aut""]",,,,,,"""cpk20010886381""",""" cam a22 a 4500""","""000728s2000 xr e 0…","""(brož.) :""","[""Kč 25,00""]","[""80-243-0355-8""]",,,,,,,"""1""","""0""","""Sladké polibky""",,"""Nina Jonasová""",,,,,,"[""63 s. ;""]",,"[""22 cm""]",,,,…,"[""7"", ""9""]","[""milostné povídky"", ""Love stories""]","[""fd132834"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2000,63,"""brožovaná"""
"""1""","""Leong, Sonia,""","""xx0094953""","[""ill""]","""1982-""",,,,,"""nkc20091927228""",""" nam a22 a 4500""","""090424s2009 xr a d 0…","""(brož.)""",,"[""978-80-00-02271-0""]",,,,,,,"""1""","""0""","""Romeo & Julie""",,"""ilustrovala Sonia Leong""",,,,,,"[""196 s. :""]","[""vše il. ;""]","[""21 cm""]",,,,…,"[""7"", ""7"", … ""9""]","[""anglická dramata"", ""tragédie"", … ""juvenile literature""]","[""fd131806"", ""fd133711"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,"[""1"", ""1""]","[""Hilský, Martin,"", ""Zátka, Ondřej,""]","[""aft"", ""edt""]","[""1943-"", ""1980-""]","[""jk01041063"", ""xx0114449""]",,,,,,,,,,,,,,,,,,,,,,2009,196,"""brožovaná"""
"""1""","""Přidal, Tomáš,""","""jn19981228114""","[""aut"", ""ill""]","""1968-""",,,,,"""nkc20112218922""",""" nam a22 a 4500""","""110801s2011 xr a e 0…","""(váz.)""",,"[""978-80-7227-311-9""]",,,,,,,"""1""","""0""","""Pikantní poldové""",,"""Tomáš Přidal""",,,,,,"[""113 s. :""]","[""barev. il. ;""]","[""22 cm""]",,,,…,"[""7"", ""9""]","[""česká poezie"", ""Czech poetry""]","[""fd133958"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,113,"""pevná"""
"""1""","""Bernhard, Thomas,""","""jn19990000738""","[""aut""]","""1931-1989""",,,,,"""nkc20132474868""",""" nam a22 a 4500""","""130712s2013 xr g 0…","""(váz.) :""","[""Kč 347,00""]","[""978-80-7260-274-2""]",,,,,,,"""1""","""0""","""Pravdě na stopě""",,"""Thomas Bernhard ; přeložili Ni…",,,,,,"[""287 s. ;""]",,"[""19 cm""]",,,,…,"[""7"", ""7"", … ""9""]","[""rozhovory"", ""eseje"", … ""selected works""]","[""fd133303"", ""fd132213"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2013,287,"""pevná"""
"""1""","""Beneš, Pavel""","""mzk20201064219""","[""aut""]",,,,,,"""nkc20213365080""",""" cam a22 i 4500""","""210817s2019 xr e |…",,,,,,,,,,"""1""","""0""","""850 let Nečtin""",,"""texty: Pavel Beneš, Irena Buka…",,,,,,"[""125 stran :""]","[""ilustrace (převážně barevné), faksimile ;""]","[""22 x 30 cm""]",,,,…,"[""7"", ""7""]","[""kolektivní monografie"", ""jubilejní publikace""]","[""fd501537"", ""fd131839""]","[""czenas"", ""czenas""]",,,,,"[""1"", ""1"", … ""1""]","[""Bukačová, Irena,"", ""Fák, Jiří,"", … ""Zelenka, Antonín,""]","[""aut"", ""aut"", … ""aut""]","[""1949-"", ""1964-"", … ""1978-""]","[""jn19981000358"", ""mzk2005282418"", … ""mzk20201069028""]",,,,,,,,,,,,,,,,,,,,,,2019,125,
"""1""","""Kepel, Gilles,""","""jn19990004273""","[""aut""]","""1955-""",,,,,"""nkc20223402518""",""" nam a22 i 4500""","""220317s2022 xr b f 0…","""(brožováno)""",,"[""978-80-246-5179-8""]",,,,,,,"""1""","""0""","""Cesta z chaosu""","""krize ve Středomoří a na Blízk…","""Gilles Kepel ; z francouzského…",,,,,,"[""401 stran :""]","[""mapy ;""]","[""21 cm""]",,,,…,"[""7"", ""9""]","[""monografie"", ""monographs""]","[""fd132842"", null]","[""czenas"", ""eczenas""]",,,,,"[""1""]","[""Belisová, Šárka,""]","[""trl""]","[""1957-""]","[""mzk2003185055""]",,,,,,,,,,,,,,,,,,,,,,2022,401,"""brožovaná"""
"""1""","""Chrástová, Hana""","""jx20081201013""","[""aut""]",,,,,,"""nkc20081831475""",""" nam a22 a 4500""","""081120s2008 xr c e 0…","""(brož.)""",,"[""978-80-86986-20-3""]",,,,,,,"""1""","""0""","""Prezidenti Spojených států ame…",,"""[Hana Chrástová, Tomáš Novotný…",,,,,,"[""93 s. :""]","[""portréty (některé barev.) ;""]","[""25 cm""]",,,,…,"[""7"", ""9""]","[""medailony"", ""biographical portraits""]","[""fd185951"", null]","[""czenas"", ""eczenas""]",,,,,"[""1""]","[""Novotný, Tomáš""]","[""aut""]",,"[""xx0055544""]",,,,,,,,,,,,,,,,,,,,,,2008,93,"""brožovaná"""
"""1""","""Penn, Nate""",,"[""aut""]",,,,,,"""cpk19970291290""",""" nam a22 a 4500""","""980108s1997 xr e 0…","""(brož.)""",,"[""80-85872-83-8""]",,,,,,,"""1""","""0""","""Taktika mužů""","""kodex zásad, jak dostat od žen…","""Nate Penn, Lawrence LaRose ; […",,,,,,"[""109 s. ;""]",,"[""20 cm""]",,,,…,"[""7""]","[""populárně-naučné publikace""]","[""fd131864""]","[""czenas""]",,,,,"[""1"", ""1""]","[""Hauserová, Eva,"", ""LaRose, Lawrence""]","[""trl"", ""aut""]","[""1954-2023"", null]","[""jk01040194"", null]",,,,,,,,,,,,,,,,,,,,,,1997,109,"""brožovaná"""
"""1""","""Darman, Peter""","""jn20001005424""","[""aut""]",,,,,,"""cpk19980296020""",""" cam a22 a 4500""","""980428s1997 xr a e 0…","""(váz.)""",,"[""80-7180-331-6""]",,,,,,,"""1""","""0""","""Zbraně a vybavení SAS""",,"""Peter Darman ; [z angličtiny p…",,,,,,"[""192 s. :""]","[""il. (převážně barev.) ;""]","[""25 cm""]",,,,…,"[""7""]","[""obrazové publikace""]","[""fd132947""]","[""czenas""]",,,,,"[""1""]","[""Háčik, Ľubomír,""]","[""trl""]","[""1954-""]","[""xx0017178""]",,,,,,,,,,,,,,,,,,,,,,1997,192,"""pevná"""


In [11]:
# Compare 655_a term counts between 2000-2003 ("len") and 2021-2024 ("len_right").
# "rozdil" is the ratio old/new and "celkem" the sum of both counts; terms that
# are missing in one period keep nulls in both derived columns.
vyvoj = (
    df.filter(pl.col("rok").is_between(2000, 2003))
    .explode("655_a")
    .group_by("655_a")
    .len()
    .join(
        df.filter(pl.col("rok").is_between(2021, 2024))
        .explode("655_a")
        .group_by("655_a")
        .len(),
        on="655_a",
        how="full",
    )
    .with_columns(
        (pl.col("len") / pl.col("len_right")).alias("rozdil"),
        (pl.col("len") + pl.col("len_right")).alias("celkem"),
    )
    .sort(by="rozdil")
)

In [12]:
vyvoj.filter(pl.col("celkem") >= 100).head(150)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""romány pro ženy""",1,"""romány pro ženy""",595,0.001681,596
"""women's novels""",1,"""women's novels""",595,0.001681,596
"""fantasy comics""",1,"""fantasy comics""",259,0.003861,260
"""fantasy komiksy""",1,"""fantasy komiksy""",259,0.003861,260
"""akční a dobrodružné komiksy""",1,"""akční a dobrodružné komiksy""",255,0.003922,256
"""action and adventure comics""",1,"""action and adventure comics""",252,0.003968,253
"""superhrdinské komiksy""",1,"""superhrdinské komiksy""",221,0.004525,222
"""superhero comics""",1,"""superhero comics""",219,0.004566,220
"""autorské knihy""",1,"""autorské knihy""",164,0.006098,165
"""authors' books""",1,"""authors' books""",162,0.006173,163


In [13]:
vyvoj.filter(pl.col("celkem") >= 100).tail(150)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""humoristické příběhy""",44,"""humoristické příběhy""",128,0.34375,172
"""detective stories""",27,"""detective stories""",78,0.346154,105
"""Czech prose""",147,"""Czech prose""",420,0.35,567
"""obrazové publikace""",372,"""obrazové publikace""",1048,0.354962,1420
"""české romány""",751,"""české romány""",2083,0.360538,2834
"""texts""",38,"""texts""",101,0.376238,139
"""popular works""",742,"""popular works""",1949,0.380708,2691
"""fotografické publikace""",556,"""fotografické publikace""",1453,0.382657,2009
"""autobiografické prózy""",44,"""autobiografické prózy""",113,0.389381,157
"""catalogs""",66,"""catalogs""",168,0.392857,234


In [14]:
# Peek at 50 random non-empty 653_a keyword lists; seeded so the output is
# reproducible on a fresh run.
df.select(pl.col("653_a")).drop_nulls().sample(50, seed=42)

653_a
list[str]
"[""národohospodářské subjekty"", ""podniková ekonomika"", … ""podnikový management""]"
"[""CrossFit""]"
"[""elektroerozivní drátové řezání""]"
"[""napoleonika""]"
"[""agrární struktury"", ""ekonomické struktury"", … ""vyspělé země""]"
"[""milost"", ""ospravedlnění"", … ""zásluhy""]"
"[""Nový zákon"", ""Starý zákon"", ""bible""]"
"[""ášramy""]"
"[""dějiny"", ""středověk""]"
"[""chemie"", ""ekologie"", … ""životní prostředí""]"


In [15]:
# Two-period comparison of 653_a keywords: "len" counts rows in 2000-2003,
# "len_right" rows in 2021-2024, matched with a full join on the keyword.
# "rozdil" (old/new ratio) is computed BEFORE fill_null(0), so a keyword missing in
# either period ends up with rozdil == 0; "celkem" is summed AFTER the fill, so it
# is defined for every keyword.
vyvoj2 = df.filter(pl.col("rok").is_between(2000,2003)).explode("653_a").group_by("653_a").len().join(
    df.filter(pl.col("rok").is_between(2021,2024)).explode("653_a").group_by("653_a").len(), on="653_a", how="full"
).with_columns((pl.col("len") / pl.col("len_right")).alias("rozdil")).fill_null(0).with_columns((pl.col("len") + pl.col("len_right")).alias("celkem")).sort(by="rozdil")
# Display keywords with more than 5 occurrences across both periods.
vyvoj2.filter(pl.col("celkem") > 5)

653_a,len,653_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
,0,"""jazyková úroveň B1""",14,0.0,14
,0,,31951,0.0,31951
,0,"""literatura new adult""",50,0.0,50
,0,"""jazyková úroveň B2""",7,0.0,7
,0,"""jazyková úroveň A2""",14,0.0,14
,0,"""jazyková úroveň A1""",15,0.0,15
,31255,,0,0.0,31255
"""denní četba""",13,"""denní četba""",44,0.295455,57


In [16]:
# Two-period comparison of 655_a terms (2000-2003 as "len" vs 2021-2024 as
# "len_right") joined with a full join. Unlike `vyvoj`, nulls are replaced by 0
# after the ratio is computed: terms present in only one period get rozdil == 0
# and still receive a non-null "celkem".
vyvoj3 = df.filter(pl.col("rok").is_between(2000,2003)).explode("655_a").group_by("655_a").len().join(
    df.filter(pl.col("rok").is_between(2021,2024)).explode("655_a").group_by("655_a").len(), on="655_a", how="full"
).with_columns((pl.col("len") / pl.col("len_right")).alias("rozdil")).fill_null(0).with_columns((pl.col("len") + pl.col("len_right")).alias("celkem")).sort(by="rozdil")

In [17]:
vyvoj3.filter(pl.col("celkem") > 100).filter(pl.col('rozdil') == 0).sort(by='celkem',descending=True)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""Love stories""",1258,,0,0.0,1258
"""Textbooks""",1125,,0,0.0,1125
"""Detective and mystery stories""",491,,0,0.0,491
"""Short stories, Czech""",429,,0,0.0,429
"""Science fiction""",393,,0,0.0,393
,0,"""manga""",339,0.0,339
"""Adventure stories""",333,,0,0.0,333
,307,,0,0.0,307
,0,"""young adult literature""",281,0.0,281
,0,"""literatura young adult""",280,0.0,280


In [100]:
vyvoj3.filter(pl.col("celkem") > 200).filter(pl.col('rozdil') > 1).sort(by='rozdil',descending=True)

655_a,len,655_a_right,len_right,rozdil,celkem
str,u32,str,u32,f64,u32
"""milostné povídky""",492,"""milostné povídky""",36,13.666667,528
"""lyrická poezie""",187,"""lyrická poezie""",20,9.35,207
"""právní předpisy""",225,"""právní předpisy""",32,7.03125,257
"""učebnice vysokých škol""",3224,"""učebnice vysokých škol""",533,6.04878,3757
"""studie""",2336,"""studie""",471,4.95966,2807
"""vědecko-fantastické povídky""",229,"""vědecko-fantastické povídky""",50,4.58,279
"""textbooks (higher)""",2249,"""textbooks (higher)""",529,4.251418,2778
"""učebnice""",392,"""učebnice""",113,3.469027,505
"""studies""",1454,"""studies""",456,3.188596,1910
"""dobrodružné povídky""",314,"""dobrodružné povídky""",101,3.108911,415


In [102]:
# Genre patterns passed to podil_zanru and reused as the row order of the charts:
# `top` feeds the "Rostoucí kategorie…" chart, `flop` the "…upadající kategorie" one.
top = ['komiksy','young adult',"erotické romány"]
flop = ["učebnice","slovníky","encyklopedie"]

In [44]:
def podil_zanru(zanr):
    """Return the yearly count and share of books whose 655_a matches ``zanr``.

    Parameters
    ----------
    zanr : str
        Pattern searched for in the 655_a terms. It is used as a
        case-insensitive regular expression ("(?i)" is prepended).

    Returns
    -------
    polars.DataFrame
        One row per year present among rows with a non-null 655_a, sorted by
        "rok", with columns "pocet" (matching rows), "rok", "len_right" (all
        rows with 655_a filled in), "podil" (pocet / len_right) and "zanr"
        (the pattern itself). Years without any match have pocet and podil 0.

    Notes
    -----
    The match is evaluated per row on the 655_a list, so a book carrying
    several matching terms (for example both "young adult literature" and
    "literatura young adult") is counted once. Counting exploded term rows
    instead would inflate "pocet" relative to the per-book denominator.
    """
    vzor = "(?i)" + zanr
    # True when at least one term of the row's 655_a list matches the pattern.
    ma_zanr = pl.col("655_a").list.eval(pl.element().str.contains(vzor)).list.any()
    return (
        df.filter(ma_zanr)
        .group_by("rok")
        .len()
        # Right join keeps every year that has any genre data, matched or not.
        .join(df.drop_nulls(subset=["655_a"]).group_by("rok").len(), on="rok", how="right")
        .with_columns((pl.col("len") / pl.col("len_right")).alias("podil"))
        .sort(by="rok")
        .rename({'len': 'pocet'})
        .with_columns(
            # A year with no matching book has count 0 and share 0, not "unknown".
            pl.col("pocet").fill_null(0),
            pl.col("podil").fill_null(0.0),
            pl.lit(zanr).alias("zanr"),
        )
    )

In [21]:
df.explode("655_a").filter(pl.col('655_a') == "erotické romány").group_by('245_a').len().sort(by='len',descending=True)

245_a,len
str,u32
"""Hříšné odstíny vášně""",4
"""Jezdci apokalypsy""",4
"""Královská sága plná sexu""",4
"""Twisted""",4
"""Barvy lásky""",4
"""Vlci zvěrokruhu""",4
"""Hříšní vévodové""",3
"""This man trilogie""",3
"""Sin trilogy""",3
"""Bratři Steelové""",3


In [46]:
podil_zanru("komiks")

pocet,rok,len_right,podil,zanr
u32,i64,u32,f64,str
4,1989,3114,0.001285,"""komiks"""
6,1990,3219,0.001864,"""komiks"""
8,1991,3599,0.002223,"""komiks"""
8,1992,4450,0.001798,"""komiks"""
5,1993,5949,0.00084,"""komiks"""
11,1994,6066,0.001813,"""komiks"""
8,1995,6783,0.001179,"""komiks"""
6,1996,7011,0.000856,"""komiks"""
10,1997,7394,0.001352,"""komiks"""
10,1998,7207,0.001388,"""komiks"""


In [104]:
# Stack the per-genre yearly tables returned by podil_zanru into one frame per
# chart; the "zanr" column tells the genres apart.
top_data = pl.concat([podil_zanru(z) for z in top])
flop_data = pl.concat([podil_zanru(z) for z in flop])

In [108]:
def _zanrovy_graf(data, titulek, poradi):
    """Build a small-multiples area chart of yearly counts, one row per genre.

    data    -- frame with "rok", "pocet" and "zanr" columns (passed through alt_friendly)
    titulek -- chart title
    poradi  -- list of "zanr" values giving the top-to-bottom order of the rows
    """
    osa_y = alt.Axis(orient='right', domainOpacity=0, tickColor='#DCDDD6')
    hlavicka = alt.Header(labelAngle=0, labelAlign='left', labelAnchor='middle', labelFont='Asap')
    return (
        alt.Chart(alt_friendly(data), title=titulek, width=300, height=80)
        .mark_area()
        .encode(
            alt.X("rok:T", title=None),
            alt.Y("pocet", title=None, axis=osa_y),
            alt.Row("zanr", title=None, sort=poradi, header=hlavicka),
        )
        # Each genre row gets its own y scale and axis, since the counts differ widely.
        .resolve_axis(y="independent")
        .resolve_scale(y="independent")
    )


top_graf = _zanrovy_graf(top_data, "Rostoucí kategorie…", top)
flop_graf = _zanrovy_graf(flop_data, "…upadající kategorie", flop)

alt.vconcat(top_graf, flop_graf).configure_view(stroke='transparent')

In [None]:
podil_zanru("pohádky")

In [None]:
podil_zanru("erotick")

In [None]:
podil_zanru("dívčí romány")

In [None]:
podil_zanru("populárně-naučné publikace")

In [None]:
def grafik(z, funkce=podil_zanru):
    """Draw a line chart of the yearly share ("podil") for pattern ``z``.

    Parameters
    ----------
    z : str
        Pattern handed unchanged to ``funkce``.
    funkce : callable
        Function called as ``funkce(z)``; it must return a frame with "rok"
        and "podil" columns. Defaults to podil_zanru.

    Returns
    -------
    altair.Chart
        Line chart with the year on x and the share, labelled in percent, on y.
    """
    osa_podil = alt.Axis(
        # Use the numeric tick value, not the formatted label string:
        # "0.007" * 100 evaluates to 0.7000000000000001 in the Vega expression
        # language, which would leak into the axis label. '~g' trims the
        # floating-point noise and trailing zeros.
        labelExpr="format(datum.value * 100, '~g') + ' %'",
        orient='right',
        domainOpacity=0,
        tickColor='#DCDDD6',
        title=None,
    )
    return (
        alt.Chart(alt_friendly(funkce(z)))
        .mark_line()
        .encode(
            alt.X("rok:T", axis=alt.Axis(title=None)),
            alt.Y('podil:Q', axis=osa_podil),
        )
        .configure_view(stroke='transparent')
    )

In [None]:
grafik('erotické')

In [None]:
grafik('CD-ROM')

In [None]:
grafik("dvojjazyčná vydání")

In [None]:
grafik("fantasy romány"	)

In [None]:
grafik("učebnice základních škol")

In [None]:
grafik("aforismy")

In [None]:
grafik("rusk")

In [None]:
grafik("young adult")

In [None]:
grafik("příručky")

In [None]:
grafik("dívčí romány")

In [None]:
grafik("komiks")

In [None]:
grafik("detektiv")

In [None]:
grafik("rozhovory")

In [None]:
grafik("deníky")

In [None]:
grafik("autobiogr")

In [None]:
grafik("encyclopedias")

In [None]:
grafik("učebnice vysokých škol")

In [None]:
grafik("kuchař")

## Kuchařky

In [None]:
# Cookbooks: rows of `df` with at least one 655_a term containing "kuchař".
# A temporary row index collapses the exploded frame back to ONE row per source
# row, so a book tagged with several matching terms is not counted repeatedly in
# the statistics below. 655_a stays a plain string (the first matching term).
kucharky = (
    df.with_row_index("_radek")
    .explode("655_a")
    .filter(pl.col("655_a").str.contains("kuchař"))
    .unique(subset="_radek", keep="first", maintain_order=True)
    .drop("_radek")
)

In [None]:
# Spot-check 10 random cookbook rows; seeded so the output is reproducible.
kucharky.sample(10, seed=42)

In [None]:
def kuchyne(slovo, sloupce=("245_a",)):
    """Return the yearly count and share of cookbooks whose title matches ``slovo``.

    Parameters
    ----------
    slovo : str
        Pattern searched for in the title columns, used as a case-insensitive
        regular expression ("(?i)" is prepended), e.g. 'pol[íé]v'.
    sloupce : str or sequence of str
        Columns of `kucharky` to search; a row matches when any of them does.
        Defaults to the main title "245_a" only, which is what the previous
        implementation effectively searched (it OR-ed the same 245_a condition
        with itself).
        NOTE(review): the duplicated condition suggests a second column such as
        "245_b" was intended — pass sloupce=("245_a", "245_b") if so.

    Returns
    -------
    polars.DataFrame
        One row per year present in `kucharky`, sorted by "rok", with columns
        "pocet" (matching rows), "rok", "len_right" (all cookbook rows) and
        "podil" (pocet / len_right); nulls are replaced by 0.
    """
    if isinstance(sloupce, str):
        sloupce = (sloupce,)
    vzor = "(?i)" + slovo
    odpovida = pl.any_horizontal([pl.col(s).str.contains(vzor) for s in sloupce])
    return (
        kucharky.filter(odpovida)
        .group_by("rok")
        .len()
        # Right join keeps every year that has cookbooks, matched or not.
        .join(kucharky.group_by("rok").len(), on="rok", how="right")
        .with_columns((pl.col("len") / pl.col("len_right")).alias("podil"))
        .sort(by="rok")
        .rename({'len': 'pocet'})
        .fill_null(0)
    )

In [None]:
kuchyne('barbe')

In [None]:
grafik('barbe', funkce=kuchyne)

In [None]:
grafik('vegan', funkce=kuchyne)

In [None]:
grafik('sous ', funkce=kuchyne)

In [None]:
grafik('svačin', funkce=kuchyne)

In [None]:
grafik('pečiv', funkce=kuchyne)

In [None]:
grafik('chleb', funkce=kuchyne)

In [None]:
grafik('peče', funkce=kuchyne)

In [None]:
grafik('diabet', funkce=kuchyne)

In [None]:
grafik('gril', funkce=kuchyne)

In [None]:
grafik('cukrov', funkce=kuchyne)

In [None]:
grafik('zdrav', funkce=kuchyne)

In [None]:
grafik('bez ', funkce=kuchyne)

In [None]:
# What cookbooks are "pro" (for): the text following the word " pro " in the title,
# ranked by frequency. The preposition is matched with surrounding spaces — like
# the " s " and " bez " variants — so words that merely contain "pro"
# (e.g. "proti", "program") no longer pollute the ranking.
(
    kucharky.filter(pl.col("245_a").str.contains(" pro "))
    .with_columns(
        pl.col("245_a").map_elements(
            lambda x: x.split(" pro ")[1].strip(),
            return_dtype=str,
        )
    )
    .group_by('245_a')
    .len()
    .sort(by='len', descending=True)
)

In [None]:
# First word following " s " ("with") in cookbook titles, ranked by frequency.
# return_dtype is given explicitly, as in the other map_elements calls of this
# notebook, instead of letting Polars infer it from the first value.
(
    kucharky.filter(pl.col("245_a").str.contains(" s "))
    .with_columns(
        pl.col("245_a").map_elements(
            lambda x: x.split(" s ")[1].split(" ")[0].strip(),
            return_dtype=str,
        )
    )
    .group_by('245_a')
    .len()
    .sort(by='len', descending=True)
)

In [None]:
# First word following " bez " ("without") in cookbook titles, ranked by frequency.
# return_dtype is given explicitly, as in the other map_elements calls of this
# notebook, instead of letting Polars infer it from the first value.
(
    kucharky.filter(pl.col("245_a").str.contains(" bez "))
    .with_columns(
        pl.col("245_a").map_elements(
            lambda x: x.split(" bez ")[1].split(" ")[0].strip(),
            return_dtype=str,
        )
    )
    .group_by('245_a')
    .len()
    .sort(by='len', descending=True)
)

In [None]:
grafik(' s ', funkce=kuchyne)

In [None]:
grafik(' bez ', funkce=kuchyne)

In [None]:
grafik('frit', funkce=kuchyne)

In [None]:
grafik('indi', funkce=kuchyne)

In [None]:
grafik('japon', funkce=kuchyne)

In [None]:
grafik('vietn', funkce=kuchyne)

In [None]:
grafik('pán[ev]', funkce=kuchyne)

In [None]:
grafik('mikrov', funkce=kuchyne)

In [None]:
grafik('gril', funkce=kuchyne)

In [None]:
grafik('sex', funkce=kuchyne)

In [None]:
grafik('babi', funkce=kuchyne)

In [None]:
grafik('pomaz', funkce=kuchyne)

In [None]:
grafik('cukr', funkce=kuchyne)

In [None]:
grafik('hrní', funkce=kuchyne)

In [None]:
grafik('hubn', funkce=kuchyne)

In [None]:
grafik('omáč', funkce=kuchyne)

In [None]:
grafik('pol[íé]v', funkce=kuchyne)

In [None]:
grafik('pomaz', funkce=kuchyne)

In [None]:
grafik('keto', funkce=kuchyne)

In [None]:
grafik('paleo', funkce=kuchyne)

In [None]:
grafik('houb', funkce=kuchyne)

In [None]:
grafik('(kvas|kvaš|ferment)', funkce=kuchyne)

In [None]:
grafik('makrobiot', funkce=kuchyne)

In [None]:
grafik('bílkov', funkce=kuchyne)

In [None]:
kucharky.select(pl.col("245_a")).to_series().to_list()