In [170]:
import os
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.zjisti_vazbu import zjisti_vazbu

pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

In [171]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode("020_q").with_columns(pl.col("020_q").map_elements(zjisti_vazbu, return_dtype=str).alias('vazba'))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))
df = df.filter(pl.col("rok") >= 1800)

df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["008","100_a","245_a","245_p"], keep="first")
print(len(df))

794838
705250


In [172]:
df = df.filter(pl.col("rok") <= 2024)

In [173]:
bestsellery = pd.read_json(os.path.join("data","sckn_zebricky.json"))
bestsellery = pl.from_pandas(bestsellery)
bestsellery
isbn_bestselleru = set(bestsellery.select(pl.col("sckn_isbn")).to_series().to_list())
df_isbn = df.explode("020_a").drop_nulls(subset=['020_a']).with_columns(pl.col("020_a").map_elements(lambda x: x.replace('-','')).alias("isbn"))

## Vazby

In [175]:
def podil_vazeb(frejm):
    vazby = frejm.filter(pl.col("vazba") == 'brožovaná').group_by("rok").len().sort(by="rok").rename({"len":"brožovaná"})
    vazby = vazby.join(df.filter(pl.col("vazba").is_in(['brožovaná','pevná'])).group_by("rok").len().sort(by="rok").rename({"len":"jakákoliv"}), left_on="rok", right_on="rok", how="left")
    vazby = vazby.join(df.group_by("rok").len().sort(by="rok").rename({"len":"vše"}), left_on="rok", right_on="rok", how="left")
    vazby = vazby.with_columns((pl.col("brožovaná") / pl.col("jakákoliv")).alias("podíl"))
    vazby = vazby.with_columns((pl.col("jakákoliv") / pl.col("vše")).alias("vyplněnost"))
    return vazby

In [176]:
podil_vazeb(df)

rok,brožovaná,jakákoliv,vše,podíl,vyplněnost
i64,u32,u32,u32,f64,f64
1801,8,11,12,0.727273,0.916667
1802,4,10,12,0.4,0.833333
1803,7,11,16,0.636364,0.6875
1804,8,12,17,0.666667,0.705882
1805,9,14,16,0.642857,0.875
1806,4,7,9,0.571429,0.777778
1807,8,13,18,0.615385,0.722222
1808,7,9,14,0.777778,0.642857
1809,6,12,15,0.5,0.8
1810,11,12,17,0.916667,0.705882


In [177]:
podil_vazeb(df.filter(pl.col("stran") > 50))

rok,brožovaná,jakákoliv,vše,podíl,vyplněnost
i64,u32,u32,u32,f64,f64
1801,3,11,12,0.272727,0.916667
1802,1,10,12,0.1,0.833333
1803,2,11,16,0.181818,0.6875
1804,6,12,17,0.5,0.705882
1805,6,14,16,0.428571,0.875
1806,4,7,9,0.571429,0.777778
1807,5,13,18,0.384615,0.722222
1808,5,9,14,0.555556,0.642857
1809,2,12,15,0.166667,0.8
1810,7,12,17,0.583333,0.705882


In [178]:
df.explode("655_a").group_by("655_a").len().sort(by="len",descending=True).head(100)

655_a,len
str,u32
,219885
"""příručky""",51782
"""učebnice vysokých škol""",42994
"""publikace pro děti""",36285
"""handbooks and manuals""",30675
"""children's literature""",28022
"""studie""",23275
"""monografie""",23142
"""textbooks (higher)""",19977
"""monographs""",18527


In [179]:
podil_vazeb(df.sort(by="rok").unique(subset=["100_a","245_a"]).explode("655_a").filter(pl.col("655_a") == "milostné povídky"))

rok,brožovaná,jakákoliv,vše,podíl,vyplněnost
i64,u32,u32,u32,f64,f64
1830,1,44,56,0.022727,0.785714
1923,1,1354,3558,0.000739,0.380551
1981,1,1399,4298,0.000715,0.3255
1990,1,4872,5220,0.000205,0.933333
1992,9,5843,6371,0.00154,0.917124
1993,187,6469,7164,0.028907,0.902987
1994,526,6613,7436,0.07954,0.889322
1995,501,7074,8128,0.070823,0.870325
1996,300,6847,8513,0.043815,0.804299
1997,252,7187,9014,0.035063,0.797315


In [180]:
podil_vazeb(df.sort(by="rok").unique(subset=["100_a","245_a"]).explode("655_a").filter(pl.col("655_a") == "pracovní sešity"))

rok,brožovaná,jakákoliv,vše,podíl,vyplněnost
i64,u32,u32,u32,f64,f64
1937,1,1494,4732,0.000669,0.315723
1938,2,1270,4018,0.001575,0.316078
1975,1,1213,4129,0.000824,0.293776
1979,1,1687,4394,0.000593,0.383933
1983,1,3994,4341,0.00025,0.920065
1986,3,4584,4680,0.000654,0.979487
1988,1,4684,4850,0.000213,0.965773
1989,3,4677,4969,0.000641,0.941236
1990,2,4872,5220,0.000411,0.933333
1991,5,5288,5705,0.000946,0.926906


In [181]:
podil_vazeb(df.sort(by="rok").unique(subset=["100_a","245_a"]).explode("655_a").filter(pl.col("655_a") == "česká poezie"))

rok,brožovaná,jakákoliv,vše,podíl,vyplněnost
i64,u32,u32,u32,f64,f64
1802,2,10,12,0.2,0.833333
1803,1,11,16,0.090909,0.6875
1804,2,12,17,0.166667,0.705882
1805,1,14,16,0.071429,0.875
1810,1,12,17,0.083333,0.705882
1813,2,14,16,0.142857,0.875
1814,1,18,23,0.055556,0.782609
1816,2,17,22,0.117647,0.772727
1817,3,18,21,0.166667,0.857143
1821,1,46,54,0.021739,0.851852


In [182]:
romany_isbn = df_isbn.sort(by="rok").unique(subset=["100_a","245_a"]).explode("655_a").filter(pl.col("655_a") == "české romány").select(pl.col('isbn')).to_series().to_list()
len(romany_isbn)

8955

In [183]:
romanove_vazby = podil_vazeb(df.sort(by="rok").unique(subset=["100_a","245_a"]).explode("655_a").filter(pl.col("655_a") == "české romány"))
romanove_vazby

rok,brožovaná,jakákoliv,vše,podíl,vyplněnost
i64,u32,u32,u32,f64,f64
1843,1,70,86,0.014286,0.813953
1853,1,121,142,0.008264,0.852113
1854,1,119,132,0.008403,0.901515
1855,4,86,96,0.046512,0.895833
1857,1,112,137,0.008929,0.817518
1858,2,157,175,0.012739,0.897143
1860,2,131,148,0.015267,0.885135
1861,3,163,184,0.018405,0.88587
1862,1,213,236,0.004695,0.902542
1863,2,281,322,0.007117,0.872671


In [184]:
alt.Chart(alt_friendly(romanove_vazby.filter(pl.col("rok").is_between(1990,2024)))).mark_line().encode(alt.X("rok:T"), alt.Y("podíl:Q"))

In [185]:
bestsellerove_romanove_vazby = podil_vazeb(
    df.sort(by="rok").unique(
        subset=["100_a","245_a"]
    ).explode("655_a").filter(
        pl.col("655_a") == "české romány"
).explode("020_a").drop_nulls(
    subset=['020_a']
).with_columns(
    pl.col("020_a").map_elements(
        lambda x: x.replace('-','')
    ).alias("isbn")
).filter(
    pl.col("isbn").is_in(isbn_bestselleru)
))
bestsellerove_romanove_vazby

rok,brožovaná,jakákoliv,vše,podíl,vyplněnost
i64,u32,u32,u32,f64,f64
2002,2,8417,10210,0.000238,0.824388
2004,1,9244,10953,0.000108,0.84397
2005,2,9781,11303,0.000204,0.865345
2006,3,10294,11924,0.000291,0.863301
2008,3,11286,13183,0.000266,0.856103
2009,3,10708,12382,0.00028,0.864804
2010,3,10989,12779,0.000273,0.859926
2011,1,11000,12661,9.1e-05,0.86881
2012,3,10860,12690,0.000276,0.855792
2014,5,10829,12656,0.000462,0.855642


In [186]:
alt.Chart(alt_friendly(bestsellerove_romanove_vazby.filter(pl.col("rok").is_between(1990,2024)))).mark_line().encode(alt.X("rok:T"), alt.Y("podíl:Q"))

In [187]:
bestsellery = pd.read_json(os.path.join("data","sckn_zebricky.json"))
bestsellery = pl.from_pandas(bestsellery)
bestsellery
isbn_bestselleru = set(bestsellery.select(pl.col("sckn_isbn")).to_series().to_list())
df_isbn = df.explode("020_a").drop_nulls(subset=['020_a']).with_columns(pl.col("020_a").map_elements(lambda x: x.replace('-','')).alias("isbn"))

In [188]:
df_isbn.filter(pl.col('isbn').is_in(isbn_bestselleru))

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,655_ind2,655_a,655_7,655_2,655_ind1,655_x,655_z,655_y,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba,isbn
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],str,list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str,str
"""1""","""Rees, Laurence,""","""xx0030651""","[""aut""]","""1957-""",,,,,"""nkc20092008462""",""" nam a22 a 4500""","""091029s2009 xr abcff 0…","""(váz.)""",,"""978-80-242-2550-0""",,,,,,,"""1""","""0""","""Druhá světová válka za zavřený…","""Stalin, nacisté a Západ /""","""Laurence Rees ; [z anglického …",,,,,,"[""431 s., [16] s. obr. příl. :""]","[""il., mapy, portréty ;""]","[""24 cm""]",,,,"[""7"", ""9""]","[""monografie"", ""monographs""]","[""fd132842"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2009,431,"""pevná""","""9788024225500"""
"""1""","""Niven, Jennifer,""","""xx0172884""","[""aut""]","""1968-""",,,,,"""nkc20172878338""",""" nam a22 i 4500""","""170202s2017 xr d 0…","""(brožováno) :""","[""Kč 279,00""]","""978-80-7549-264-7""",,,,,,,"""1""","""0""","""Tíha vesmíru""","""okouzlující love story, která …","""Jennifer Nivenová ; z anglické…",,,,,,"[""406 stran ;""]",,"[""20 cm""]",,,,"[""7"", ""7"", … ""9""]","[""americké romány"", ""publikace pro mládež"", … ""juvenile literature""]","[""fd131796"", ""fd133157"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,"[""1""]","[""Křivánková, Anna,""]","[""trl""]","[""1982-""]","[""jo2011555995""]",,,,,,,,,,,,,,,,,,,,,,2017,406,"""brožovaná""","""9788075492647"""
"""1""","""Kallentoft, Mons,""","""xx0133897""","[""aut""]","""1968-""",,,,,"""nkc20112175479""",""" nam a22 a 4500""","""110317s2011 xr g 0…","""(váz.)""",,"""978-80-7294-416-3""",,,,,,,"""1""","""0""","""Zimní oběť""",,"""Mons Kallentoft ; [ze švédskéh…",,,,,,"[""440 s. ;""]",,"[""21 cm""]",,,,"[""7"", ""7"", … ""9""]","[""švédské romány"", ""detektivní romány"", … ""detective novels""]","[""fd133923"", ""fd132010"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,440,"""pevná""","""9788072944163"""
"""1""","""Gott, Karel,""","""jk01032549""","[""aut""]","""1939-2019""",,,,,"""nkc20213321597""",""" nam a22 i 4500""","""210428s2021 xr ach e 0…","""(vázáno)""",,"""978-80-908221-0-8""",,,,,,,"""1""","""0""","""Má cesta za štěstím""",,"""Karel Gott""",,,,,,"[""690 stran :""]","[""ilustrace (převážně barevné), portréty, faksimile ;""]","[""31 cm""]",,,,"[""7"", ""7"", … ""9""]","[""autobiografie"", ""obrazové publikace"", … ""pictorial works""]","[""fd131855"", ""fd132947"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021,690,"""pevná""","""9788090822108"""
"""1""","""Mina, Denise,""","""xx0026332""","[""aut""]","""1966-""",,,,,"""nkc20132474748""",""" nam a22 a 4500""","""130711s2013 xr g 0…","""(váz.)""",,"""978-80-87697-02-3""",,,,,,,"""1""","""0""","""Konec vosí sezóny""",,"""Denise Mina ; [přeložil Ivan N…",,,,,,"[""406 s. ;""]",,"[""22 cm""]",,,,"[""7"", ""7"", … ""9""]","[""skotské romány (anglicky)"", ""detektivní romány"", … ""detective novels""]","[""fd133439"", ""fd132010"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2013,406,"""pevná""","""9788087697023"""
"""1""","""Chrastilová, Brigita,""","""xx0011624""","[""aut""]","""1945-""",,,,,"""cpk20031250559""",""" nam a22 a 4500""","""031107s2003 xr f 0…","""(váz.)""",,"""80-86395-63-4""",,,,,,,"""1""","""0""","""Prezident republiky Václav Hav…",,"""Brigita Chrastilová, Petr Mike…",,,,,,"[""xviii, 565 s. ;""]",,"[""21 cm""]",,,,"[""7"", ""9""]","[""monografie"", ""monographs""]","[""fd132842"", null]","[""czenas"", ""eczenas""]",,,,,"[""1""]","[""Mikeš, Petr,""]","[""aut""]","[""1978-""]","[""ola2012735698""]",,,,,,,,,,,,,,,,,,,,,,2003,565,"""pevná""","""8086395634"""
"""1""","""Vaňková, Ludmila,""","""jk01141586""","[""aut""]","""1927-2022""",,,,,"""nkc20112180865""",""" nam a22 a 4500""","""110405s2011 xr g 0…","""(váz.) :""","[""Kč 305,00""]","""978-80-7244-300-0""",,,,,,,"""1""","""0""","""Dítě z Apulie""",,"""Ludmila Vaňková""",,,,,,"[""412 s. ;""]",,"[""21 cm""]",,,,"[""7"", ""7"", … ""9""]","[""české romány"", ""historické romány"", … ""biographical novels""]","[""fd133974"", ""fd132414"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,412,"""pevná""","""9788072443000"""
"""1""","""Peck, M. Scott""","""jn19990006438""","[""aut""]","""1936-2005""","""(Morgan Scott),""",,,,"""nkc20112202783""",""" nam a22 a 4500""","""110916s2011 xr a b 0…","""(váz.)""",,"""978-80-262-0013-0""",,,,,,,"""1""","""0""","""Přátelská sněhová vločka""","""obyčejný příběh o neobyčejných…","""M. Scott Peck ; s ilustracemi …",,,,,,"[""62 s. :""]","[""barev. il. ;""]","[""13 x 14 cm""]",,,,"[""7"", ""7"", … ""9""]","[""americké novely"", ""publikace pro děti"", … ""children's literature""]","[""fd131792"", ""fd133156"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,"[""1""]","[""Peck, Christopher Scott""]","[""ill""]",,"[""osa2011669674""]",,,,,,,,,,,,,,,,,,,,,,2011,62,"""pevná""","""9788026200130"""
"""1""","""Lindgren, Astrid,""","""jn19990005095""","[""aut""]","""1907-2002""",,,,,"""cpk20021087780""",""" nam a22 a 4500""","""020408s2002 xr a b 0…","""(váz.) :""","[""Kč 185,00""]","""80-00-01011-9""",,,,,,,"""1""","""0""","""Děti z Bullerbynu""",,"""Astrid Lindgrenová ; [ze švéds…",,,,,,"[""215 s. :""]","[""barev. il. ;""]","[""25 cm""]",,,,"[""7"", ""7"", … ""9""]","[""švédské povídky"", ""publikace pro děti"", … ""Short stories, Swedish""]","[""fd133922"", ""fd133156"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,"[""1"", ""1""]","[""Mencák, Břetislav,"", ""Zmatlíková, Helena,""]","[""trl"", ""ill""]","[""1903-1981"", ""1923-2005""]","[""jk01081188"", ""jk01152754""]",,,,,,,,,,,,,,,,,,,,,,2002,215,"""pevná""","""8000010119"""
"""1""","""Riordan, Rick,""","""xx0103318""","[""aut""]","""1964-""",,,,,"""nkc20102094395""",""" cam a22 a 4500""","""100308s2010 xr d 0…","""(váz.)""",,"""978-80-253-0974-2""",,,,,,,"""1""","""0""","""Percy Jackson""",,"""napsal Rick Riordan ; [z angli…",,"""Moře nestvůr""",,,,"[""304 s. ;""]",,"[""22 cm""]",,,,"[""7"", ""7"", … ""9""]","[""americké romány"", ""fantasy romány"", … ""juvenile literature""]","[""fd131796"", ""fd184199"", … null]","[""czenas"", ""czenas"", … ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2010,304,"""pevná""","""9788025309742"""


In [189]:
alt.Chart(alt_friendly(romanove_vazby.filter(pl.col("rok").is_between(1990,2024)))).mark_line().encode(alt.X("rok:T"), alt.Y("podíl:Q"))

## Rozsah

In [191]:
rozsah = df.filter(~pl.col("stran").is_null()).group_by("rok").len().sort(by="rok").rename({"len":"vyplněno"})
rozsah = rozsah.join(df.group_by("rok").len().sort(by="rok").rename({"len":"vše"}), left_on="rok", right_on="rok", how="left")
rozsah = rozsah.with_columns((pl.col("vyplněno") / pl.col("vše")).alias("vyplněnost"))
rozsah

rok,vyplněno,vše,vyplněnost
i64,u32,u32,f64
1800,1,1,1.0
1801,12,12,1.0
1802,12,12,1.0
1803,16,16,1.0
1804,17,17,1.0
1805,16,16,1.0
1806,9,9,1.0
1807,18,18,1.0
1808,14,14,1.0
1809,15,15,1.0


In [192]:
def streni_tloustka(frejm):
    return frejm.group_by("rok").agg(pl.col("stran").median()).sort(by="rok").with_columns(
                   pl.col("rok").map_elements(
                       lambda x: datetime.date(year=int(x), month=1, day=1), 
                       return_dtype=pl.Date
                   ).cast(pl.Datetime))

In [193]:
tloustky_romanu = df.sort(by="rok").unique(subset=["100_a","245_a"]).explode("655_a").filter(pl.col("655_a") == "české romány").filter(pl.col("rok") >= 1990).with_columns(
                   pl.col("rok").map_elements(
                       lambda x: datetime.date(year=int(x), month=1, day=1), 
                       return_dtype=pl.Date
                   ).cast(pl.Datetime))

In [194]:
tloustky_romanu.select(pl.col("stran"))

stran
i64
223
363
444
145
247
237
179
164
125
156


In [195]:
tloustky_romanu.sort(by='stran',descending=True).select(pl.col(["rok","100_a","245_a","stran"]))

rok,100_a,245_a,stran
datetime[μs],str,str,i64
2021-01-01 00:00:00,"""Motýl, Petr,""","""Šatna a klášter""",1193
2005-01-01 00:00:00,"""Slanská, Mariana""","""Tóny v trní""",1101
2014-01-01 00:00:00,"""Novotný, František,""","""Valhala""",1086
2024-01-01 00:00:00,"""BasthArt,""","""Bastard""",1085
2024-01-01 00:00:00,"""Beneš, Filip,""","""Černý měsíc""",1000
2019-01-01 00:00:00,"""Hyrsch, Áron,""","""Hora rabínova""",929
2023-01-01 00:00:00,"""Šalda, Ladislav,""","""Ta hra neměla pravidla""",868
2020-01-01 00:00:00,"""Kučera, Dalibor,""","""Poté""",836
2018-01-01 00:00:00,"""Dobeš, Jára,""","""Stigmata karmy""",816
2017-01-01 00:00:00,"""Svobodová, Jitka""","""Přemyslova krev""",812


In [196]:
do_grafu = tloustky_romanu.filter(pl.col("stran") >= 30).group_by("rok").agg(pl.col("stran").quantile(0.25).alias('malo')).join(
    tloustky_romanu.filter(pl.col("stran") >= 30).group_by("rok").agg(pl.col("stran").quantile(0.75).alias('hodne')), on="rok"
).join(
    tloustky_romanu.filter(pl.col("stran") >= 30).group_by("rok").agg(pl.col("stran").median().alias('median')), on="rok"
).join(
    tloustky_romanu.filter(pl.col("stran") >= 30).group_by("rok").agg(pl.col("stran").min().alias('min')), on="rok"
).join(
    tloustky_romanu.filter(pl.col("stran") >= 30).group_by("rok").agg(pl.col("stran").max().alias('max')), on="rok"
).to_pandas()
do_grafu

Unnamed: 0,rok,malo,hodne,median,min,max
0,1991-01-01,120.0,253.0,191.0,40,486
1,2023-01-01,218.0,344.0,272.0,49,868
2,2020-01-01,212.0,321.0,259.5,99,836
3,2010-01-01,188.0,285.0,223.0,91,786
4,2017-01-01,206.0,319.0,254.0,96,812
5,2004-01-01,150.0,285.0,214.0,82,776
6,2018-01-01,214.0,320.0,253.0,44,816
7,2005-01-01,160.0,274.0,215.0,61,1101
8,2009-01-01,159.0,286.0,223.0,52,634
9,2019-01-01,209.0,335.0,260.0,94,929


In [197]:
streni_tloustka(df.sort(by="rok").unique(subset=["100_a","245_a"]).explode("655_a").filter(pl.col("655_a") == "české romány"))

rok,stran
datetime[μs],f64
1814-01-01 00:00:00,71.0
1818-01-01 00:00:00,196.0
1843-01-01 00:00:00,152.0
1848-01-01 00:00:00,161.0
1853-01-01 00:00:00,173.0
1854-01-01 00:00:00,303.0
1855-01-01 00:00:00,223.5
1857-01-01 00:00:00,207.0
1858-01-01 00:00:00,200.0
1860-01-01 00:00:00,249.0


In [198]:
rozptyl = alt.Chart(do_grafu, title=alt.Title("Jak bobtná český román", subtitle=["Do pásu se rozsahem vejde 50 % knih. Tečky jsou ty nejtlustší."]), width=300, height=400).mark_area(opacity=0.5).encode(
       alt.X("rok:T", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')).title(None),
    alt.Y("hodne:Q", axis=alt.Axis(domainOpacity=0, format='d', tickColor='#DCDDD6')).title(None),
    alt.Y2("malo:Q")
)

median = alt.Chart(do_grafu, width=300).mark_line().encode(
       alt.X("rok:T", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')),
    alt.Y("median:Q", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6'))
)

kraje = alt.Chart(do_grafu, width=300).mark_point(size=2).encode(
       alt.X("rok:T", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')),
    alt.Y("max:Q", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6'))
)


finale = (rozptyl + median + kraje).configure_view(stroke='transparent')

finale

In [199]:
gr = pd.read_csv(os.path.join('data','goodreads-hodnoceni.csv'))
gr = pl.from_pandas(gr)
gr = gr.sort(by="GR_date").unique(subset=['GR_isbn'],keep='last')
dk = pd.read_csv(os.path.join('data','databazeknih-hodnoceni.csv'))
dk = pl.from_pandas(dk)
dk = dk.sort(by="DK_date").unique(subset=['DK_isbn'],keep='last')

In [242]:
dk

DK_isbn,DK_date,DK_titul,DK_rating,DK_ratings_count,DK_tags,DK_Právě_čtených,DK_Přečtených,DK_Čtenářské_výzvě,DK_Knihotéce,DK_Chystám_se_číst,DK_Chci_si_koupit,DK_dalších_seznamech,DK_Doporučených,DK_autorstvo,DK_vyslo
f64,str,str,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,str,f64
9.7880e12,"""2024-12-09 03:44:30""","""Můj rok 1963""",60.0,1.0,"""['Literatura česká', 'Literatu…",,3.0,,3.0,3.0,4.0,,,"""['Alena Breuerová']""",2023.0
9.7880e12,"""2024-12-09 03:42:29""","""Likvidátor""",84.0,67.0,"""['Literatura česká', 'Fantasy'…",4.0,74.0,13.0,70.0,30.0,19.0,3.0,5.0,"""['Honza Slívka']""",2023.0
9.7881e12,"""2024-12-09 03:36:12""","""Luftwaffe 1935""",55.0,4.0,"""['Literatura naučná', 'Histori…",,6.0,,6.0,,,2.0,,"""['Marek Brzkovský']""",2022.0
9.7881e12,"""2024-12-09 03:57:18""","""Závistivá kráva""",100.0,1.0,"""['Literatura česká', 'Pro děti…",,3.0,,,,,,,"""['Tereza Moc Watkinsová']""",2024.0
9.7881e12,"""2024-12-09 03:56:24""","""Adam Kašpar""",80.0,1.0,"""['Obrazové publikace', 'Umění'…",,1.0,,,1.0,,,,"""['Adam Kašpar', 'Barbora Kundr…",2023.0
9.7881e12,"""2024-12-09 03:42:10""","""V šedi""",89.0,14.0,"""['Romány', 'Literatura česká']""",,18.0,4.0,4.0,11.0,4.0,,3.0,"""['Milada Střítezská']""",2023.0
9.7880e12,"""2024-12-09 03:56:43""","""Příběhy tisíce a jedné noci""",82.0,117.0,"""['Literatura česká', 'Pro děti…",6.0,200.0,13.0,143.0,25.0,7.0,2.0,7.0,"""['Eduard Petiška']""",2024.0
9.7881e12,"""2024-12-09 03:41:38""","""Pod křídly Velkého krkavce""",96.0,5.0,"""['Mytologie', 'Historie']""",,6.0,,3.0,12.0,5.0,,1.0,"""['Ondřej Pivoda']""",2023.0
9.7881e12,"""2024-12-09 03:41:49""","""Muzikant na jezeře""",84.0,9.0,"""['Povídky', 'Literatura česká'…",,10.0,2.0,2.0,15.0,4.0,1.0,2.0,"""['Mirek Vostrý']""",2023.0
9.7881e12,"""2024-12-09 03:50:49""","""Fake news a politika klasickéh…",100.0,1.0,"""['Filozofie', 'Historie']""",,,,1.0,1.0,1.0,,1.0,"""['Ondřej Vinař']""",2023.0


In [240]:
df_gr = df_isbn.join(gr, left_on='isbn', right_on='GR_isbn',how='right')
df_gr.sample(10)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_n,700_k,700_r,700_p,700_o,700_s,700_j,700_6,700_x,700_e,700_f,700_5,700_g,rok,stran,vazba,GR_isbn,GR_date,GR_title,GR_rating,GR_ratings_count,GR_reviews,GR_published,GR_1_stars,GR_2_stars,GR_3_stars,GR_4_stars,GR_5_stars,GR_pages,GR_format
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],str,list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],str,list[str],list[str],list[str],list[str],list[str],i64,i64,str,str,str,str,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,str
"""1""","""Lauder, Silvie,""","""mzk2006324307""","[""aut""]","""1978-""",,,,,"""nkc20233525585""",""" nam a22 i 4500""","""230620s2023 xr f 0…","""(brožováno)""",,"""978-80-275-1590-5""",,,,,,,"""1""","""0""","""V pasti pohlaví""","""o politice, péči, sexu, násilí…","""Silvie Lauder""",,,,,,"[""397 stran ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,2023.0,397.0,"""brožovaná""","""9788027515905""","""2024-12-09 03:24:36""","""V pasti pohlaví by Silvie Laud…",4.61,399.0,54.0,"""June 6, 2023""",0.0,4.0,19.0,107.0,269.0,400.0,"""paperback"""
"""1""","""Poláčková, Lenka""","""jo20221150966""","[""aut""]",,,,,,"""nkc20223486199""",""" nam a22 i 4500""","""230414s2023 xr g 0…","""(vázáno)""",,"""978-80-249-5021-1""",,,,,,,"""1""","""0""","""Co zpívá Sojka""","""historický román z doby Velké …","""Lenka Poláčková""",,,,,,"[""294 stran ;""]",,"[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,2023.0,294.0,"""pevná""","""9788024950211""","""2024-12-09 02:06:04""","""Co zpívá Sojka by Lenka Poláčk…",3.73,11.0,3.0,"""January 1, 2023""",0.0,0.0,5.0,4.0,2.0,296.0,"""hardcover"""
"""1""","""Chalupa, Jiří,""","""jn19981000468""","[""aut""]","""1966-""",,,,,"""nkc20243583525""",""" nam a22 i 4500""","""240112s2023 xr abche 0…","""(vázáno)""",,"""978-80-7422-940-4""",,,,,,,"""1""","""0""","""Dějiny Španělska""",,"""Jiří Chalupa""",,,,,,"[""762 stran :""]","[""ilustrace, mapy, portréty, faksimile ;""]","[""22 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,2023.0,762.0,"""pevná""","""9788074229404""","""2024-12-09 01:06:57""","""Dějiny Španělska by Jiří Chalu…",0.0,0.0,0.0,"""December 28, 2023""",0.0,0.0,0.0,0.0,0.0,768.0,"""hardcover"""
"""1""","""Sacká, Vlaďka,""","""xx0274874""","[""aut""]","""1985-""",,,,,"""nkc20233547331""",""" nam a22 i 4500""","""230908s2023 xr g 0…","""(brožováno)""",,"""978-80-277-2238-9""",,,,,,,"""1""","""0""","""Koruna otráveného stromu""",,"""Vlaďka Sacká""",,,,,,"[""402 stran ;""]",,"[""20 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,2023.0,402.0,"""brožovaná""","""9788027722389""","""2024-12-09 01:03:08""","""Koruna otráveného stromu by Vl…",3.42,12.0,2.0,"""January 1, 2023""",0.0,2.0,4.0,5.0,1.0,408.0,"""paperback"""
"""1""","""Stráníková, Jarmila,""","""ola20211129086""","[""aut""]","""1988-""",,,,,"""nkc20233562778""",""" nam a22 i 4500""","""231024s2023 xr a g 0…","""(vázáno)""",,"""978-80-88467-81-6""",,,,,,,"""1""","""0""","""Balada pro Emily""",,"""Jarmila Stráníková""",,,,,,"[""412 stran :""]","[""ilustrace ;""]","[""21 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,2023.0,412.0,"""pevná""","""9788088467816""","""2024-12-09 02:49:22""","""Balada pro Emily by Jarmila St…",3.69,45.0,11.0,"""October 21, 2023""",1.0,3.0,16.0,14.0,11.0,416.0,
"""1""","""Hrubý, Dan,""","""jn20000401090""","[""aut""]","""1968-""",,,,,"""nkc20233576589""",""" cam a22 i 4500""","""231120s2023 xr acehe 0…","""(brožováno)""",,"""978-80-908735-1-3""",,,,,,,"""1""","""0""","""Pražské příběhy""",,"""Dan Hrubý""",,"""Ztraceným světem Starého Města""",,,,"[""518 stran :""]","[""ilustrace, portréty, plány, faksimile ;""]","[""23 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,2023.0,518.0,"""brožovaná""","""9788090873513""","""2024-12-09 01:55:53""","""Pražské příběhy: Ztraceným svě…",4.0,2.0,0.0,"""January 1, 2023""",0.0,0.0,0.0,2.0,0.0,520.0,"""paperback"""
"""1""","""Landsman, Dominik,""","""jo2015854588""","[""aut""]","""1985-""",,,,,"""nkc20233502374""",""" nam a22 i 4500""","""230615s2023 xr g 0…","""(vázáno)""",,"""978-80-249-5078-5""",,,,,,,"""1""","""0""","""Rybář Jarmil Koloušek""","""pravdivé příběhy od vody pro r…","""Dominik Landsman""",,,,,,"[""222 stran ;""]",,"[""19 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,2023.0,222.0,"""pevná""","""9788024950785""","""2024-12-09 03:06:41""","""Rybář Jarmil Koloušek by Domin…",4.0,5.0,2.0,"""January 1, 2023""",0.0,1.0,1.0,0.0,3.0,224.0,"""hardcover"""
"""1""","""Kramerová, Daniela,""","""mzk2009495965""","[""aut""]","""1971-""",,,,,"""nkc20223442107""",""" nam a22 i 4500""","""220816s2022 xr a e 0…","""(vázáno)""",,"""978-80-7437-382-4""",,,,,,,"""1""","""0""","""Čestmír Suška""","""věci venku = things outside /""","""koncepce, texty: Daniela Krame…",,,,,,"[""199 stran :""]","[""barevné ilustrace ;""]","[""25 cm""]",,,,…,,,,,,,,,,,,,,,,,,,,,2022.0,199.0,"""pevná""","""9788074373824""","""2024-12-09 01:27:44""","""Cestmír Suška: Things Outside …",0.0,0.0,0.0,"""April 18, 2023""",0.0,0.0,0.0,0.0,0.0,200.0,"""paperback"""
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,"""9788011029623.0""","""2024-12-02 02:18:36""","""Je všechno dobrý? by Marie Kie…",0.0,0.0,0.0,"""January 1, 2023""",0.0,0.0,0.0,0.0,0.0,151.0,"""paperback"""
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,"""9788027801169""","""2024-12-09 02:45:51""","""Hlubiny města by Kristýna Sněg…",3.73,48.0,14.0,"""January 1, 2023""",1.0,4.0,14.0,17.0,12.0,472.0,"""hardcover"""


In [248]:
df_dk = df_isbn.join(dk.with_columns(pl.col('DK_isbn').cast(int).cast(str)), left_on='isbn', right_on='DK_isbn',how='right')

In [406]:
df_gr_graf = df_gr.filter(pl.col('GR_isbn').is_in(romany_isbn)).filter(pl.col("GR_rating") >= 0).filter(pl.col('GR_ratings_count') >= 5).filter(pl.col("stran") > 30)

In [418]:
gr_scatter = alt.Chart(
    alt_friendly(df_gr_graf),
    width=120
).mark_point(size=1, color='#81A9D5').encode(
    alt.X("stran:Q", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')),
    alt.Y("GR_rating:Q", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6'))
)

# Add regression line
gr_regression = alt.Chart(
    alt_friendly(df_gr_graf)
).transform_regression(
    'stran', 'GR_rating', method="linear"
).mark_line(color='#445B78').encode(
    alt.X("stran:Q", title="počet stran", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')),
    alt.Y("GR_rating:Q", title="hodnocení na Goodreads", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6'))
)

# Combine the plots
gr_final_chart = gr_scatter + gr_regression

gr_final_chart

In [402]:
df_dk_graf = df_dk.filter(pl.col('DK_isbn').is_in(romany_isbn)).filter(pl.col("DK_rating") >= 0).filter(pl.col('DK_ratings_count') >= 5).filter(pl.col("stran") > 30)

In [416]:
dk_scatter = alt.Chart(
    alt_friendly(df_dk_graf),

    width=120
).mark_point(size=1, color='#81A9D5').encode(
    alt.X("stran:Q"),
    alt.Y("DK_rating:Q")
)

# Add regression line
dk_regression = alt.Chart(
    alt_friendly(df_dk_graf)
).transform_regression(
    'stran', 'DK_rating'
).mark_line(color='#445B78').encode(
    alt.X("stran:Q", title="počet stran", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')),
    alt.Y("DK_rating:Q", title="hodnocení na Databázi knih", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6'))
)

# Combine the plots
dk_final_chart = (dk_scatter + dk_regression)

dk_final_chart

In [412]:
len(df_gr_graf)

295

In [362]:
len(df_dk_graf)

616

In [420]:
alt.concat(gr_final_chart, dk_final_chart).properties(
    title=alt.Title("Dlouhé příběhy hodnotíme líp", subtitle=["Hodnocení českých románů z posledních let","na čtenářských platformách Goodreads a Databáze knih"])
)