In [65]:
import os
import datetime
import json
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin
from src.zjisti_vazbu import zjisti_vazbu
from src.me_to_neurazi import me_to_neurazi

# Notebook-wide display settings: longer polars table previews, no Altair row
# limit, and the project chart theme registered under the name 'irozhlas'.
pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')

# Silence every warning from this point on.
warnings.filterwarnings('ignore')

# Load the JSON settings that later cells index by key
# ('sirka', 'vyska_nizkych', 'default').
with open(os.path.join('src', 'kredity.json'), 'r', encoding='utf-8') as kredity:
    kredity = json.load(kredity)

In [105]:
def _nacti_pole(pole):
    """Read data/cnb_sloupce/<pole>.parquet and return it as a polars DataFrame."""
    return pl.read_parquet(os.path.join("data/cnb_sloupce", f"{pole}.parquet"))


def _pripoj_pole(ramec, seznam_poli):
    """Left-join every file named in ``seznam_poli`` onto ``ramec`` using the key column "001".

    The files are joined in the order given, so the resulting column order
    follows ``seznam_poli``.
    """
    for pole in seznam_poli:
        ramec = ramec.join(_nacti_pole(pole), left_on="001", right_on="001", how="left")
    return ramec


# Start from 100.parquet and attach only the columns needed for the record filter.
df = _pripoj_pole(_nacti_pole("100"), ["leader", "008"])

# The positional string filters run in pandas.
# NOTE(review): the file names look like MARC 21 fields, so leader[6]/leader[7]
# are presumably record type / bibliographic level and 008[15:17] / 008[35:38]
# place of publication / language - confirm against the data source.
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)

# Attach the remaining column files to the already reduced set of records.
df = _pripoj_pole(df, ["020", "022", "245", "300", "650", "653", "655", "700"])

# Keep only rows whose 022_a is null.
# NOTE(review): presumably this drops serials carrying an ISSN - confirm.
df = df.explode("022_a").filter(pl.col("022_a").is_null())

# Derived columns computed by the project helpers imported at the top of the notebook.
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode("020_q").with_columns(pl.col("020_q").map_elements(zjisti_vazbu, return_dtype=str).alias('vazba'))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))

# Drop records with fewer than 30 pages (rows with a null page count are dropped as well).
df = df.filter(pl.col("stran") >= 30)

# Remove records whose 245_h mentions "grafika", then keep one row per
# (author, title, part title) combination.
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["100_a","245_a","245_p"], keep="first")
print(len(df))

795736
506565


In [127]:
# Count rows per 655_a value for years after 2020, most frequent first.
(
    df.filter(pl.col("rok") > 2020)
    .explode('655_a')
    .group_by("655_a")
    .len()
    .sort(by="len", descending=True)
)

655_a,len
str,u32
"""publikace pro děti""",3869
"""children's literature""",3811
"""monografie""",2474
"""monographs""",2402
"""příručky""",2363
"""handbooks and manuals""",2287
"""české romány""",2138
"""Czech fiction""",2119
"""populárně-naučné publikace""",2038
"""popular works""",1972


In [309]:
# Spot check: every row whose 100_a value contains "Bellová, Bia".
df.filter(
    pl.col("100_a").str.contains("Bellová, Bia")
)

100_ind1,100_a,100_7,100_4,100_d,100_q,100_c,100_b,100_e,001,leader,008,020_q,020_c,020_a,020_z,022_a,022_y,022_z,022_ind1,022_l,245_ind1,245_ind2,245_a,245_b,245_c,245_n,245_p,245_h,245_f,245_s,300_a,300_b,300_c,300_e,300_f,300_3,…,653_a,655_ind2,655_a,655_7,655_2,655_ind1,655_y,655_z,700_ind1,700_a,700_4,700_d,700_7,700_t,700_q,700_l,700_ind2,700_c,700_b,700_i,700_m,700_k,700_n,700_r,700_p,700_o,700_s,700_j,700_x,700_e,700_f,700_5,700_9,700_g,rok,stran,vazba
str,str,str,list[str],str,str,list[str],str,str,str,str,str,str,list[str],list[str],list[str],str,list[str],list[str],str,str,str,str,str,str,str,list[str],str,str,str,str,list[str],list[str],list[str],list[str],str,str,…,list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],list[str],i64,i64,str
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20132474687""",""" cam a22 a 4500""","""131021s2013 xr g 0…","""(váz.) :""","[""Kč 199,00""]","[""978-80-7294-965-6""]",,,,,,,"""1""","""0""","""Celý den se nic nestane""",,"""Bianca Bellová""",,,,,,"[""138 s. ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""9""]","[""české romány"", ""Czech fiction""]","[""fd133974"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2013,138,"""pevná"""
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20223407101""",""" cam a22 i 4500""","""220324s2022 xr g 0…","""(vázáno)""",,"[""978-80-275-1048-1""]",,,,,,,"""1""","""0""","""Ostrov""",,"""Bianca Bellová""",,,,,,"[""182 stran ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""9""]","[""české romány"", ""Czech fiction""]","[""fd133974"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2022,182,"""pevná"""
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20233556586""",""" nam a22 i 4500""","""231020s2023 xr g 0…","""(vázáno)""",,"[""978-80-275-1804-3""]",,,,,,,"""1""","""0""","""Transfer""",,"""Bianca Bellová""",,,,,,"[""133 stran ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""9""]","[""české novely"", ""Czech novellas""]","[""fd133969"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023,133,"""pevná"""
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20091867027""",""" nam a22 a 4500""","""090612s2009 xr g 0…","""(váz.)""",,"[""978-80-903997-5-4""]",,,,,,,"""1""","""0""","""Sentimentální román""",,"""Bianca Bellová""",,,,,,"[""109 s. ;""]",,"[""19 cm""]",,,,…,,"[""7"", ""9""]","[""české romány"", ""Czech fiction""]","[""fd133974"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2009,109,"""pevná"""
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20193122197""",""" nam a22 i 4500""","""190920s2019 xr g 0…","""(vázáno) :""","[""Kč 299,00""]","[""978-80-7577-962-5""]",,,,,,,"""1""","""0""","""Mona""",,"""Bianca Bellová""",,,,,,"[""163 stran ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""9""]","[""české novely"", ""Czech novellas""]","[""fd133969"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019,163,"""pevná"""
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20162834795""",""" cam a22 i 4500""","""160922s2016 xr g 0…","""(vázáno) :""","[""Kč 249,00""]","[""978-80-7491-771-4""]",,,,,,,"""1""","""0""","""Jezero""",,"""Bianca Bellová""",,,,,,"[""186 stran ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""9""]","[""české novely"", ""Czech novellas""]","[""fd133969"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2016,186,"""pevná"""
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20213312038""",""" cam a22 i 4500""","""210317s2021 xr g 0…","""(vázáno) :""","[""Kč 299,00""]","[""978-80-275-0592-0""]",,,,,,,"""1""","""0""","""Tyhle fragmenty""",,"""Bianca Bellová""",,,,,,"[""188 stran ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""9""]","[""české povídky"", ""Czech short stories""]","[""fd133971"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021,188,"""pevná"""
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20243634045""",""" cam a22 i 4500""","""240917s2024 xr g 0…","""(vázáno)""",,"[""978-80-275-2212-5""]",,,,,,,"""1""","""0""","""Neviditelný muž""",,"""Bianca Bellová""",,,,,,"[""169 stran ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""9""]","[""české romány"", ""Czech fiction""]","[""fd133974"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024,169,"""pevná"""
"""1""","""Bellová, Bianca,""","""xx0087781""","[""aut""]","""1970-""",,,,,"""nkc20112185793""",""" cam a22 a 4500""","""110426s2011 xr g 0…","""(váz.)""",,"[""978-80-7294-495-8""]",,,,,,,"""1""","""0""","""Mrtvý muž""",,"""Bianca Bellová""",,,,,,"[""110 s. ;""]",,"[""21 cm""]",,,,…,,"[""7"", ""9""]","[""české novely"", ""Czech novellas""]","[""fd133969"", null]","[""czenas"", ""eczenas""]",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2011,110,"""pevná"""


In [99]:
def cena(retezec):
    """Extract the whole-number amount that follows "Kč" in a price string.

    The text after the first "Kč" is cut at the first comma, stripped of
    surrounding whitespace and converted to ``int``; e.g. "Kč 199,00" -> 199.

    Args:
        retezec: Price text; any other value (e.g. ``None``) is tolerated.

    Returns:
        The amount as ``int``, or ``None`` when the input is not a string,
        contains no "Kč", or the part before the comma is not an integer.
    """
    try:
        za_menou = retezec.split("Kč")[1]
        return int(za_menou.split(',')[0].strip())
    # Only parse failures mean "no price"; unlike a bare ``except`` this lets
    # KeyboardInterrupt / SystemExit propagate so a long run can be aborted.
    except (AttributeError, IndexError, TypeError, ValueError):
        return None

In [239]:
# One row per (record, 655_a value, 020_c value), restricted to rows tagged
# 'české romány'; the price text in 020_c is parsed into the integer column 'cena'.
# return_dtype is given explicitly, as in the other map_elements calls in this
# notebook, so the column type does not depend on inference.
df_rom = (
    df.explode('655_a')
    .filter(pl.col('655_a') == 'české romány')
    .explode("020_c")
    .with_columns(pl.col("020_c").map_elements(cena, return_dtype=int).alias('cena'))
)

rok,cena
i64,f64
2016,291.428571
2017,340.790698
2018,311.604167
2019,335.383562
2020,361.935484
2021,331.0
2022,403.17
2023,427.613333
2024,410.91
2025,414.0


In [311]:
# Chart input: rows from 2023 onwards with at most 700 pages, one row per
# (title, author) pair.
# keep="first" with maintain_order=True makes the surviving duplicate
# deterministic; the default keep="any" gives no guarantee which row is kept,
# so the price (and the count after drop_nulls) could differ between runs.
do_grafu = (
    df_rom.filter(pl.col('rok') >= 2023)
    .filter(pl.col("stran") <= 700)
    .unique(subset=['245_a', '100_a'], keep="first", maintain_order=True)
    .select(pl.col(['stran', '245_a', 'cena', "rok"]))
)
print(len(do_grafu))

# Keep only rows where all four selected columns are filled in.
do_grafu = do_grafu.drop_nulls()
print(len(do_grafu))

1022
254


In [303]:
# Display the chart input table (columns: stran, 245_a, cena, rok).
do_grafu

stran,245_a,cena,rok
i64,str,i64,i64
398,"""Hrdina od Jezera hrochů""",369,2024
409,"""Práh""",459,2024
249,"""Hotel času""",349,2024
428,"""Azurově šedá""",499,2024
259,"""Úděsné dny Adély H.""",329,2024
300,"""Baltazar""",440,2024
360,"""Ala Luna""",499,2024
377,"""Králokat""",399,2024
234,"""Zkáza ráje""",349,2025
472,"""Meč z kostí""",500,2024


In [301]:
# Round the page count to the nearest multiple of 25 (overwriting 'stran'),
# then show the median price for each such bin in ascending order.
(
    do_grafu
    .with_columns(((pl.col("stran") / 25).round() * 25).alias("stran"))
    .group_by('stran')
    .agg(pl.col('cena').median())
    .sort('stran')
)

stran,cena
f64,f64
100.0,249.0
125.0,269.0
150.0,339.0
175.0,299.0
200.0,349.0
225.0,354.5
250.0,349.0
275.0,379.0
300.0,399.0
325.0,399.0


In [285]:
# Median price of the charted rows with a page count between 390 and 410.
(
    do_grafu
    .filter(pl.col("stran").is_between(390, 410))
    .select("cena")
    .median()
)

cena
f64
449.0


In [287]:
# Median price of the charted rows with a page count between 290 and 310.
(
    do_grafu
    .filter(pl.col("stran").is_between(290, 310))
    .select("cena")
    .median()
)

cena
f64
399.0


In [313]:
# Scatter layer: one circle per row of do_grafu, pages on x and price on y.
# Axis labels get the ' s.' / ' Kč' suffix via labelExpr; the price axis is on the right.
zaklad = (
    alt.Chart(
        do_grafu.to_pandas(),
        title=alt.Title(
            "Rozsahy českých románů a jejich doporučené ceny",
            subtitle=[
                "Graf zachycuje pouhou čtvrtinu románů vydaných od roku 2023; u zbylých tří čtvrtin",
                "není v datech informace o doporučené maloobchodní ceně.",
            ],
        ),
        width=kredity['sirka'],
        height=kredity['vyska_nizkych'] * 2,
    )
    .mark_circle(color="#81A9D5", opacity=1)
    .encode(
        alt.X(
            'stran:Q',
            axis=alt.Axis(domainOpacity=0, labelExpr="datum.label + ' s.'", format='d', tickColor="white"),
            title=None,
        ),
        alt.Y(
            'cena:Q',
            axis=alt.Axis(domainOpacity=0, labelExpr="datum.label + ' Kč'", format='d', tickColor="white", orient="right"),
            title=None,
        ),
    )
)

# Trend layer: polynomial regression of price on page count, drawn as a line.
cara = (
    alt.Chart(do_grafu.to_pandas())
    .transform_regression('stran', 'cena', method="poly")
    .mark_line(color='#445B78')
    .encode(x='stran:Q', y='cena:Q')
)

# Overlay both layers and hide the view border.
vysledek = (zaklad + cara).configure_view(stroke='transparent')

vysledek

In [315]:
# Hand the finished chart to the project export helper under the name "03_ceny".
me_to_neurazi(
    vysledek,
    soubor="03_ceny",
    kredity=kredity['default'],
)

<figure>
    <a href="https://data.irozhlas.cz/knihy-grafy/03_ceny.svg" target="_blank">
    <img src="https://data.irozhlas.cz/knihy-grafy/03_ceny.svg" width="100%" alt="Omlouváme se, ale alternativní text se nepodařilo vygenerovat. Texty v grafu by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


In [295]:
# 0.25 quantile of the page count among df_rom rows from 2023 onwards
# (kept as a one-row, one-column DataFrame).
nejkratsi_ctvrt = (
    df_rom
    .filter(pl.col("rok") >= 2023)
    .select(pl.col("stran").quantile(0.25))
)
nejkratsi_ctvrt

stran
f64
221.0


In [297]:
# 0.75 quantile of the page count among df_rom rows from 2023 onwards
# (kept as a one-row, one-column DataFrame).
nejdelsi_ctvrt = (
    df_rom
    .filter(pl.col("rok") >= 2023)
    .select(pl.col("stran").quantile(0.75))
)
nejdelsi_ctvrt

stran
f64
350.0


In [299]:
# Median price of the charted rows from 2024-2025 whose page count lies between
# the two quartile bounds computed above.
# nejkratsi_ctvrt / nejdelsi_ctvrt are one-row, one-column DataFrames, so
# .item() pulls out the scalar explicitly instead of handing whole frames to
# is_between and relying on implicit conversion.
(
    do_grafu
    .filter(pl.col('stran').is_between(nejkratsi_ctvrt.item(), nejdelsi_ctvrt.item()))
    .filter(pl.col("rok").is_between(2024, 2025))
    .select(pl.col('cena'))
    .median()
)

cena
f64
384.0


In [307]:
# Mean price per year for df_rom rows whose page count lies between the two
# quartile bounds; the last ten years are shown.
# nejkratsi_ctvrt / nejdelsi_ctvrt are one-row, one-column DataFrames, so
# .item() pulls out the scalar explicitly instead of handing whole frames to
# is_between and relying on implicit conversion.
(
    df_rom
    .filter(pl.col('stran').is_between(nejkratsi_ctvrt.item(), nejdelsi_ctvrt.item()))
    .group_by('rok')
    .agg(pl.col('cena').mean())
    .sort(by='rok')
    .tail(10)
)

rok,cena
i64,f64
2016,278.608696
2017,294.0
2018,275.088889
2019,311.596154
2020,337.333333
2021,310.872093
2022,377.805195
2023,386.630435
2024,384.581081
2025,357.714286
