In [1]:
import os
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin

pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

In [3]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))
df = df.filter(pl.col("rok") >= 1800)

df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null()).unique(subset=["008","100_a","245_a","245_p"], keep="first")
print(len(df))

716789
705921


In [4]:
df = df.filter(pl.col("stran") >= 30)

In [5]:
aut = pl.read_parquet(os.path.join("data","aut_vyber.parquet"))
cesi = aut.explode("370_c").filter(pl.col("370_c").str.contains("Česk")).explode("100_7").select(pl.col("100_7")).to_series().to_list()
len(cesi)

364420

In [6]:
vek = pl.read_parquet(os.path.join("data","narozeni-umrti-gender.parquet"))
df = df.join(vek, on="100_7", how="left").with_columns((pl.col("rok") - pl.col("narozeni")).alias("vek"))

In [9]:
df_100 = pl.read_parquet(os.path.join("data","podil_autorstva_s_pauzou.parquet")).with_columns(pl.col("rok").map_elements(lambda x: x.year)).filter(pl.col("rok").is_between(1910,1990)).with_columns(pl.col("podil_cechu_15").map_elements(lambda x: round(x, 3)))
df_koncici = pl.read_parquet(os.path.join('data','podily_konciciho_autorstva_cs.parquet'))

In [11]:
graf_pauzy = alt.Chart(
    alt_friendly(df_100), width=300,
    title=alt.Title("…a jak velká se odmlčela na 10 a více let")
).mark_area(color='#E09DA3'
).encode(alt.X(
    "rok:T", title=None, axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')
), alt.Y(
    "podil_cechu_10:Q", title=None, 
    axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6', labelExpr="datum.label * 100 + ' %'", orient='right', tickCount=3),
    scale=alt.Scale(domain=[0,0.25])
))
                    
rule1 = alt.Chart(alt.Data(values=[{'rok': '1948-02-25'}])).mark_rule(
    color='#81A9D5',
    strokeDash=[4, 4]  # Optional: makes the line dashed
).encode(
    x='rok:T'
)

rule2 = alt.Chart(alt.Data(values=[{'rok': '1968-08-21'}])).mark_rule(
    color='#81A9D5',
    strokeDash=[4, 4]  # Optional: makes the line dashed
).encode(
    x='rok:T'
)

# Add text annotation
text1 = alt.Chart(alt.Data(values=[{'rok': '1948-02-25', 'y': 0.2}])).mark_text(
    # angle=270,  # Rotates text vertically
    align='right',
    baseline='middle',
    dy=15,# Slight horizontal offset from the line
    dx=-8,
    text=['komunistický','převrat'],
    font='Asap'
).encode(
    x='rok:T',
    y=alt.value(0)  # Places text at bottom of chart
)


# Add text annotation
text2 = alt.Chart(alt.Data(values=[{'rok': '1968-08-21', 'y': 0.2}])).mark_text(
    # angle=270,  # Rotates text vertically
    align='left',
    baseline='middle',
    dy=15,# Slight horizontal offset from the line
    dx=8,
    text=['srpnová','okupace'],
    font='Asap'
).encode(
    x='rok:T',
    y=alt.value(0)  # Places text at bottom of chart
)

graf_pauzy_graf_komb = graf_pauzy + rule1 + text1 + rule2 + text2
graf_pauzy_graf_komb

In [17]:
type(graf_pauzy_graf_komb)

altair.vegalite.v5.api.LayerChart

In [35]:
credits = pl.DataFrame({'text': ['zdroj dat: Česká národní bibliografie · vizualizace: iROZHLAS.cz · 2025']})
credits = alt.Chart(credits.to_pandas(), width=300, height=30).encode(text=alt.Text('text:N')).mark_text(fontSize=10, font="Asap", baseline='line-bottom')
credits

In [127]:
def me_to_neurazi(graf: alt.vegalite.v5.api.LayerChart, kredity: str, soubor:str, slozka='grafy'): 

    from lxml import etree
    import polars as pl
    import os
    
    def concatenate_svg_vertically(image1_path, image2_path, output_path):
        with open(image1_path, 'r', encoding='utf-8') as f:
            svg1 = etree.parse(f)
        with open(image2_path, 'r', encoding='utf-8') as f:
            svg2 = etree.parse(f)
        root1 = svg1.getroot()
        root2 = svg2.getroot()
        width1 = int(root1.get("width", "0").replace("px", ""))
        height1 = int(root1.get("height", "0").replace("px", ""))
        width2 = int(root2.get("width", "0").replace("px", ""))
        height2 = int(root2.get("height", "0").replace("px", ""))
        new_width = max(width1, width2)
        new_height = height1 + height2
        new_svg = etree.Element("svg", xmlns="http://www.w3.org/2000/svg", width=f"{new_width}px", height=f"{new_height}px")
        background = etree.Element("rect", width=str(new_width), height=str(new_height), fill="white")
        new_svg.append(background)
        group1 = etree.Element("g", transform="translate(0,0)")
        for child in root1:
            group1.append(child)
        x_offset = new_width - width2
        group2 = etree.Element("g", transform=f"translate({x_offset},{height1})")
        for child in root2:
            group2.append(child)
        new_svg.append(group1)
        new_svg.append(group2)
        with open(output_path, 'wb') as f:
            f.write(etree.tostring(new_svg, pretty_print=True, encoding='utf-8', xml_declaration=True))

    os.makedirs(slozka, exist_ok=True)
    graf.save("grafy/temp1.svg")
    spodni = pl.DataFrame({'text': [kredity]})
    spodni = alt.Chart(spodni.to_pandas(), width=300, height=30).encode(
        x=alt.value(300),
        text=alt.Text('text:N')
    ).mark_text(
        fontSize=10, 
        font="Asap", 
        baseline='line-bottom', 
        align="right",
        dx=-5
    ).configure_view(stroke='transparent')
    spodni.save("grafy/temp2.svg")
    
    concatenate_svg_vertically(f'{slozka}/temp1.svg', f'{slozka}/temp2.svg', f'{slozka}/{soubor}.svg')
    
    print("ahoj")

    return spodni

In [125]:
me_to_neurazi(graf_pauzy_graf_komb, ["data: Národní knihovna, Wikidata ● vizualizace: iROZHLAS.cz ● 2025"], "pokusny_pokus")

ahoj
