In [3]:
import re
import os
import json
import statistics
import datetime
import warnings
from src.najdi_rok import najdi_rok
import polars as pl
import pandas as pd
import altair as alt
# Lift Altair's row limit so the charts below can embed every row of the
# frames they are given.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [4]:
# Load the first catalogue extract (the file name suggests Czech prose) into polars.
df = pl.read_parquet(os.path.join("data","cnb_ceska_proza.parquet"))

In [5]:
# Append the second extract (the file name suggests Czech poetry). Its '100_7'
# column is copied to 'autorstvo_kod' and then dropped, i.e. effectively
# renamed, before a diagonal concat that unions the columns by name.
# NOTE(review): `df` is reassigned from itself, so running this cell twice
# appends the same rows twice; re-run the loading cell before re-running it.
df = pl.concat([df, pl.read_parquet(os.path.join("data","cnb_ceska_poezie.parquet")).with_columns(pl.col('100_7').alias('autorstvo_kod')).drop("100_7")], how="diagonal")

In [6]:
# Preview the combined frame (the rendered table is truncated to head and tail rows).
df

rok,titul,autorstvo,autorstvo_kod,001
f64,str,str,str,str
1804.0,"""Mrawné propowjdky s prawdiwýma…","""Hanke z Hankenštejna Jan Alois""","""jk01033422""","""bknjje05009"""
1804.0,"""Wesnického Faráře Rozmlauwánj …","""Rulík Jan""","""jk01103049""","""cpk20041494224"""
1807.0,"""Zrcadlo wýborného sedlského ob…","""Němeček František""",,"""bknjaj17284"""
1807.0,"""Ladislaw a djtky geho""","""Nejedlý Vojtěch""","""jk01082969""","""nkc20051626328"""
1808.0,"""Cytara nowého zákona prawého B…","""Koniáš Antonín""","""jk01061522""","""cpk20162825404"""
…,…,…,…,…
2024.0,"""Nějakým způsobem""","""Pospíšil Libor""","""jo2015866694""","""nkc20243627862"""
2024.0,"""Než nám dali jména...""","""Dlabal Milan Daniel""","""ola20241236036""","""nkc20243627863"""
2024.0,"""Vzpomínky ze šuplíku""","""Koudelka Petr""","""ola20241217490""","""nkc20243641181"""
2024.0,"""Činčila čile cvičila""","""Šetková Lucie""","""xx0284340""","""nkc20243631311"""


In [7]:
# Load the author table with pandas (presumably authority records — verify
# against the data source); it is converted to polars further down.
autority = pd.read_parquet(os.path.join("data","aut_vyber.parquet"))

In [8]:
# Outer-join the raw Wikidata export onto the author table by index, keeping
# rows that appear in either source.
# NOTE(review): `autority` is reassigned from itself, so re-running this cell
# merges the export onto the already merged table; reload the table first.
autority = autority.merge(pd.read_json(os.path.join('data_raw','wikidata_raw.json')), left_index=True, right_index=True, how="outer")

In [9]:
def dva_letopocty(y, z):
    """Pick a year from a primary value, falling back to a secondary one.

    Args:
        y: Primary value of any type. It is converted with ``str()`` and
            searched for the first run of four consecutive digits.
        z: Fallback returned unchanged when ``y`` contains no such run.

    Returns:
        The first four-digit run in ``str(y)`` as ``int``, otherwise ``z``.
    """
    nalez = re.search(r'\d{4}', str(y))
    # Only the "no year found" case falls back to z. Anything unexpected
    # (including KeyboardInterrupt) propagates instead of being swallowed.
    if nalez is None:
        return z
    return int(nalez.group(0))

In [10]:
# Birth year per row: a four-digit year found in field '046_f' wins, otherwise
# the value of 'w_narozeni' is used as the fallback (see dva_letopocty).
autority['narozeni'] = autority.apply(lambda row: dva_letopocty(row['046_f'], row['w_narozeni']), axis=1)

In [11]:
# Death year per row: a four-digit year found in field '046_g' wins, otherwise
# the value of 'w_umrti' is used as the fallback (see dva_letopocty).
autority['umrti'] = autority.apply(lambda row: dva_letopocty(row['046_g'], row['w_umrti']), axis=1)

In [12]:
# Convert the pandas table to polars so it can be joined with `df`.
# NOTE(review): the name `autority` changes type here (pandas -> polars), so
# the pandas cells above cannot be re-run without reloading the table first.
autority = pl.from_pandas(autority)

In [13]:
# Attach the death-year columns to every title through the author code.
# '100_7' is exploded first, so each code held in that column gets its own row.
# NOTE(review): a code that occurs in more than one authority row would
# duplicate titles in this left join — TODO confirm the codes are unique.
# NOTE(review): `df` is reassigned from itself; re-running the cell joins again.
df = df.join(autority.select(pl.col(['100_7','umrti','046_g','w_umrti'])).explode(pl.col('100_7')), left_on='autorstvo_kod', right_on='100_7', how='left')

In [14]:
# Keep only titles whose year is not later than the author's death year.
# NOTE(review): rows where 'umrti' is null make the comparison null and are
# dropped by filter as well — confirm that excluding them is intended.
df = df.filter(pl.col('rok') <= pl.col('umrti'))

In [15]:
def irozhlas_conf():
    """Build the chart theme dictionary.

    Returns:
        A dict with a single ``"config"`` key whose ``"title"``, ``"axis"``
        and ``"legend"`` entries hold font, size, weight and padding settings.
    """
    text_font = "Noticia Text"
    ui_font = "Asap"

    title_cfg = {
        "font": text_font,
        "fontSize": 14,
        "anchor": "start",
        "fontWeight": "bolder",
        "subtitleFont": text_font,
        "subtitleFontSize": 12,
        "subtitleFontWeight": "lighter",
        "subtitlePadding": 12,
        "dy": -12,
    }

    axis_cfg = {
        "labelFont": ui_font,
        "titleFont": ui_font,
        "fontWeight": "lighter",
        "titleFontWeight": "lighter",
        "labelFontSize": 10,
        "titleFontSize": 10,
        "labelPadding": 2,
        "titlePadding": 10,
    }

    legend_cfg = {
        "labelFont": ui_font,
        "labelFontWeight": "normal",
        "titleFont": ui_font,
        "titleFontWeight": "normal",
        "labelFontSize": 10,
        "titleFontSize": 10,
    }

    return {"config": {"title": title_cfg, "axis": axis_cfg, "legend": legend_cfg}}

# Register the theme factory under the name 'irozhlas' and make it the active
# theme for the charts rendered afterwards.
# NOTE(review): newer Altair releases deprecate `alt.themes` in favour of
# `alt.theme` — check against the installed version.
alt.themes.register('irozhlas', irozhlas_conf)
alt.themes.enable('irozhlas')

ThemeRegistry.enable('irozhlas')

In [16]:
def bodovy_graf(
    temata=None,
    titulek="",
    podtitulek="",
    od_roku=None,
    barvy=None,
    kredit="vizualizace: iROZHLAS.cz · 2025"
):
    """Draw a jittered strip plot of titles per author and save it to disk.

    Reads the module-level polars frame ``df`` (columns ``rok`` and
    ``autorstvo_kod`` are used) and writes ``grafy/<title>.svg`` and
    ``grafy/<title>.png``.

    Args:
        temata: Mapping of display label -> author code. Each label becomes
            one row of the chart, in mapping order. Must not be empty.
        titulek: Chart title; also used (sanitised) as the output file name.
            An empty title is saved as ``graf``.
        podtitulek: Chart subtitle.
        od_roku: Keep only titles with ``rok`` greater than this value.
            ``None`` (default) applies no lower bound.
        barvy: Colours assigned to the labels in order. ``None`` or an empty
            sequence keeps Altair's default colour scale.
        kredit: Credit line drawn as a text layer on the chart.

    Returns:
        The layered Altair chart (points + credit text).

    Raises:
        ValueError: If ``temata`` is empty or ``None``.
    """
    if not temata:
        raise ValueError(
            "bodovy_graf: 'temata' must map at least one label to an author code"
        )

    # Label order drives both the y-axis order and the colour assignment.
    poradi = list(temata)

    def sanitize_filename(filename):
        """Replace characters that are invalid in file names with '_'."""
        invalid_chars_pattern = r'[<>:"/\\|?*]'
        return re.sub(invalid_chars_pattern, '_', filename)

    def hledej_tema(label, keywords):
        """Return rows of ``df`` for one author code, labelled in column 'kdo'."""
        vyber = df.filter(pl.col('autorstvo_kod') == keywords)
        # Comparing against None would yield null for every row and drop them
        # all, so the lower bound is applied only when one was given.
        if od_roku is not None:
            vyber = vyber.filter(pl.col('rok') > od_roku)
        return vyber.with_columns(kdo=pl.lit(label)).with_columns(
            # Turn the numeric year into 1 January of that year for the
            # temporal x-axis.
            pl.col("rok").map_elements(
                lambda x: datetime.date(year=int(x), month=1, day=1),
                return_dtype=pl.Date
            ).cast(pl.Datetime)
        )

    po_filtru = pl.concat([hledej_tema(tema, retezec) for tema, retezec in temata.items()])

    y_encoding = {
        'field': 'kdo',
        'type': 'nominal',
        'title': None,
        'sort': poradi
    }

    barva_kwargs = {'sort': poradi}
    if barvy:
        # An empty range would leave the marks without colours, so a custom
        # scale is set only when colours were actually supplied.
        barva_kwargs['scale'] = alt.Scale(range=list(barvy))

    # Only the columns the encodings use are embedded in the chart spec.
    data = po_filtru.select('rok', 'kdo').to_pandas()

    gaussian_jitter = alt.Chart(data, title={'text': titulek, 'subtitle': podtitulek}) \
        .mark_circle(size=15) \
        .encode(
            x=alt.X("rok:T", title=None),
            y=alt.Y(**y_encoding),
            yOffset=alt.YOffset("jitter:Q", scale=alt.Scale(range=[12, 15])),
            color=alt.Color('kdo:N', **barva_kwargs).legend(None)
        ) \
        .transform_calculate(
            # Box-Muller transform: turns two uniform random() draws into one
            # normally distributed vertical offset per point.
            jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
        )

    credits = alt.Chart({"values": [{"text": kredit}]}) \
        .mark_text(
            align='right',
            baseline='bottom',
            dx=180,  # Adjust horizontal position (negative moves left)
            dy=110,  # Adjust vertical position (negative moves up)
            fontSize=11,
            font='Asap',
            color='#222'
        ).encode(
            text='text:N'
        )

    final_chart = (gaussian_jitter + credits).properties(
        width=350,
        height=35 * len(temata)
    )

    os.makedirs('grafy', exist_ok=True)
    # An empty title would otherwise produce a hidden file named ".svg".
    nazev_souboru = sanitize_filename(titulek) or 'graf'
    for suffix in ['svg', 'png']:
        final_chart.save(os.path.join('grafy', f'{nazev_souboru}.{suffix}'))

    return final_chart

In [42]:
def hledej_autorstvo(label, keywords, od_roku=1800):
    """Select the titles of one author from the module-level frame ``df``.

    Args:
        label: Display name written to the new column ``kdo``.
        keywords: Author code compared for equality with ``autorstvo_kod``.
        od_roku: Keep only rows with ``rok`` greater than this value
            (default 1800). ``None`` applies no lower bound.

    Returns:
        A polars frame with the matching rows, a ``kdo`` column holding
        ``label`` and ``rok`` converted to a datetime on 1 January of the year.
    """
    print(keywords)
    vyber = df.filter(pl.col('autorstvo_kod') == keywords)
    # Comparing against None would yield null for every row and drop them all.
    if od_roku is not None:
        vyber = vyber.filter(pl.col('rok') > od_roku)
    return vyber.with_columns(kdo=pl.lit(label)).with_columns(
        # Turn the numeric year into a date so it can be plotted on a
        # temporal axis.
        pl.col("rok").map_elements(
            lambda x: datetime.date(year=int(x), month=1, day=1),
            return_dtype=pl.Date
        ).cast(pl.Datetime)
    )

In [52]:
# Display name -> author code of the people shown in the chart; the insertion
# order is reused for the row order and the colour assignment.
koho_ukazujeme = {
    'Karel Hynek Mácha': 'jk01072915',
    'Petr Miloslav Veselský': 'jk01142116',
    'Josef Kalenský': 'jk01052646',
    'Fan Vavřincová': 'jk01141829',
}

In [54]:
# Stack the titles of every selected author into one frame; the 'kdo' column
# added by hledej_autorstvo carries the display name.
do_grafu = pl.concat([hledej_autorstvo(jmeno, kod) for jmeno, kod in koho_ukazujeme.items()])
do_grafu

jk01072915
jk01142116
jk01052646
jk01141829


rok,titul,autorstvo,autorstvo_kod,001,umrti,046_g,w_umrti,kdo
datetime[μs],str,str,str,str,f64,list[str],f64,str
1836-01-01 00:00:00,"""Mág""","""Mácha Karel Hynek""","""jk01072915""","""cpk20041417516""",1836.0,"[""1836""]",1836.0,"""Karel Hynek Mácha"""
1834-01-01 00:00:00,"""Ukradený střewjc, anebo, Prawý…","""Veselský Petr Miloslav""","""jk01142116""","""nkc20102130003""",1889.0,"[""1889""]",1889.0,"""Petr Miloslav Veselský"""
1834-01-01 00:00:00,"""Jozefka, anebo, Podiwné schled…","""Veselský Petr Miloslav""","""jk01142116""","""cpk20021123969""",1889.0,"[""1889""]",1889.0,"""Petr Miloslav Veselský"""
1837-01-01 00:00:00,"""Jolanta, anebo, Obnowená důwěr…","""Veselský Petr Miloslav""","""jk01142116""","""nkc20092009453""",1889.0,"[""1889""]",1889.0,"""Petr Miloslav Veselský"""
1838-01-01 00:00:00,"""Mocnost lásky, neb, Wiljm a Ma…","""Veselský Petr Miloslav""","""jk01142116""","""cpk20021195155""",1889.0,"[""1889""]",1889.0,"""Petr Miloslav Veselský"""
…,…,…,…,…,…,…,…,…
1997-01-01 00:00:00,"""Marie""","""Vavřincová Fan""","""jk01141829""","""cpk19970212361""",2012.0,"[""2012""]",2012.0,"""Fan Vavřincová"""
1997-01-01 00:00:00,"""Lépe je být pošetilý""","""Vavřincová Fan""","""jk01141829""","""cpk19970190142""",2012.0,"[""2012""]",2012.0,"""Fan Vavřincová"""
2006-01-01 00:00:00,"""Taková normální rodinka""","""Vavřincová Fan""","""jk01141829""","""nkc20061645262""",2012.0,"[""2012""]",2012.0,"""Fan Vavřincová"""
2007-01-01 00:00:00,"""Vrah a srdcová dáma""","""Vavřincová Fan""","""jk01141829""","""nkc20071719833""",2012.0,"[""2012""]",2012.0,"""Fan Vavřincová"""


In [56]:
# NOTE(review): the title says "three careers" while koho_ukazujeme lists four
# authors — confirm the wording.
titulek = "172 let české literatury ve třech kariérách"
podtitulek = "Knihy vydané během života"

# Keyword arguments for the y channel: one nominal row per author, without an
# axis title, ordered as in koho_ukazujeme.
y_encoding = dict(
    field='kdo',
    type='nominal',
    title=None,
    sort=list(koho_ukazujeme),
)

In [82]:
# Strip plot with one row per author and one point per title.
# Only 'rok' and 'kdo' are encoded, so only those columns are handed to
# Altair: the embedded data stays small and the unused object columns no
# longer have to be sanitised.
tri_kariery = (
    alt.Chart(
        do_grafu.select('rok', 'kdo').to_pandas(),
        title={'text': titulek, 'subtitle': podtitulek},
        width=300,
        height=150,
    )
    .mark_circle(size=15)
    .encode(
        x=alt.X("rok:T", title=None, axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6')),
        y=alt.Y(**y_encoding, axis=alt.Axis(orient='left', domainOpacity=0, tickColor='#DCDDD6')),
        yOffset=alt.YOffset("jitter:Q", scale=alt.Scale(range=[12, 15])),
        color=alt.Color(
            'kdo:N',
            scale=alt.Scale(range=['#D6534B', '#445B78', '#DB842F', '#70871E']),
            sort=list(koho_ukazujeme),
        ).legend(None),
    )
    # Box-Muller transform: turns two uniform random() draws into one normally
    # distributed vertical offset per point.
    # NOTE(review): no seed is set, so the point positions presumably differ
    # between renders — confirm if reproducible output is needed.
    .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")
)

tri_kariery

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
