In [67]:
import re
import os
import json
import statistics
import datetime
import warnings
from src.najdi_rok import najdi_rok
import polars as pl
import pandas as pd
import altair as alt
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
df = pl.read_parquet(os.path.join("data","cnb_ceska_proza.parquet"))

In [19]:
df = pl.concat([df, pl.read_parquet(os.path.join("data","cnb_ceska_poezie.parquet")).with_columns(pl.col('100_7').alias('autorstvo_kod')).drop("100_7")], how="diagonal")

In [21]:
df

rok,titul,autorstvo,autorstvo_kod,001
f64,str,str,str,str
1804.0,"""Mrawné propowjdky s prawdiwýma…","""Hanke z Hankenštejna Jan Alois""","""jk01033422""","""bknjje05009"""
1804.0,"""Wesnického Faráře Rozmlauwánj …","""Rulík Jan""","""jk01103049""","""cpk20041494224"""
1807.0,"""Zrcadlo wýborného sedlského ob…","""Němeček František""",,"""bknjaj17284"""
1807.0,"""Ladislaw a djtky geho""","""Nejedlý Vojtěch""","""jk01082969""","""nkc20051626328"""
1808.0,"""Cytara nowého zákona prawého B…","""Koniáš Antonín""","""jk01061522""","""cpk20162825404"""
…,…,…,…,…
2024.0,"""Nějakým způsobem""","""Pospíšil Libor""","""jo2015866694""","""nkc20243627862"""
2024.0,"""Než nám dali jména...""","""Dlabal Milan Daniel""","""ola20241236036""","""nkc20243627863"""
2024.0,"""Vzpomínky ze šuplíku""","""Koudelka Petr""","""ola20241217490""","""nkc20243641181"""
2024.0,"""Činčila čile cvičila""","""Šetková Lucie""","""xx0284340""","""nkc20243631311"""


In [69]:
autority = pd.read_parquet(os.path.join("data","aut_vyber.parquet"))

In [70]:
autority = autority.merge(pd.read_json(os.path.join('data_raw','wikidata_raw.json')), left_index=True, right_index=True, how="outer")

In [73]:
def dva_letopocty(y, z):
    try:
        return int(re.search(r'\d{4}',str(y)).group(0))
    except:
        return z

In [75]:
autority['narozeni'] = autority.apply(lambda row: dva_letopocty(row['046_f'], row['w_narozeni']), axis=1)

In [76]:
autority['umrti'] = autority.apply(lambda row: dva_letopocty(row['046_g'], row['w_umrti']), axis=1)

In [77]:
autority = pl.from_pandas(autority)

In [93]:
df = df.join(autority.select(pl.col(['100_7','umrti','046_g','w_umrti'])).explode(pl.col('100_7')), left_on='autorstvo_kod', right_on='100_7', how='left')

In [95]:
df = df.filter(pl.col('rok') <= pl.col('umrti'))

In [61]:
def irozhlas_conf():

    return {
        "config" : {
             "title": {'font': "Noticia Text",
                      'fontSize': 20,
                      'anchor': 'start',
                    'subtitleFont': 'Noticia Text',
                      'subtitleFontSize': 14,
                      'subtitlePadding': 14,
                       'dy': -10
                      },
             "axis": {
                "labelFont": "Asap",
                "titleFont": "Asap",
                "titleFontWeight": "normal",
                "labelFontSize": 11,
                "titleFontSize": 14,
                'labelPadding': 2,
                 'titlePadding': 10
            },
            "legend": {
                "labelFont": "Asap",
                "labelFontWeight": "normal",
                "titleFont": "Asap",
                "titleFontWeight": "normal",
                "labelFontSize": 11,
                "titleFontSize": 14
            }
        }
    }

alt.themes.register('irozhlas', irozhlas_conf)
alt.themes.enable('irozhlas')

ThemeRegistry.enable('irozhlas')

In [159]:
def bodovy_graf(
    temata={}, 
    titulek="",
    podtitulek = "",
    od_roku=None, 
    barvy=[], 
    kredit="vizualizace: iROZHLAS.cz · 2025"
):

    def sanitize_filename(filename):
        invalid_chars_pattern = r'[<>:"/\\|?*]'
        sanitized_filename = re.sub(invalid_chars_pattern, '_', filename)
        return sanitized_filename
    
    def hledej_tema(label, keywords):
        print(keywords)
        return df.filter(pl.col('rok') > od_roku).filter(pl.col('autorstvo_kod') == keywords).with_columns(kdo = pl.lit(label)).with_columns(
                   pl.col("rok").map_elements(
                       lambda x: datetime.date(year=int(x), month=1, day=1), 
                       return_dtype=pl.Date
                   ).cast(pl.Datetime)
               )

    po_filtru = pl.concat([hledej_tema(tema, retezec) for tema, retezec in temata.items()])
    print(po_filtru)
    
    y_encoding = {
        'field': 'kdo',
        'type': 'nominal',
        'title': None,
        'sort': [tema for tema, retezec in temata.items()]
    }
    
    gaussian_jitter = alt.Chart(po_filtru.to_pandas(), title={'text': titulek, 'subtitle': podtitulek}) \
        .mark_circle(size=15) \
        .encode(x=alt.X("rok:T", title=None), y=alt.Y(**y_encoding), yOffset=alt.YOffset("jitter:Q", scale=alt.Scale(range=[12, 15])), color=alt.Color('kdo:N', scale=alt.Scale(range=barvy), sort=[tema for tema, retezec in temata.items()]).legend(None)) \
        .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")

    credits = alt.Chart({"values": [{"text": kredit}]}) \
    .mark_text(
        align='right',
        baseline='bottom',
        dx=180,  # Adjust horizontal position (negative moves left)
        dy=110,  # Adjust vertical position (negative moves up)
        fontSize=11,
        font='Asap',
        color='#222'
    ).encode(
        text='text:N'
    )
    
    final_chart = (gaussian_jitter + credits).properties(
        width=350,
        height=35 * len(temata)
    )

    os.makedirs('grafy', exist_ok=True)
    for suffix in ['svg','png']:
        final_chart.save(os.path.join('grafy',f'{sanitize_filename(titulek)}.{suffix}'))
    
    return final_chart    

In [169]:
bodovy_graf(
    temata={'Karel Hynek Mácha':'jk01072915','Petr Miloslav Veselský': 'jk01142116', 'Josef Kalenský': 'jk01052646', 'Fan Vavřincová': 'jk01141829'},
    titulek="172 let české literatury ve třech kariérách",
    podtitulek="Knihy vydané během života",
    barvy=['#445B78', '#DB842F', '#70871E', '#E09DA3'],
    od_roku=1800,
)

jk01072915
jk01142116
jk01052646
jk01141829
shape: (38, 9)
┌─────────────────────┬─────────────────────────────────┬────────────────────────┬───────────────┬───┬────────┬───────────┬─────────┬────────────────────────┐
│ rok                 ┆ titul                           ┆ autorstvo              ┆ autorstvo_kod ┆ … ┆ umrti  ┆ 046_g     ┆ w_umrti ┆ kdo                    │
│ ---                 ┆ ---                             ┆ ---                    ┆ ---           ┆   ┆ ---    ┆ ---       ┆ ---     ┆ ---                    │
│ datetime[μs]        ┆ str                             ┆ str                    ┆ str           ┆   ┆ f64    ┆ list[str] ┆ f64     ┆ str                    │
╞═════════════════════╪═════════════════════════════════╪════════════════════════╪═══════════════╪═══╪════════╪═══════════╪═════════╪════════════════════════╡
│ 1836-01-01 00:00:00 ┆ Mág                             ┆ Mácha Karel Hynek      ┆ jk01072915    ┆ … ┆ 1836.0 ┆ ["1836"]  ┆ 1836.0  ┆ Karel Hynek 

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

In [165]:
df.filter(pl.col('autorstvo').str.contains('Vavřincová'))

rok,titul,autorstvo,autorstvo_kod,001,umrti,046_g,w_umrti
f64,str,str,str,str,f64,list[str],f64
1939.0,"""Patsy tropí hlouposti""","""Vavřincová Fan""","""jk01141829""","""bk193903380""",2012.0,"[""2012""]",2012.0
1941.0,"""Sladká dívka""","""Vavřincová Fan""","""jk01141829""","""bk194104206""",2012.0,"[""2012""]",2012.0
1943.0,"""Pozor! Zázračné dítě""","""Vavřincová Fan""","""jk01141829""","""bk194301860""",2012.0,"[""2012""]",2012.0
1944.0,"""Eva tropí hlouposti""","""Vavřincová Fan""","""jk01141829""","""cpk20010736350""",2012.0,"[""2012""]",2012.0
1991.0,"""Josefína""","""Vavřincová Fan""","""jk01141829""","""ck9205163""",2012.0,"[""2012""]",2012.0
…,…,…,…,…,…,…,…
1997.0,"""Marie""","""Vavřincová Fan""","""jk01141829""","""cpk19970212361""",2012.0,"[""2012""]",2012.0
1997.0,"""Lépe je být pošetilý""","""Vavřincová Fan""","""jk01141829""","""cpk19970190142""",2012.0,"[""2012""]",2012.0
2006.0,"""Taková normální rodinka""","""Vavřincová Fan""","""jk01141829""","""nkc20061645262""",2012.0,"[""2012""]",2012.0
2007.0,"""Vrah a srdcová dáma""","""Vavřincová Fan""","""jk01141829""","""nkc20071719833""",2012.0,"[""2012""]",2012.0
