In [1]:
import os
import datetime
import warnings
import polars as pl
import pandas as pd
import altair as alt

from src.najdi_rok import najdi_rok
from src.pocet_stran import pocet_stran
from src.bez_bordelu import bez_bordelu
from src.alt_friendly import alt_friendly
from src.hezke_jmeno import hezke_jmeno
from src.kristi_promin import kristi_promin

pl.Config(tbl_rows=100)
alt.data_transformers.disable_max_rows()
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')
warnings.filterwarnings('ignore')

In [48]:
df = pl.read_parquet(os.path.join("data/cnb_sloupce","100.parquet"))
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","leader.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","008.parquet")), left_on="001", right_on="001", how="left")
df = df.to_pandas()
df = df[df["leader"].str[6].isin(["a", "t"])]
df = df[~df["leader"].str[7].isin(["b", "i", "s", " "])]
df = df[(df["008"].str[15:17] == "xr") & (df["008"].str[35:38] == "cze")]
df = pl.from_pandas(df)
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","020.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","022.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","245.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","300.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","655.parquet")), left_on="001", right_on="001", how="left")
df = df.join(pl.read_parquet(os.path.join("data/cnb_sloupce","700.parquet")), left_on="001", right_on="001", how="left")
df = df.explode("022_a").filter(pl.col("022_a").is_null())
df = df.with_columns(pl.col('008').map_elements(najdi_rok, return_dtype=int).alias('rok'))
df = df.with_columns(pl.col('300_a').map_elements(pocet_stran, return_dtype=int).alias('stran'))
df = df.with_columns(pl.col('245_a').map_elements(bez_bordelu, return_dtype=str))
df = df.explode('245_p').with_columns(pl.col('245_p').map_elements(bez_bordelu, return_dtype=str))
print(len(df))
df = df.filter(pl.col("rok").is_between(1800,2024))
df = df.filter(pl.col("stran") > 30)

df = df.drop_nulls(subset=['100_7'])
df = df.filter((~pl.col("245_h").str.contains("grafika")) | pl.col("245_h").is_null())
print(len(df))

716789
582075


In [110]:
def obdobi(rok1, rok2, pasmo):
    pred = df.filter(pl.col("rok").is_between(rok1-pasmo,rok1)).select(pl.col("100_7")).to_series().to_list()
    behem = set(df.filter(pl.col("rok").is_between(rok1+1,rok2-1)).select(pl.col("100_7")).to_series().to_list())
    po = set(df.filter(pl.col("rok").is_between(rok2,rok2+pasmo)).select(pl.col("100_7")).to_series().to_list())
    nevychazeli = [p for p in pred if p in po and p not in behem]
    print(len(nevychazeli))
    return df.filter(
        pl.col('100_7').is_in(nevychazeli)
    ).filter(pl.col("rok").is_between(rok2,rok2+pasmo) | pl.col("rok").is_between(rok1-pasmo,rok1)).group_by(
        ['100_a','100_7']
    ).len().sort(by='len',descending=True).head(100).join(
        df.filter(pl.col("rok").is_between(rok1-pasmo,rok1)).group_by(['100_a','100_7']).len().rename({"len":"len_before"}).select(
            pl.col(['100_7','len_before'])
        ), on='100_7', how='left'
    ).with_columns((pl.col('len_before') / pl.col('len')).alias('podil'))

In [124]:
obdobi(1939,1945,10)

5155


100_a,100_7,len,len_before,podil
str,str,u32,u32,f64
"""Lenin, Vladimir Il‘jič,""","""jn19981001737""",133,13,0.097744
"""Gor‘kij, Maksim,""","""jn19990002786""",103,13,0.126214
"""Stalin, Iosif Vissarionovič,""","""jn19990210582""",97,8,0.082474
"""Rolland, Romain,""","""jn19990007118""",93,40,0.430108
"""Masaryk, Tomáš Garrigue,""","""jk01080472""",86,65,0.755814
"""Gottwald, Klement,""","""jk01032566""",86,3,0.034884
"""Neumann, Stanislav Kostka,""","""jk01090079""",78,29,0.371795
"""Sokol Tůma, František,""","""jk01120356""",70,62,0.885714
"""Marx, Karl,""","""jn19990005454""",65,15,0.230769
"""Medek, Rudolf,""","""jk01081030""",64,63,0.984375


In [126]:
obdobi(1939,1945,10).select(pl.col("100_a")).to_series().to_list()

5155


['Lenin, Vladimir Il‘jič,',
 'Gor‘kij, Maksim,',
 'Stalin, Iosif Vissarionovič,',
 'Rolland, Romain,',
 'Masaryk, Tomáš Garrigue,',
 'Gottwald, Klement,',
 'Neumann, Stanislav Kostka,',
 'Sokol Tůma, František,',
 'Marx, Karl,',
 'Medek, Rudolf,',
 'Galsworthy, John,',
 'Zápotocký, Antonín,',
 'Langer, František,',
 'Weinfurter, Karel,',
 'Shaw, Bernard,',
 'Sinclair, Upton,',
 'Maurois, André,',
 'Engels, Friedrich,',
 'Èrenburg, Il‘ja Grigor‘jevič,',
 'Tolstoj, Aleksej Nikolajevič,',
 'Svačina, Bohumil,',
 'Maupassant, Guy de,',
 'Borovanský, Ladislav,',
 'Petrus, Jan,',
 'Hašek, Jaroslav,',
 'Dorazil, Otakar,',
 'Čechov, Anton Pavlovič,',
 'Král, Saša,',
 'Ludwig, Emil,',
 'Kipling, Rudyard,',
 'Undset, Sigrid,',
 'Sova, Antonín,',
 'Hruban, Konrád,',
 'Šolochov, Michail Aleksandrovič,',
 'Kolda, Jan,',
 'France, Anatole,',
 'Vykoukal, František Vladimír,',
 'Dostojevskij, Fedor Michajlovič,',
 'Gajdar, Arkadij Petrovič,',
 'De la Roche, Mazo,',
 'Kisch, Egon Erwin,',
 'Lewis, Sincl

In [122]:
df.filter(pl.col('100_a').str.contains('Orwell, G')).select(pl.col(['245_a','rok'])).sort(by='rok')

245_a,rok
str,i64
"""Trosečníkem v Paříži a Londýně""",1935
"""Farma zvířat""",1946
"""Hold Katalánsku""",1991
"""Farma zvířat""",1991
"""1984""",1991
"""Úpadek anglické vraždy a jiné …",1995
"""V břiše velryby""",1996
"""Na dně v Paříži a Londýně""",1996
"""Uvnitř velryby a jiné eseje""",1997
"""Barmské dny""",1998


In [114]:
obdobi(1948,1989,10)

1480


100_a,100_7,len,len_before,podil
str,str,u32,u32,f64
"""Courths-Mahler, Hedwig,""","""jn19990001513""",190,2,0.010526
"""Javořická, Vlasta,""","""jk01051344""",181,28,0.154696
"""Marešová, Eva,""","""jo2008475985""",164,156,0.95122
"""Strong, Pitt,""","""jo2005267810""",96,94,0.979167
"""Brand, Max,""","""jn19981000325""",61,12,0.196721
"""Clifton, Léon""","""xx0111518""",56,53,0.946429
"""Hüttlová, Jaromíra,""","""xx0282734""",40,30,0.75
"""Háj, Felix,""","""jk01033019""",39,12,0.307692
"""Pokorný, Jaroslav,""","""jk01100056""",38,15,0.394737
"""Steiner, Rudolf,""","""jn19990009728""",36,4,0.111111


In [130]:
obdobi(1948,1989,10).filter(pl.col("podil").is_between(0.25,0.75))

1480


100_a,100_7,len,len_before,podil
str,str,u32,u32,f64
"""Hüttlová, Jaromíra,""","""xx0282734""",40,30,0.75
"""Háj, Felix,""","""jk01033019""",39,12,0.307692
"""Pokorný, Jaroslav,""","""jk01100056""",38,15,0.394737
"""Gregory, Jackson,""","""jn20000602422""",23,6,0.26087
"""Tippmannová, Marie,""","""jk01132496""",20,14,0.7
"""Churchill, Winston,""","""jn19990003864""",20,7,0.35
"""Engliš, Karel,""","""jk01030495""",20,15,0.75
"""Fringilla,""","""jk01031977""",15,10,0.666667
"""Součková, Milada,""","""jz8001161""",15,7,0.466667
"""Weinfurter, Karel,""","""jk01151502""",15,5,0.333333


In [74]:
obdobi(1948,1989,5)

420


100_a,100_7,len
str,str,u32
"""Javořická, Vlasta,""","""jk01051344""",75
"""Courths-Mahler, Hedwig,""","""jn19990001513""",43
"""Háj, Felix,""","""jk01033019""",33
"""Pokorný, Jaroslav,""","""jk01100056""",23
"""Hüttlová, Jaromíra,""","""xx0282734""",15
"""Minařík, Květoslav,""","""jn19990209552""",15
"""Brunton, Paul,""","""jn19990001164""",14
"""Mitchell, Margaret,""","""jn20000604050""",13
"""Fringilla,""","""jk01031977""",12
"""Tomáš, Eduard,""","""jn99240001194""",12


In [76]:
obdobi(1968,1989,3)

314


100_a,100_7,len
str,str,u32
"""Sviták, Ivan,""","""jk01121907""",23
"""Brod, Toman,""","""jk01013138""",10
"""Mňačko, Ladislav,""","""jn19990210438""",9
"""Javůrková, Jena""","""xx0020417""",9
"""Kaplan, Karel,""","""jk01052903""",9
"""Fischl, Viktor,""","""jo19990047113""",9
"""Šik, Ota,""","""jk01122884""",8
"""Nietzsche, Friedrich,""","""jn19990006124""",8
"""Goulli, Rochdi,""","""xx0004722""",8
"""Sartre, Jean-Paul,""","""jn19990007359""",8


In [58]:
obdobi(1948,1989,10)

1480


100_a,100_7,len
str,str,u32
"""Courths-Mahler, Hedwig,""","""jn19990001513""",648
"""Javořická, Vlasta,""","""jk01051344""",461
"""Strong, Pitt,""","""jo2005267810""",323
"""Clifton, Léon""","""xx0111518""",317
"""Steiner, Rudolf,""","""jn19990009728""",220
"""Vymazal, František,""","""jk01151194""",185
"""Marešová, Eva,""","""jo2008475985""",168
"""Medek, Rudolf,""","""jk01081030""",125
"""Pokorný, Jaroslav,""","""jk01100056""",123
"""Brand, Max,""","""jn19981000325""",97


In [60]:
obdobi(1918,1989,5)

7


100_a,100_7,len
str,str,u32
"""Vlasák, Antonín Norbert,""","""jk01150040""",17
"""Zachar, Otakar,""","""jk01151985""",9
"""Mill, John Stuart,""","""jn20000604033""",9
"""Zukal, Josef,""","""jk01152841""",4
"""Rosny, J.-H.,""","""pna2017951980""",3


In [64]:
obdobi(1948,1989,2)

145


100_a,100_7,len
str,str,u32
"""Steiner, Rudolf,""","""jn19990009728""",220
"""Weinfurter, Karel,""","""jk01151502""",91
"""Hüttlová, Jaromíra,""","""xx0282734""",87
"""Tůma, Karel,""","""jk01140380""",66
"""Háj, Felix,""","""jk01033019""",66
"""Engliš, Karel,""","""jk01030495""",64
"""Beneš Buchlovan, Bedřich,""","""jk01011648""",52
"""Orwell, George,""","""jn19981001921""",51
"""Tomáš, Eduard,""","""jn99240001194""",49
"""Liguori, Alfonso Maria de,""","""jn20000701056""",46
