# Kapitel 6.8: Geschichtstheorie

Das Notebook ergänzt Kapitel 6.8 'Geschichtstheorie'.

# Import

In [1]:
import pandas as pd
import numpy as np

from resources_statistics import *
from resources_geschichtslyrik import *

import plotly.express as px
import plotly.graph_objects as go
from plotly.validators.scatter.marker import SymbolValidator

from tqdm.notebook import tqdm

In [2]:
# Load the corpus metadata table from the JSON resource file.
# Every sub-corpus in the cells below is filtered from this frame.
meta = pd.read_json(r"../resources/meta.json")

# Korpora

### Korpora erstellen

In [3]:
# Anthology corpus: rows of the 'anth' corpus dated 1850-1918 that are flagged
# as Geschichtslyrik, keeping only the first row per author_title.
anth_in_period = meta.query("corpus=='anth'").query("1850 <= year <= 1918")
anth_geschichtslyrik = anth_in_period.query("geschichtslyrik == 1")
meta_anth = anth_geschichtslyrik.drop_duplicates(subset='author_title')

# Derived frame used by the correlation and relation cells further down
# (presumably a numeric/binary encoding of the annotations; see binarize_meta).
meta_anth_bin = binarize_meta(meta_anth)

In [4]:
# Authors that make up the 'Kanonisierte Moderne' comparison corpus.
modcanon_authors = [
    'Hofmannsthal, Hugo von',
    'Rilke, Rainer Maria',
    'George, Stefan',
    'Heym, Georg',
]

# Select by author (no corpus restriction), then apply the 1850-1918 period
# filter, the Geschichtslyrik filter and de-duplication on author_title.
modcanon_by_author = meta.query("author in @modcanon_authors")
modcanon_in_period = modcanon_by_author.query("1850 <= year <= 1918")
meta_modcanon = (
    modcanon_in_period
    .query("geschichtslyrik == 1")
    .drop_duplicates(subset='author_title')
)

In [5]:
# Authors that make up the 'Münchhausen-Kreis' comparison corpus.
muench_authors = [
    'Münchhausen, Börries von',
    'Miegel, Agnes',
    'Strauß und Torney, Lulu von',
]

# Select by author (no corpus restriction), then apply the 1850-1918 period
# filter, the Geschichtslyrik filter and de-duplication on author_title.
muench_by_author = meta.query("author in @muench_authors")
muench_in_period = muench_by_author.query("1850 <= year <= 1918")
meta_muench = (
    muench_in_period
    .query("geschichtslyrik == 1")
    .drop_duplicates(subset='author_title')
)

In [6]:
# Sub-corpora compared in this notebook, keyed by their display name.
# sub_names and sub_metas are kept as parallel lists in the same order.
sub_corpora = {
    'Anthologien': meta_anth,
    'Kanonisierte Moderne': meta_modcanon,
    'Münchhausen-Kreis': meta_muench,
}
sub_names = list(sub_corpora.keys())
sub_metas = list(sub_corpora.values())

# Summary table; filled with one row per sub-corpus by the feature loop.
sub_df = pd.DataFrame()

### Merkmale berechnen

In [7]:
def _share(part, whole):
    """Return part / whole, or NaN when whole is 0.

    The counts below are plain Python ints, so an empty denominator (a
    sub-corpus without any text carrying the feature) would otherwise raise
    ZeroDivisionError and abort the whole cell.
    """
    return part / whole if whole else np.nan


# Rating categories looked up in the '<feature>_bewertung' columns.
bewertung_categories = ['neutral', 'positiv', 'negativ', 'ambivalent']

# Fill sub_df with one row per sub-corpus: mean year, number of texts and,
# per feature, the absolute count, the share of all texts and the share of
# each rating among the texts that carry the feature.
for this_name, this_meta in zip(sub_names, sub_metas):
    text_count = this_meta.shape[0]
    sub_df.loc[this_name, 'Jahr'] = round(this_meta['year'].mean(), 0)
    sub_df.loc[this_name, 'Texte'] = text_count

    theoretisch_count = this_meta.query("theoretisch == 1").shape[0]
    sub_df.loc[this_name, 'theoretisch_count'] = theoretisch_count
    sub_df.loc[this_name, 'theoretisch_share'] = _share(theoretisch_count, text_count)

    ueberlieferung_count = this_meta.query("ueberlieferung == 1").shape[0]
    sub_df.loc[this_name, 'ueberlieferung_count'] = ueberlieferung_count
    sub_df.loc[this_name, 'ueberlieferung_share'] = _share(ueberlieferung_count, text_count)
    for bewertung in bewertung_categories:
        bewertung_count = this_meta.query(f"ueberlieferung_bewertung == '{bewertung}'").shape[0]
        sub_df.loc[this_name, f'ueberlieferung_{bewertung}'] = _share(bewertung_count, ueberlieferung_count)

    geschichtsauffassung_count = this_meta.query("geschichtsauffassung == 1").shape[0]
    sub_df.loc[this_name, 'geschichtsauffassung_count'] = geschichtsauffassung_count
    sub_df.loc[this_name, 'geschichtsauffassung_share'] = _share(geschichtsauffassung_count, text_count)
    for bewertung in bewertung_categories:
        bewertung_count = this_meta.query(f"geschichtsauffassung_bewertung == '{bewertung}'").shape[0]
        sub_df.loc[this_name, f'geschichtsauffassung_{bewertung}'] = _share(bewertung_count, geschichtsauffassung_count)

In [8]:
# Display the sub-corpus summary rounded to two decimals.
sub_df.round(2)

Unnamed: 0,Jahr,Texte,theoretisch_count,theoretisch_share,ueberlieferung_count,ueberlieferung_share,ueberlieferung_neutral,ueberlieferung_positiv,ueberlieferung_negativ,ueberlieferung_ambivalent,geschichtsauffassung_count,geschichtsauffassung_share,geschichtsauffassung_neutral,geschichtsauffassung_positiv,geschichtsauffassung_negativ,geschichtsauffassung_ambivalent
Anthologien,1875.0,1850.0,56.0,0.03,433.0,0.23,0.33,0.56,0.06,0.05,76.0,0.04,0.41,0.34,0.22,0.04
Kanonisierte Moderne,1903.0,113.0,6.0,0.05,25.0,0.22,0.48,0.44,0.08,0.0,1.0,0.01,0.0,0.0,1.0,0.0
Münchhausen-Kreis,1905.0,140.0,7.0,0.05,31.0,0.22,0.65,0.29,0.0,0.06,4.0,0.03,0.25,0.25,0.25,0.25


# Zeitverlauf

In [9]:
# Empty time-series frame with one row per year from 1850 to 1918 (inclusive);
# the index is named 'year' so per-year aggregates align on it.
ts = pd.DataFrame(index = pd.Series(range(1850, 1919), name = 'year'))

In [10]:
# Number of anthology texts per year; years without any text become 0.
texts_per_year = meta_anth.groupby('year').size()
ts['text_count'] = texts_per_year.reindex(ts.index).fillna(0)

# Denominator for the smoothed shares below (smooth is a project helper;
# presumably a windowed sum around each year -- confirm in its definition).
ts['text_sum'] = smooth(ts['text_count'], mode = 'sum')

In [11]:
# For each feature: yearly count of texts carrying it, the smoothed sum of
# those counts, and that sum relative to the smoothed number of texts.
for feature in ('theoretisch', 'ueberlieferung', 'geschichtsauffassung'):
    yearly_counts = [
        meta_anth.query(f"year == @x and {feature} == 1").shape[0]
        for x in ts.index
    ]
    ts[f'{feature}_count'] = yearly_counts
    ts[f'{feature}_sum'] = smooth(ts[f'{feature}_count'], mode = 'sum')
    ts[f'{feature}_share_smoothed'] = ts[f'{feature}_sum']/ts['text_sum']

# Überblick

In [29]:
# Smoothed share columns to plot, mapped to the labels shown in the figure.
share_labels = {
    'theoretisch_share_smoothed': 'Geschichtstheorie',
    'ueberlieferung_share_smoothed': 'Überlieferung',
    'geschichtsauffassung_share_smoothed': 'Geschichtsauffassung',
}
meta_plot = ts[list(share_labels)].rename(columns = share_labels)
save_ts_data(meta_plot, prefix='06_08_Geschichtstheorie_')

# Time-series figure; the sub-corpus summary values are handed to the plot
# helper via the add_corpora* arguments.
fig = create_ts_plot(
    data = meta_plot,
    columns = meta_plot.columns,
    y_axis_title = 'Anteil an Texten',
    add_corporas = sub_df,
    add_corpora_names = sub_names,
    add_corpora_categories = [
        'theoretisch_share',
        'ueberlieferung_share',
        'geschichtsauffassung_share',
    ],
)
fig = update_fig_for_publication(fig)
fig.write_image("plots/6.8 Geschichtstheorie, Überlieferung und Geschichtsauffassungen im Zeitverlauf.pdf")
fig.show()

In [13]:
# Number of anthology texts flagged as 'theoretisch'.
len(meta_anth.query("theoretisch==1"))

56

In [14]:
# Inspect the anthology texts from 1894-1896, texts with Überlieferung first.
inspect_columns = ['author', 'title', 'stoffgebiet', 'ueberlieferung']
(
    meta_anth
    .query("1894 <= year <= 1896")
    .loc[:, inspect_columns]
    .sort_values(by='ueberlieferung', ascending=False)
)

Unnamed: 0,author,title,stoffgebiet,ueberlieferung
15210,"Warncke, Paul",Heil Bismarck!,Nation/Volk-D,1.0
15230,"Groth, Klaus",Die Eröffnung des Kaiser Wilhelm-Kanals,Architektur,1.0
15357,"Wolff, Julius",Das deutsche Heer,Militär/Krieg,1.0
15864,"Seydel, Max von",Sphakteria,Auferstehung/Geister + Politik + Militär/Krieg,1.0
17362,"Saar, Ferdinand von",Mozart,Musik,1.0
17001,"Wickenburg, Albrecht von",Marschall Daun,Militär/Krieg,1.0
16885,"Buschhorn, Carl",Am Hermannsdenkmal,Denkmal + Militär/Krieg,1.0
19587,"Fontane, Theodor",Luren-Konzert,Musik,1.0
16211,"Löwenberg, Jakob",Auf dem Felde der Ehre,Gesundheit + Militär/Krieg,1.0
15403,"Langewiesche, Wilhelm",Kaiser Heinrichs Weihnacht,Religion + Politik,0.0


# Überlieferung

In [15]:
# Column analysed in this section; the cells below correlate and compare
# every other feature against it.
main_feature = 'ueberlieferung'

In [16]:
# The 20 features with the highest correlation to the main feature.
(
    meta_anth_bin
    .corr(numeric_only=True)[main_feature]
    .sort_values(ascending = False)
    .head(20)
)

ueberlieferung                          1.000000
gegenwartsbezug                         0.460262
sprechinstanz_nicht_in_vergangenheit    0.350572
denkmal                                 0.340426
gegenwartsdominant                      0.281573
zeitmarker_vorhanden                    0.278408
zeitebenen                              0.240563
wissen_identisch                        0.238014
sprechinstanz_markiert                  0.219166
nichtmensch_count                       0.179294
geschichtsauffassung_positiv            0.178025
zustand                                 0.176819
liebe_positiv                           0.166875
theoretisch                             0.155682
sprechakt_behaupten_vorhanden           0.154874
ende                                    0.135804
zeit_mitte                              0.129020
neuzeit                                 0.127360
heroismus                               0.126413
nationalismus                           0.125529
Name: ueberlieferung

In [17]:
# The 20 features with the lowest (most negative) correlation to the main feature.
(
    meta_anth_bin
    .corr(numeric_only=True)[main_feature]
    .sort_values(ascending = True)
    .head(20)
)

wissen_ergaenzend                -0.225398
ballade                          -0.188982
ereignis                         -0.163151
in_hohem_mass_konkret            -0.162480
sprechakt_erzaehlen_vorhanden    -0.155225
liebe_negativ                    -0.153574
konkretheit                      -0.149453
sprechinstanz_in_vergangenheit   -0.131094
mittelalter                      -0.100896
unbekanntes_individuum_negativ   -0.096303
rollengedicht                    -0.093476
religion                         -0.092610
tod_negativ                      -0.092319
fixierbarkeit                    -0.086787
reim                             -0.078565
entity_negativ                   -0.075880
bekanntes_individuum_negativ     -0.060223
stoffgebiet_negativ              -0.057552
antike                           -0.055433
objektmarker_vorhanden           -0.054322
Name: ueberlieferung, dtype: float64

In [18]:
# Minimum correlation strength for a feature to be examined further; the same
# value is reused as the phi cut-off when the binary results are filtered.
threshold = 0.2

# Compute the correlation matrix once and reuse the main-feature column for
# both selections instead of recomputing the whole matrix per call.
# NOTE(review): assumes get_features does not modify the Series it receives.
main_feature_corr = meta_anth_bin.corr(numeric_only=True)[main_feature]

# Candidate comparison features, selected separately by get_features' 'bin'
# and 'cont' modes (presumably binary vs. continuous features -- confirm).
bin_comp_features = get_features(main_feature_corr, threshold = threshold, mode = 'bin')
cont_comp_features = get_features(main_feature_corr, threshold = threshold, mode = 'cont')

In [19]:
# Relate the main feature to each selected binary comparison feature.
# relations_binbin is a project helper; judging by how the result is used in
# the following cells, it returns one row per comparison feature with columns
# such as 'diff', 'chi2_p', 'phi' and 'min_expected'.
results = relations_binbin(
    meta = meta_anth_bin, 
    main_feature = main_feature,
    comp_features = bin_comp_features
)

In [20]:
# Features excluded because they are closely tied to another feature
# (the partner is named in the trailing comment of each entry).
directly_related = [
    'sprechinstanz_nicht_in_vergangenheit', # related to gegenwartsbezug+sprechinstanz_markiert
    'gegenwartsdominant', # related to gegenwartsbezug
    'wissen_ergaenzend', # related to wissen_identisch
]

# Keep comparisons that are significant (chi2_p < 0.05), have large enough
# expected cell counts (>= 5) and reach the phi threshold; largest diff first.
results_candidates = results.query("index not in @directly_related")
results_significant = results_candidates.query(
    "chi2_p < 0.05 and min_expected >= 5 and phi >= @threshold"
)
results_filtered = results_significant.sort_values(by = 'diff', ascending = False)
results_filtered.round(2)

Unnamed: 0,wenn_nicht,wenn_nicht_detail,wenn_ja,wenn_ja_detail,diff_low_bootstrap,diff_low,diff,diff_high,diff_high_bootstrap,chi2,chi2_p,fisher_p,phi,min_real,min_expected
gegenwartsbezug,0.17,247/1417,0.67,289/433,0.44,0.44,0.49,0.54,0.54,391.91,0.0,0.0,0.46,144.0,125.45
zeitmarker_vorhanden,0.35,489/1417,0.67,290/433,0.27,0.27,0.32,0.38,0.38,143.4,0.0,0.0,0.28,143.0,182.33
sprechinstanz_markiert,0.38,536/1417,0.64,275/433,0.2,0.2,0.26,0.31,0.31,88.86,0.0,0.0,0.22,158.0,189.82
wissen_identisch,0.1,137/1417,0.29,127/433,0.15,0.15,0.2,0.24,0.24,104.8,0.0,0.0,0.24,127.0,61.79
denkmal,0.0,4/1417,0.16,69/433,0.12,0.12,0.16,0.19,0.19,214.4,0.0,0.0,0.34,4.0,17.09


In [21]:
# All remaining comparisons that did not pass the filter, largest diff first.
results_other = results.query("index not in @results_filtered.index")
results_other.sort_values(by='diff', ascending=False).round(2)

Unnamed: 0,wenn_nicht,wenn_nicht_detail,wenn_ja,wenn_ja_detail,diff_low_bootstrap,diff_low,diff,diff_high,diff_high_bootstrap,chi2,chi2_p,fisher_p,phi,min_real,min_expected
sprechinstanz_nicht_in_vergangenheit,0.19,270/1417,0.56,243/433,0.32,0.32,0.37,0.42,0.42,227.37,0.0,0.0,0.35,190.0,120.07
gegenwartsdominant,0.09,127/1417,0.32,140/433,0.19,0.19,0.23,0.28,0.28,146.67,0.0,0.0,0.28,127.0,62.49
stoffgebiet_positiv,0.44,857/1938,0.54,327/602,0.06,0.06,0.1,0.15,0.15,18.82,0.0,0.0,0.09,275.0,280.62
bekanntes_individuum_positiv,0.55,869/1587,0.65,283/438,0.05,0.05,0.1,0.15,0.15,13.59,0.0,0.0,0.08,155.0,188.83
entity_positiv,0.46,1391/3011,0.53,480/905,0.03,0.03,0.07,0.11,0.1,13.05,0.0,0.0,0.06,425.0,432.39
kollektiv_positiv,0.37,302/827,0.4,94/237,-0.04,-0.04,0.03,0.1,0.1,0.78,0.38,0.4,0.03,94.0,88.21
unbekanntes_individuum_positiv,0.34,167/495,0.37,50/136,-0.06,-0.06,0.03,0.12,0.12,0.43,0.51,0.54,0.03,50.0,46.77
entity_ambivalent,0.06,189/3011,0.06,55/905,-0.02,-0.02,-0.0,0.02,0.02,0.05,0.83,0.88,0.0,55.0,56.39
stoffgebiet_ambivalent,0.14,263/1938,0.12,75/602,-0.04,-0.04,-0.01,0.02,0.02,0.49,0.48,0.54,0.01,75.0,80.11
kollektiv_negativ,0.26,211/827,0.24,57/237,-0.07,-0.08,-0.01,0.05,0.05,0.21,0.65,0.67,0.01,57.0,59.7


In [22]:
# Columns kept from each period's result table.
result_categories = ['wenn_nicht', 'wenn_nicht_detail', 'wenn_ja', 'wenn_ja_detail', 'diff', 'chi2_p', 'phi',]

# Repeat the binary comparisons separately for 1850-1884 (results_a) and
# 1885-1918 (results_b), restricted to the features that passed the filter.
results_a, results_b = [
    relations_binbin(
        meta = meta_anth_bin.query(period_filter),
        main_feature = main_feature,
        comp_features = results_filtered.index,
    )
    for period_filter in ("1850 <= year <= 1884", "1885 <= year <= 1918")
]

# Put both periods side by side and add the late-minus-early change in
# difference and in phi.
results_merged = (
    results_a[result_categories]
    .join(results_b[result_categories], lsuffix='_1850', rsuffix = '_1885')
    .assign(
        diff_of_diffs = lambda merged: merged['diff_1885'] - merged['diff_1850'],
        diff_of_phis = lambda merged: merged['phi_1885'] - merged['phi_1850'],
    )
)

results_merged.sort_values(by = 'diff_of_phis').round(3)

Unnamed: 0,wenn_nicht_1850,wenn_nicht_detail_1850,wenn_ja_1850,wenn_ja_detail_1850,diff_1850,chi2_p_1850,phi_1850,wenn_nicht_1885,wenn_nicht_detail_1885,wenn_ja_1885,wenn_ja_detail_1885,diff_1885,chi2_p_1885,phi_1885,diff_of_diffs,diff_of_phis
sprechinstanz_markiert,0.378,391/1034,0.638,199/312,0.26,0.0,0.221,0.379,145/383,0.628,76/121,0.25,0.0,0.215,-0.01,-0.006
denkmal,0.002,2/1034,0.154,48/312,0.152,0.0,0.339,0.005,2/383,0.174,21/121,0.168,0.0,0.345,0.016,0.006
gegenwartsbezug,0.177,183/1034,0.667,208/312,0.49,0.0,0.455,0.167,64/383,0.669,81/121,0.502,0.0,0.474,0.013,0.019
wissen_identisch,0.089,92/1034,0.269,84/312,0.18,0.0,0.226,0.117,45/383,0.355,43/121,0.238,0.0,0.268,0.058,0.042
zeitmarker_vorhanden,0.347,359/1034,0.638,199/312,0.291,0.0,0.249,0.339,130/383,0.752,91/121,0.413,0.0,0.355,0.122,0.106


In [23]:
# Relate the main feature to each selected continuous comparison feature.
# relations_bincont is a project helper; the following cells read columns such
# as 'pointbiserialr_corr' and 'mannwhitneyu_p' from its result.
# Note: this rebinds `results`, replacing the binary comparison table above.
results = relations_bincont(
    meta = meta_anth_bin, 
    main_feature = main_feature,
    comp_features = cont_comp_features
)

In [24]:
# Continuous comparisons, strongest point-biserial correlation first.
results.sort_values(by = 'pointbiserialr_corr', ascending = False).round(2)

Unnamed: 0,wenn_nicht,a_merkmal=0,a_merkmal=1,a_merkmal=2,a_merkmal=3,a_merkmal>=4,wenn_ja,b_merkmal=0,b_merkmal=1,b_merkmal=2,...,pointbiserialr_corr,pointbiserialr_p,ttest_p,cohens_d,mannwhitneyu_stat,mannwhitneyu_p,meandiffs_ci_lower,meandiffs_ci_bootstrap_lower,meandiffs_ci_upper,meandiffs_ci_bootstrap_upper
zeitebenen,1.91,0.0 [0/1417],0.35 [491/1417],0.44 [621/1417],0.18 [250/1417],0.04 [55/1417],2.39,0.0 [0/433],0.08 [35/433],0.54 [233/433],...,0.24,0.0,0.0,-0.59,208645.0,0.0,0.39,0.39,0.57,0.57


In [25]:
# Work on a copy so the helper column 'plot_legend' is not added to meta_anth_bin.
meta_plot = meta_anth_bin.copy()

# Shared axis font styling. The axis-title font is set via title.font: the
# older `titlefont` shortcut is deprecated and rejected by newer plotly releases.
axis_style = dict(tickfont=dict(size=16), title=dict(font=dict(size=16)))

# One grouped histogram per continuous comparison feature, contrasting texts
# with and without the main feature; the group means are shown in the legend.
for cont_comp_feature in cont_comp_features:
    mean_main = meta_plot[meta_plot[main_feature] == 1][cont_comp_feature].mean()
    mean_notmain = meta_plot[meta_plot[main_feature] == 0][cont_comp_feature].mean()
    label_main = f"Texte mit Überlieferung<br>(Mittelwert = {round(mean_main, 2)})"
    label_notmain = f"Texte ohne Überlieferung<br>(Mittelwert = {round(mean_notmain, 2)})"
    # Every row whose main-feature value is not 1 gets the "ohne" label.
    meta_plot['plot_legend'] = [label_main if x == 1 else label_notmain for x in meta_plot[main_feature]]

    fig = px.histogram(
        meta_plot,
        x = cont_comp_feature,
        color = 'plot_legend',
        histnorm = 'probability density',
        barmode = 'group',
        labels = {'plot_legend' : '', 'zeitebenen' : 'Anzahl Zeitebenen',}
    )

    # The nested axis dicts are merged into the existing axes, so the axis
    # title texts (from `labels` and `yaxis_title`) are kept.
    fig.update_layout(
        width = 700, height = 300,
        yaxis_title="Anteil",
        xaxis=axis_style,
        yaxis=axis_style,
        legend=dict(font = dict(size=16), x=0.52, y = 0.88),
        bargap=0.1
    )
    # fig.write_image(f"plots/6.8 Ueberlieferung – {cont_comp_feature}.pdf")
    fig.show()

In [26]:
# Columns kept from each period's result table.
result_categories = ['wenn_nicht', 'wenn_ja', 'mannwhitneyu_p', 'pointbiserialr_corr',]

# Repeat the continuous comparisons separately for 1850-1884 (results_a) and
# 1885-1918 (results_b).
results_a, results_b = [
    relations_bincont(
        meta = meta_anth_bin.query(period_filter),
        main_feature = main_feature,
        comp_features = cont_comp_features,
    )
    for period_filter in ("1850 <= year <= 1884", "1885 <= year <= 1918")
]

# Put both periods side by side and add the late-minus-early change in the
# point-biserial correlation.
results_merged = (
    results_a[result_categories]
    .join(results_b[result_categories], lsuffix='_1850', rsuffix = '_1885')
    .assign(
        diff_of_corrs = lambda merged: (
            merged['pointbiserialr_corr_1885'] - merged['pointbiserialr_corr_1850']
        ),
    )
)

results_merged.sort_values(by = 'diff_of_corrs').round(3)

Unnamed: 0,wenn_nicht_1850,wenn_ja_1850,mannwhitneyu_p_1850,pointbiserialr_corr_1850,wenn_nicht_1885,wenn_ja_1885,mannwhitneyu_p_1885,pointbiserialr_corr_1885,diff_of_corrs
zeitebenen,1.928,2.365,0.0,0.221,1.869,2.471,0.0,0.289,0.068


In [27]:
# Rating of Überlieferung vs. Geschichtsauffassung: share rated 'positiv'.
# Rows: texts with Überlieferung / texts with Geschichtsauffassung.
# Columns: rating is not 'positiv' / rating is 'positiv'.
ueberlieferung_row = [
    meta_anth.query("ueberlieferung == 1 and ueberlieferung_bewertung != 'positiv'").shape[0],
    meta_anth.query("ueberlieferung == 1 and ueberlieferung_bewertung == 'positiv'").shape[0],
]
geschichtsauffassung_row = [
    meta_anth.query("geschichtsauffassung == 1 and geschichtsauffassung_bewertung != 'positiv'").shape[0],
    meta_anth.query("geschichtsauffassung == 1 and geschichtsauffassung_bewertung == 'positiv'").shape[0],
]
contingency_table = [ueberlieferung_row, geschichtsauffassung_row]

contingency_table_df = pd.DataFrame(
    contingency_table,
    index = ['ueberlieferung', 'geschichtsauffassung'],
    columns = ['nicht_positiv', 'positiv'],
)
row_totals = contingency_table_df.sum(axis=1)
contingency_table_df['positiv_share'] = contingency_table_df['positiv']/row_totals

# Run the chi-square test once and read statistic and p-value from the result.
chi2_result = chi2_contingency(contingency_table)

print(contingency_table_df)
print(f"chi2   : {chi2_result[0]}")
print(f"chi2 p : {chi2_result[1]}")
print(f"phi    : {get_phi(np.array(contingency_table))}")

                      nicht_positiv  positiv  positiv_share
ueberlieferung                  189      244       0.563510
geschichtsauffassung             50       26       0.342105
chi2   : 11.850879733653395
chi2 p : 0.0005763411217952217
phi    : 0.15810936339021953


In [28]:
# Rating of Überlieferung vs. Geschichtsauffassung: share rated 'negativ'.
# Rows: texts with Überlieferung / texts with Geschichtsauffassung.
# Columns: rating is not 'negativ' / rating is 'negativ'.
ueberlieferung_row = [
    meta_anth.query("ueberlieferung == 1 and ueberlieferung_bewertung != 'negativ'").shape[0],
    meta_anth.query("ueberlieferung == 1 and ueberlieferung_bewertung == 'negativ'").shape[0],
]
geschichtsauffassung_row = [
    meta_anth.query("geschichtsauffassung == 1 and geschichtsauffassung_bewertung != 'negativ'").shape[0],
    meta_anth.query("geschichtsauffassung == 1 and geschichtsauffassung_bewertung == 'negativ'").shape[0],
]
contingency_table = [ueberlieferung_row, geschichtsauffassung_row]

contingency_table_df = pd.DataFrame(
    contingency_table,
    index = ['ueberlieferung', 'geschichtsauffassung'],
    columns = ['nicht_negativ', 'negativ'],
)
row_totals = contingency_table_df.sum(axis=1)
contingency_table_df['negativ_share'] = contingency_table_df['negativ']/row_totals

# Run the chi-square test once and read statistic and p-value from the result.
chi2_result = chi2_contingency(contingency_table)

print(contingency_table_df)
print(f"chi2   : {chi2_result[0]}")
print(f"chi2 p : {chi2_result[1]}")
print(f"phi    : {get_phi(np.array(contingency_table))}")

                      nicht_negativ  negativ  negativ_share
ueberlieferung                  408       25       0.057737
geschichtsauffassung             59       17       0.223684
chi2   : 21.376759350892797
chi2 p : 3.773173442410576e-06
phi    : 0.21495044410251846
