# Comparison of the data with the claims of Erik Baeck and Herman van de Vijver

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the merged 1893-1946 dataset.
# index_col=False tells pandas not to use the first CSV column as the row index,
# so every column of the file arrives as an ordinary data column.
merged_dataset_path = "../Materials/Merged_Dataset.csv"
df_1893_1946 = pd.read_csv(merged_dataset_path, encoding='utf-8', index_col=False)

In [3]:
# Strip leading and trailing whitespace from every text column.
for column in df_1893_1946.columns:
    try:
        df_1893_1946[column] = df_1893_1946[column].str.strip()
    except AttributeError:
        # The .str accessor raises AttributeError on non-text columns
        # (e.g. numeric or boolean ones); there is nothing to strip there.
        continue

# Drop the column "Unnamed: 0" (presumably a row index that was saved with the
# CSV -- TODO confirm). errors='ignore' keeps this cell re-runnable: on a second
# run the column is already gone and a plain drop would raise KeyError.
df_1893_1946 = df_1893_1946.drop(columns=['Unnamed: 0'], errors='ignore')
df_1893_1946.head()

Unnamed: 0,Image,Collection,Production,Date,Occasion,Multiple bill,Performances_ID,Normalised title,Original language,Composer,Original premiere,Original title,genre,Season,Directors,Title_Language,Leaflet_Language,Opmerking
0,K.V.O. programmas 1893-97_00005-20211201_11335...,1893-1897,De Vrijschutter,1893-10-03,Eerste vertoning,False,903641,De Vrijschutter,DUI,Carl Maria von Weber,1821,Der Freischütz,romantische oper,1893-1894,Edward Keurvels & Henry Fontaine,NL,,
1,K.V.O. programmas 1893-97_00009-20211201_11343...,1893-1897,Willem Tell,1893-10-05,Eerste vertoning,False,316913,Willem Tell,DUI,Carl Reinecke,1971,Musik zu Schiller’s 'Wilhelm Tell,romantische oper,1893-1894,Edward Keurvels & Henry Fontaine,Unk,,
2,K.V.O. programmas 1893-97_00011-20211201_11345...,1893-1897,De Vrijschutter,1893-10-10,,False,135474,De Vrijschutter,DUI,Carl Maria von Weber,1821,Der Freischütz,romantische oper,1893-1894,Edward Keurvels & Henry Fontaine,NL,,
3,K.V.O. programmas 1893-97_00013-20211201_11351...,1893-1897,Willem Tell,1893-10-12,,False,520507,Willem Tell,DUI,Carl Reinecke,1971,Musik zu Schiller’s 'Wilhelm Tell,romantische oper,1893-1894,Edward Keurvels & Henry Fontaine,Unk,,
4,K.V.O. programmas 1893-97_00015-20211201_11352...,1893-1897,Charlotte Corday,1893-11-16,,False,446314,Charlotte Corday,NL,Peter Benoit,?,Charlotte Corday,lyrisch drama,1893-1894,Edward Keurvels & Henry Fontaine,Unk,,


# Comparison Van de Vijver
This section counts how many titles were performed in each original language, so that the figures can be compared with those given by Van de Vijver. He claims that in the first season (1940-1941) there was a strong shift away from French works:
- There were 3 French operas 
- 8 German ones. 
- Wagner was well-represented with Lohengrin, Die Walküre, Tannhäuser, and Parsifal. 
- 8 Italian operas, most of which were by Verdi. 
- Flemish works were only sparsely represented, including Gilson, Hullebroeck, and Wambach.

In [4]:
# Keep only the rows (one per performed title) of Season 1940-1941.
filtered_df = df_1893_1946[df_1893_1946['Season'] == '1940-1941']

# One row per distinct title, carrying the language and composer of its first
# occurrence in the season. De-duplicating on the title alone (instead of on the
# title/language/composer triple) guarantees that a title whose metadata is
# spelled inconsistently is listed once, not once per variant. Rows without a
# title are skipped because they cannot be counted per title.
unique_titles_info = (
    filtered_df[['Normalised title', 'Original language', 'Composer']]
    .dropna(subset=['Normalised title'])
    .drop_duplicates(subset='Normalised title')
)

# Total number of rows in the season: the denominator for the percentages.
total_performances = len(filtered_df)

# Number of rows per title, computed in one pass instead of re-filtering the
# season frame once for every title.
performances_per_title = filtered_df['Normalised title'].value_counts()

# Assemble the overview table, most frequently performed titles first.
title_performance_df = (
    unique_titles_info
    .rename(columns={
        'Normalised title': 'Title',
        'Original language': 'Language',
        'Composer': 'Composer',
    })
    .assign(
        Count=lambda frame: frame['Title'].map(performances_per_title),
        Percentage=lambda frame: (frame['Count'] / total_performances) * 100,
    )
    .sort_values(by='Count', ascending=False)
    .reset_index(drop=True)
)
title_performance_df

Unnamed: 0,Title,Language,Composer,Count,Percentage
0,Het Land van den Glimlach,DUI,Franz Lehár,20,9.174312
1,Bohème,ITA,Giacomo Puccini,14,6.422018
2,Aïda,ITA,Giuseppe Verdi,12,5.504587
3,De Parelvisschers,FR,Georges Bizet,12,5.504587
4,De Vledermuis,DUI,Johan Strauss,12,5.504587
5,Carmen,FR,Georges Bizet,11,5.045872
6,Madame Butterfly,ITA,Giacomo Puccini,11,5.045872
7,Faust,FR,Charles Gounod,11,5.045872
8,De Tooverfluit,DUI,Wolfgang Amadeus Mozart,9,4.12844
9,Romeo en Julia,TSJ,Pyotr Tchaikovsky,8,3.669725


In [5]:
# Number of rows in the Season 1940-1941 frame. Note that this is the number of
# performance records, not of distinct titles, despite the printed label.
title_count = len(filtered_df)
print('Total count of titles:', title_count)

# Rows per original language, and the same figures as a share of all rows.
language_counts = filtered_df['Original language'].value_counts()
language_percentages = (language_counts / title_count) * 100

# Report count and percentage side by side, most frequent language first.
print("\nLanguage counts and percentages in Season 1940-1941:")
for language, count, percentage in zip(
    language_counts.index, language_counts.tolist(), language_percentages.tolist()
):
    print(f"Language: {language}, Count: {count}, Percentage: {percentage:.2f}%")

Total count of titles: 218

Language counts and percentages in Season 1940-1941:
Language: DUI, Count: 76, Percentage: 34.86%
Language: ITA, Count: 70, Percentage: 32.11%
Language: FR, Count: 39, Percentage: 17.89%
Language: NL, Count: 22, Percentage: 10.09%
Language: TSJ, Count: 8, Percentage: 3.67%
Language: ?, Count: 3, Percentage: 1.38%


In [6]:
# Compare the number of titles per language with Van de Vijver's figures.
# title_performance_df holds one row per title, so value_counts() here counts
# titles rather than performances.
language_counts = title_performance_df['Language'].value_counts()

# (label, language code in the dataset, Van de Vijver's figure if he gives one)
comparison_rows = [
    ("French operas:", 'FR', ("(Van de Vijver: 3)",)),
    ("German operas:", 'DUI', ("(Van de Vijver: 8)",)),
    ("Italian operas:", 'ITA', ("(Van de Vijver: 8)",)),
    ("Dutch operas:", 'NL', ("(Van de Vijver: sparse)",)),
    ("Czech operas:", 'TSJ', ()),
]

print("Language counts in Season 1940-1941:")
for label, language_code, claim in comparison_rows:
    # .get(..., 0) reports zero for a language that does not occur at all.
    print(label, language_counts.get(language_code, 0), *claim)

Language counts in Season 1940-1941:
French operas: 4 (Van de Vijver: 3)
German operas: 10 (Van de Vijver: 8)
Italian operas: 9 (Van de Vijver: 8)
Dutch operas: 4 (Van de Vijver: sparse)
Czech operas: 1


In [7]:
# Count the rows of the four Wagner titles named by Van de Vijver.
# The titles are matched exactly as they are spelled in 'Normalised title'.
season_titles = filtered_df['Normalised title']
wagner_titles = ['Lohengrin', 'De Walkure', 'Tannhauser', 'Parsifal']

# One frame per title, plus one frame with all four together.
lohengrin, walkure, tannhauser, parsifal = [
    filtered_df[season_titles == wagner_title] for wagner_title in wagner_titles
]
wagner = filtered_df[season_titles.isin(wagner_titles)]

# Report the number of rows for each title and for the group as a whole.
print("Van de Vijver: the Wagnerian works Lohengrin, De Walkure, Tannhauser, Parsifal are well-represented.")
for label, performances in (
    ("Lohengrin:", lohengrin),
    ("De Walkure:", walkure),
    ("Tannhäuser:", tannhauser),
    ("Parsifal:", parsifal),
    ("Total Wagnerian works:", wagner),
):
    print(label, len(performances))

# Share of these four titles among all rows of the season.
percentage_wagnerian = len(wagner) / len(filtered_df) * 100
print(f"Percentage of Lohengrin, De Walkure, Tannhauser, Parsifal in the season: {percentage_wagnerian:.2f}%")

Van de Vijver: the Wagnerian works Lohengrin, De Walkure, Tannhauser, Parsifal are well-represented.
Lohengrin: 7
De Walkure: 7
Tannhäuser: 4
Parsifal: 2
Total Wagnerian works: 20
Percentage of Lohengrin, De Walkure, Tannhauser, Parsifal in the season: 9.17%


In [8]:
# How many times was Parsifal performed during WWII?
# Seasons are compared as strings; this assumes every label has the same
# "YYYY-YYYY" layout, so that alphabetical order equals chronological order.
wartime_mask = df_1893_1946['Season'].between('1940-1941', '1944-1945')
df_WWII = df_1893_1946[wartime_mask]
parsifal = df_WWII[df_WWII['Normalised title'] == 'Parsifal']
print("Parsifal:", len(parsifal))

Parsifal: 6


In [9]:
# How many of the Italian rows of the season are by Verdi?
# NOTE(review): the figures below count rows (performances) in filtered_df,
# not distinct titles, even though the printed labels say "titles".
italian_titles = filtered_df.loc[filtered_df['Original language'] == 'ITA']

# Case-insensitive match on the composer name; rows without a composer count as
# "not Verdi" (na=False).
is_verdi = italian_titles['Composer'].str.contains('Verdi', case=False, na=False)
verdi_italian_titles = italian_titles.loc[is_verdi]
verdi_count = len(verdi_italian_titles)
percentage_verdi = verdi_count / len(italian_titles) * 100

# Report the absolute number and the share among the Italian rows.
print("Number of Italian titles by Verdi:", verdi_count)
print(f"Percentage of Italian titles by Verdi: {percentage_verdi:.2f}%")

Number of Italian titles by Verdi: 27
Percentage of Italian titles by Verdi: 38.57%


# Comparison Baeck (The Wagner Cult)

He claims:

1940-1941
- total of 23 operas and 2 operettas, 
- four works by Wagner (twelve performances); 
- seven other "German" operas (37 performances in total, 13 of which were operettas); 
- three French operas (25 performances); 
- eight Italian operas (53 performances) and 
- four Flemish operas (13 performances). 


1941-1942 
- 22 operas and 5 operettas 
- three Wagner operas (14 performances);
- four other "German" operas (18 performances) 
- five "German" operettas (37 performances); 
- nine Italian operas (58 performances), 
- four French operas (24 performances) 
- three Flemish operas (14 performances). 


1942-1943
- 26 different works: 
- six Wagner operas (18 performances), all conducted by Diels; 
- five other "German" operas (36 performances)  
- six "German" operettas (59 performances); 
- three French operas (31 performances); 
- eight Italian operas (71 performances),  
- four Flemish operas (14 performances)  

1943-1944: 

- four other "German" operas (17 performances), 
- eight "German" operettas (79 performances), 
- eight Italian operas (53 performances), 
- two French operas (15 performances)
- two Flemish works (10 performances).

In [10]:
# Restrict the dataset to the seasons 1933-1934 through 1945-1946 (inclusive).
# Season labels are compared as strings, which assumes a uniform "YYYY-YYYY" layout.
df_1933_1946 = df_1893_1946[df_1893_1946['Season'].between('1933-1934', '1945-1946')]

def calculate_season_statistics(df_1933_1946, season):
    """Summarise one season of the performance table.

    Parameters
    ----------
    df_1933_1946 : pandas.DataFrame
        Table with the columns 'Season', 'Normalised title', 'Performances_ID',
        'Composer' and 'Original language'.
    season : str
        Season label to select, compared for equality with the 'Season' column.

    Returns
    -------
    dict
        'Season' (the label passed in), 'Total Works' (number of distinct
        normalised titles), 'Total Performances' (number of distinct
        performance IDs), and row counts for 'Works by Wagner' (composer
        contains "Wagner", case-insensitive) and for the languages DUI, FR,
        ITA and NL under the keys 'German Operas', 'French Operas',
        'Italian Operas' and 'Flemish Operas'.

    Note that the Wagner and language figures count rows of the table, not
    distinct titles.
    """
    season_df = df_1933_1946.loc[df_1933_1946['Season'] == season]
    language = season_df['Original language']

    # Rows without a composer are treated as "not Wagner" (na=False).
    is_wagner = season_df['Composer'].str.contains('Wagner', case=False, na=False)

    statistics = {
        'Season': season,
        'Total Works': season_df['Normalised title'].nunique(),
        'Total Performances': season_df['Performances_ID'].nunique(),
        'Works by Wagner': int(is_wagner.sum()),
    }

    # Output key -> language code used in the 'Original language' column.
    language_codes = (
        ('German Operas', 'DUI'),
        ('French Operas', 'FR'),
        ('Italian Operas', 'ITA'),
        ('Flemish Operas', 'NL'),
    )
    for label, code in language_codes:
        statistics[label] = int((language == code).sum())

    return statistics

# The four seasons for which Baeck gives figures.
seasons = ['1940-1941', '1941-1942', '1942-1943', '1943-1944']

# One statistics record per season, in chronological order.
season_statistics = []
for season in seasons:
    season_statistics.append(calculate_season_statistics(df_1933_1946, season))

# One table row per season, for side-by-side comparison with Baeck's claims.
season_statistics_df = pd.DataFrame(season_statistics)
season_statistics_df

Unnamed: 0,Season,Total Works,Total Performances,Works by Wagner,German Operas,French Operas,Italian Operas,Flemish Operas
0,1940-1941,30,214,20,76,39,70,22
1,1941-1942,33,172,17,73,30,60,5
2,1942-1943,37,221,18,108,30,81,12
3,1943-1944,44,259,54,158,52,72,6
