# Notebook for calculating the total of the groups presented in the main paper
Note that the groups themselves were decided through a combination of inspection of the quantitative measures calculated in the analysis and manual inspection of data from individual crises (as well as checks with historical records)

Hence this notebooks primarily exists to groups together crises for calculating the numbers reported in the text.

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib widget

# Load style
plt.style.use('PlotStyle.mplstyle')
import matplotlib.colors as colors
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=plt.cm.Dark2.colors)

from datetime import datetime
from tqdm import tqdm

import os

# Load functions
import sys
sys.path.append("../../ExcessMortality")
import ExcessMortalityFunctions as emf
import AdditionalFunctions as ps


pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
pd.set_option('display.min_rows', 50)

saveFigures = True
saveFigures = False
print('saveFigures is set to: '+str(saveFigures))
print('Done loading packages')

saveFigures is set to: False
Done loading packages


In [2]:
# Set paths
pathData = '../Data/'
pathResults = '../Data/AnalysisResults'
pathFigs = '../Figures/'

In [3]:
# Flags and analysis parameters used in main analysis, set here to load correct results
numYears = 12 # Number of years on both sides of date to use for baseline calculations 
numYearsTot = (numYears*2) # The "name" of the baseline (i.e. +/- 5 years is a 10-year baseline, +/- 12 is a 24 year baseline)
thresholdExcess = 3 # Threshold (in terms of Z-scores) for identifying a day as having increased excess

# # Determine directory in which results was saved
# pathResultsUpper = pathResults + f'_Years{numYears}_Threshold{thresholdExcess}/'

# Additional parameters used
thresholdLower = 2 # Lower threshold used for determining the start and end of periods (in terms of Z-scores)
# maxDaysBelowThreshold = 4 # Number of days below thresholdLower before a period of excess is "stopped"
# minimumLengthOfEpidemic = 4 # Minimal number of days above thresholdExcess which is counted as a period of excess 
maxDaysBelowThreshold = 7 # Number of days below thresholdLower before a period of excess is "stopped"
minimumLengthOfEpidemic = 0 # Minimal number of days above thresholdExcess which is counted as a period of excess 
excessCountThreshold = 50 # Only save mortality crises with more than this number of excess deaths

# Various tests for sensitivity
# numYears = 6 # Number of years on both sides of date to use for baseline calculations 
# maxDaysBelowThreshold = 7 # Number of days below thresholdLower before a period of excess is "stopped"
# excessCountThreshold = 20 # Only save mortality crises with more than this number of excess deaths


# Determine filename to use for final results
# finalResultsFilename = 'AllCrises'+f'_Years{numYears}_Threshold{thresholdExcess}_LowerThreshold{thresholdLower}_MaxDaysBelow{maxDaysBelowThreshold}_minLength{minimumLengthOfEpidemic}_minCount{excessCountThreshold}'
finalResultsFilename = 'AllCrises'+f'_NonSmoothed_Years{numYears}_Threshold{thresholdExcess}_LowerThreshold{thresholdLower}_MaxDaysBelow{maxDaysBelowThreshold}_minLength{minimumLengthOfEpidemic}_minCount{excessCountThreshold}'
finalResultsFilename = finalResultsFilename +'_Clustered'
# finalResultsFilename = finalResultsFilename +'_Clustered_Grouped'
finalResultsFilename 

'AllCrises_NonSmoothed_Years12_Threshold3_LowerThreshold2_MaxDaysBelow7_minLength0_minCount50_Clustered'

In [4]:
# Load the table of results
dfCrises = pd.read_csv(pathData + finalResultsFilename + '.csv')
dfCrises['Start'] = pd.to_datetime(dfCrises['Start'])
dfCrises['End'] = pd.to_datetime(dfCrises['End'])
# dfCrises['DayWithMostBurials'] = pd.to_datetime(dfCrises['DayWithMostBurials'])
dfCrises['DayWithMostDeaths'] = pd.to_datetime(dfCrises['DayWithMostDeaths'])

In [5]:
# Add a flag for group
dfCrises['Group'] = 'None'

In [6]:
# dfCrises.head()

In [7]:
# # dfCrises.head(20)
# # dfCrises[dfCrises.Amt.str.contains('Århus')].head(20)
# print(len(dfCrises[dfCrises.Amt.str.contains('Maribo')]))
# dfCrises[dfCrises.Amt.str.contains('Maribo')]
# # dfCrises[dfCrises.DayWithMostBurials.dt.year == 1872]

In [8]:
# # dfCrises.head()
# dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1831) & (dfCrises.Amt.str.contains('Præstø'))]
# dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1831) & (dfCrises.Amt.str.contains('Sorø'))]
# dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1872) & (dfCrises.Amt.str.contains('Maribo'))]
# dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1874) & (dfCrises.Amt.str.contains('Odens'))]
# dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1875) & (dfCrises.Amt.str.contains('Odens'))]
# dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1875) & (dfCrises.Amt.str.contains('Svend'))]

In [9]:
# # len(dfCrises)
# dfCrises[dfCrises.Amt.str.contains('Thiste')]
# dfCrises[dfCrises.D]

In [10]:
# dfCrises.sort_values('NumberOfDays')

In [11]:
# dfCrises.groupby('Amt').count()['Start']

In [12]:
# dfCrises.groupby('Amt').sum()['Excess']

# Cholera

In [13]:
# dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1853)]

In [14]:
# # # Deadliest crises in 1853 is cholera. Get the cluster-ID
# # cholCluster = dfCrises[(dfCrises.DayWithMostBurials.dt.year == 1853)].iloc[0].Cluster

# # # All crises with same age-pattern in 1853 appears to be cholera
# # dfChol = dfCrises[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostBurials.dt.year == 1853)].copy()

# # # A crisis in Svendborg appears to be in the same cluster, but with no elderly mortality, so sort that out
# # dfChol = dfChol[dfChol['FracAll_60+'] > 0.001]
# # dfChol 

# # Deadliest crises in 1853 is cholera. Get the cluster-ID
# cholCluster = dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1853)].iloc[0].Cluster

# # All crises with same age-pattern in 1853 appears to be cholera
# dfChol = dfCrises[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostDeaths.dt.year == 1853)].copy()

# dfChol

In [15]:
# Deadliest crises in 1853 is cholera. Get the cluster-ID
cholCluster = dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1853)].iloc[0].Cluster

# All crises with same age-pattern in 1853 appears to be cholera
dfChol = dfCrises[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostDeaths.dt.year == 1853)].copy()

# A crisis in Svendborg appears to be in the same cluster, but with no elderly mortality, so sort that out
dfChol = dfChol[dfChol['FracAll_60+'] > 0.001]

# Add label to main dataframe
dfCrises.loc[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostDeaths.dt.year == 1853),'Group'] = 'Cholera (1853)'

# Print results 
print(dfChol.Amt.values)
print(f'Total excess: {dfChol.Excess.sum()}')
print(f'Indices: {dfChol.index.values}')
print(dfChol.Cluster.value_counts())
dfChol

['Staden København' 'Københavns Amt' 'Ålborg Amt' 'Århus Amt'
 'Frederiksborg Amt' 'Hjørring Amt']
Total excess: 5154
Indices: [  0  11  18  61 337 400]
D    6
Name: Cluster, dtype: int64


Unnamed: 0,Amt,Start,End,NumberOfDays,DayWithMostDeaths,Excess,ExcessPct,GenderRatio,TimeOfYear,Season,PopulationEstimate,Exc_Infants_stillborn,Exc_1-14,Exc_15-39,Exc_40-59,Exc_60+,Pct_Infants_stillborn,Pct_1-14,Pct_15-39,Pct_40-59,Pct_60+,DataSum_Infants_stillborn,DataSum_1-14,DataSum_15-39,DataSum_40-59,DataSum_60+,Baseline_Infants_stillborn,Baseline_1-14,Baseline_15-39,Baseline_40-59,Baseline_60+,FracAll_Infants_stillborn,FracAll_1-14,FracAll_15-39,FracAll_40-59,FracAll_60+,AgeSplitSumExc,Cluster,ClusterA_Prob,ClusterB_Prob,ClusterC_Prob,ClusterD_Prob,ClusterE_Prob,ClusterF_Prob,Group
0,Staden København,1853-07-02,1853-09-07,67,1853-07-28,3833,546.0,0.468578,Q3,Summer,139473.0,81.519622,444.281691,942.4992,1328.849144,882.05702,62.0,401.0,745.0,1217.0,825.0,212.0,555.0,1069.0,1438.0,989.0,130.480378,110.718309,126.5008,109.150857,106.94298,0.022157,0.120755,0.256169,0.361178,0.239741,3679.206677,D,0.0,0.0,0.0,0.0,0.9897,0.0103,
11,Københavns Amt,1853-07-16,1853-09-24,70,1853-07-31,578,206.0,0.480186,Q3,Summer,76787.0,16.390904,97.367909,149.32477,167.477201,155.673631,27.0,240.0,431.0,424.0,250.0,77.0,138.0,184.0,207.0,218.0,60.609096,40.632091,34.67523,39.522799,62.326369,0.02796,0.16609,0.254719,0.285683,0.265548,586.234414,D,0.0,0.0002,0.0,0.0,0.8438,0.156,
18,Ålborg Amt,1853-08-09,1853-09-17,39,1853-08-18,434,330.0,0.485866,Q3,Summer,69476.0,19.101878,56.522083,119.507082,126.132553,121.216074,49.0,306.0,825.0,910.0,394.0,58.0,75.0,134.0,140.0,152.0,38.898122,18.477917,14.492918,13.867448,30.783926,0.04317,0.127739,0.270085,0.285058,0.273947,442.479669,D,0.0,0.0001,0.0,0.0,0.8751,0.1248,
61,Århus Amt,1853-07-15,1853-09-15,62,1853-08-24,201,138.0,0.488439,Q3,Summer,43290.0,8.343036,23.270502,56.005618,79.659821,47.305049,42.0,94.0,311.0,519.0,177.0,28.0,48.0,74.0,95.0,74.0,19.656964,24.729498,17.994382,15.340179,26.694951,0.03888,0.108445,0.260996,0.371229,0.22045,214.584026,D,0.0,0.0,0.0,0.0,0.9961,0.0039,
337,Frederiksborg Amt,1853-07-15,1853-08-13,29,1853-08-04,57,58.0,0.532051,Q3,Summer,77726.0,9.376153,5.274449,17.887493,11.916761,14.59305,88.0,34.0,136.0,85.0,60.0,20.0,21.0,31.0,26.0,39.0,10.623847,15.725551,13.112507,14.083239,24.40695,0.158789,0.089325,0.302932,0.201815,0.247139,59.047907,D,0.0,0.0069,0.0,0.0,0.818,0.1752,
400,Hjørring Amt,1853-08-14,1853-08-31,17,1853-08-14,51,96.0,0.596154,Q3,Summer,74944.0,3.839448,10.508379,10.477626,11.644575,10.094203,62.0,124.0,232.0,347.0,93.0,10.0,19.0,15.0,15.0,21.0,6.160552,8.491621,4.522374,3.355425,10.905797,0.082455,0.225675,0.225014,0.250076,0.21678,46.56423,D,0.0,0.0314,0.0,0.0,0.7433,0.2253,


In [16]:

# The 1857 cholera was only in Sorø amt
dfChol2 = dfCrises[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostDeaths.dt.year == 1857) & (dfCrises.Amt.str.contains('Sorø'))].copy()

dfCrises.loc[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostDeaths.dt.year == 1857),'Group'] = 'Cholera (1857)'

print(dfChol2.Amt.values)
print(f'Total excess: {dfChol2.Excess.sum()}')
print(f'Indices: {dfChol2.index.values}')
print(dfChol2.Cluster.value_counts())
dfChol2


['Sorø Amt']
Total excess: 431
Indices: [20]
D    1
Name: Cluster, dtype: int64


Unnamed: 0,Amt,Start,End,NumberOfDays,DayWithMostDeaths,Excess,ExcessPct,GenderRatio,TimeOfYear,Season,PopulationEstimate,Exc_Infants_stillborn,Exc_1-14,Exc_15-39,Exc_40-59,Exc_60+,Pct_Infants_stillborn,Pct_1-14,Pct_15-39,Pct_40-59,Pct_60+,DataSum_Infants_stillborn,DataSum_1-14,DataSum_15-39,DataSum_40-59,DataSum_60+,Baseline_Infants_stillborn,Baseline_1-14,Baseline_15-39,Baseline_40-59,Baseline_60+,FracAll_Infants_stillborn,FracAll_1-14,FracAll_15-39,FracAll_40-59,FracAll_60+,AgeSplitSumExc,Cluster,ClusterA_Prob,ClusterB_Prob,ClusterC_Prob,ClusterD_Prob,ClusterE_Prob,ClusterF_Prob,Group
20,Sorø Amt,1857-09-03,1857-10-24,51,1857-09-13,431,293.0,0.49827,Q3,Fall,78369.0,59.13914,98.851967,90.269904,115.338439,74.557213,165.0,393.0,540.0,618.0,216.0,95.0,124.0,107.0,134.0,109.0,35.86086,25.148033,16.730096,18.661561,34.442787,0.134973,0.225609,0.206022,0.263236,0.170161,438.156663,D,0.0,0.0218,0.0,0.0,0.8934,0.0847,


In [17]:
# # Deadliest crises in 1853 is cholera. Get the cluster-ID
# cholCluster = dfCrises[(dfCrises.DayWithMostBurials.dt.year == 1853)].iloc[0].Cluster

# # All crises with same age-pattern in 1853 appears to be cholera
# dfChol = dfCrises[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostBurials.dt.year == 1853)].copy()

# # A crisis in Svendborg appears to be in the same cluster, but with no elderly mortality, so sort that out
# dfChol = dfChol[dfChol['FracAll_60+'] > 0.001]

# # Add label to main dataframe
# dfCrises.loc[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostBurials.dt.year == 1853),'Group'] = 'Cholera (1853)'

# # Print results 
# print(dfChol.Amt.values)
# print(f'Total excess: {dfChol.Excess.sum()}')
# print(f'Indices: {dfChol.index.values}')
# print(dfChol.Cluster.value_counts())
# dfChol

In [18]:

# # The 1857 cholera was only in Sorø amt
# dfChol2 = dfCrises[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostBurials.dt.year == 1857) & (dfCrises.Amt.str.contains('Sorø'))].copy()

# dfCrises.loc[(dfCrises.Cluster == cholCluster)&(dfCrises.DayWithMostBurials.dt.year == 1857),'Group'] = 'Cholera (1857)'

# print(dfChol2.Amt.values)
# print(f'Total excess: {dfChol2.Excess.sum()}')
# print(f'Indices: {dfChol2.index.values}')
# print(dfChol2.Cluster.value_counts())
# dfChol2


In [19]:
print('Total excess for cholera:')
pd.concat([dfChol,dfChol2]).Excess.sum()

Total excess for cholera:


5585

# Harvest epidemics, 1826-1832
Between 1825 and 1833, most deadly between July to October, mostly cluster H

(Exceptions to cluster G is Holbæk in 1830, and smaller parts in Maribo amt)

A number of crises in 1832 are also included, although the age-cluster differs a little (cluster F instead of H).

In [20]:

dfHarvest = dfCrises.copy()
dfHarvest = dfHarvest[dfHarvest.DayWithMostDeaths > np.datetime64('1825')]
dfHarvest = dfHarvest[dfHarvest.DayWithMostDeaths <= np.datetime64('1833')]

dfHarvest = dfHarvest[dfHarvest.DayWithMostDeaths.dt.month > 7]
dfHarvest = dfHarvest[dfHarvest.DayWithMostDeaths.dt.month <=10]
dfHarvest

# Add some extra crises from 1832 in the same amts
dfHarvestExtra = dfCrises[(dfCrises.DayWithMostDeaths.dt.year == 1832) & (dfCrises.Amt.isin(dfHarvest.Amt.unique()))].copy()
# dfHarvestExtra
dfHarvestExtra
# dfHarvest

# Add extra to dfHarvest
dfHarvest = pd.concat([dfHarvest,dfHarvestExtra]).sort_index()

# Sort away Ribe amt
dfHarvest = dfHarvest[~dfHarvest.Amt.str.contains('Ribe')]

print(dfHarvest.Amt.unique())
print(f'Total excess: {dfHarvest.Excess.sum()}')
print(f'Part of that which is from excess in extra crises in 1832: {dfHarvestExtra.Excess.sum()}')
print(f'Indices: {dfHarvest.index.values}')

print(dfHarvest.Cluster.value_counts())
# Update main dataframe
dfCrises.loc[dfHarvest.index,'Group'] = 'Harvest-epidemics'

# dfHarvest

['Maribo Amt' 'Præstø Amt' 'Sorø Amt' 'Holbæk Amt' 'Københavns Amt'
 'Frederiksborg Amt' 'Svendborg Amt' 'Odense Amt' 'Sønderborg Amt']
Total excess: 11024
Part of that which is from excess in extra crises in 1832: 773
Indices: [  1   2   3   4   6  10  24  26  40  43  51  53  59  60  72  81 121 128
 131 146 150 166 187 194 213 255 275 409]
E    22
D     3
C     2
F     1
Name: Cluster, dtype: int64


In [21]:
# dfHarvest.sort_values('DayWithMostDeaths')
# dfHarvest.Cluster
# dfHarvest[dfHarvest.Cluster == 'D']

In [22]:
# dfHarvest.sort_values('DayWithMostDeaths')

In [23]:

# dfHarvest = dfCrises.copy()
# dfHarvest = dfHarvest[dfHarvest.DayWithMostBurials > np.datetime64('1825')]
# dfHarvest = dfHarvest[dfHarvest.DayWithMostBurials <= np.datetime64('1833')]

# dfHarvest = dfHarvest[dfHarvest.DayWithMostBurials.dt.month > 7]
# dfHarvest = dfHarvest[dfHarvest.DayWithMostBurials.dt.month <=10]
# dfHarvest

# # Add some extra crises from 1832 in the same amts
# dfHarvestExtra = dfCrises[(dfCrises.DayWithMostBurials.dt.year == 1832) & (dfCrises.Amt.isin(dfHarvest.Amt.unique()))].copy()
# # dfHarvestExtra
# dfHarvestExtra
# # dfHarvest


In [24]:
# dfHarvest[~dfHarvest.Amt.str.contains('Ribe')]

In [25]:

# dfHarvest = dfCrises.copy()
# dfHarvest = dfHarvest[dfHarvest.DayWithMostBurials > np.datetime64('1825')]
# dfHarvest = dfHarvest[dfHarvest.DayWithMostBurials <= np.datetime64('1833')]

# dfHarvest = dfHarvest[dfHarvest.DayWithMostBurials.dt.month > 7]
# dfHarvest = dfHarvest[dfHarvest.DayWithMostBurials.dt.month <=10]
# dfHarvest

# # Add some extra crises from 1832 in the same amts
# dfHarvestExtra = dfCrises[(dfCrises.DayWithMostBurials.dt.year == 1832) & (dfCrises.Amt.isin(dfHarvest.Amt.unique()))].copy()
# # dfHarvestExtra
# # dfHarvest


# # Add extra to dfHarvest
# dfHarvest = pd.concat([dfHarvest,dfHarvestExtra]).sort_index()

# # Sort away Ribe amt
# dfHarvest = dfHarvest[~dfHarvest.Amt.str.contains('Ribe')]

# print(dfHarvest.Amt.unique())
# print(f'Total excess: {dfHarvest.Excess.sum()}')
# print(f'Part of that which is from excess in extra crises in 1832: {dfHarvestExtra.Excess.sum()}')
# print(f'Indices: {dfHarvest.index.values}')

# print(dfHarvest.Cluster.value_counts())
# # Update main dataframe
# dfCrises.loc[dfHarvest.index,'Group'] = 'Harvest-epidemics'

# # dfHarvest

In [26]:
# dfHarvest

# Child mortality in 1829

High mortality among 1-14 in first half of 1829. 

Cluster C. One of them gets classified as B, but has been determined to be related.

A few crises occur at the same time, but all with mortality among the elderly. These end up in clusters E or F, and are omitted from this group

In [27]:

df1829 = dfCrises.copy()
df1829 = df1829[df1829.DayWithMostDeaths >= np.datetime64('1829')]
df1829 = df1829[df1829.DayWithMostDeaths <= np.datetime64('1830')]

df1829 = df1829[df1829.DayWithMostDeaths.dt.month > 0]
df1829 = df1829[df1829.DayWithMostDeaths.dt.month <=7]

# Remove any with a lot of elderly mortality
df1829 = df1829[df1829.Cluster != 'E']
df1829 = df1829[df1829.Cluster != 'F']

print(df1829.Amt.unique())
print(f'Total excess: {df1829.Excess.sum()}')
print(f'Indices: {df1829.index.values}')

# Update main dataframe
dfCrises.loc[df1829.index,'Group'] = 'Child mortality 1829'
# # df1829.iloc[:,:-6]
display(df1829['Cluster'].value_counts())


['Sorø Amt' 'Maribo Amt' 'Holbæk Amt' 'Svendborg Amt' 'Odense Amt'
 'Københavns Amt' 'Præstø Amt' 'Frederiksborg Amt']
Total excess: 2720
Indices: [  8  17  28  42  64  69  76  95 198 273 290 298 356]


C    12
B     1
Name: Cluster, dtype: int64

In [28]:
# df1829

In [29]:
# df1829[df1829.Cluster == 'B']

In [30]:
# # dfCrises.columns 

# df1829 = dfCrises.copy()
# df1829 = df1829[df1829.DayWithMostBurials >= np.datetime64('1829')]
# df1829 = df1829[df1829.DayWithMostBurials <= np.datetime64('1830')]

# df1829 = df1829[df1829.DayWithMostBurials.dt.month > 0]
# df1829 = df1829[df1829.DayWithMostBurials.dt.month <=7]


# if finalResultsFilename == 'AllCrises_Years12_Threshold3_LowerThreshold2_MaxDaysBelow4_minLength4_minCount50_Clustered':
#     df1829 = df1829[df1829.Cluster != 'F']
#     df1829 = df1829[df1829.Cluster != 'G']
# df1829

In [31]:

# df1829 = dfCrises.copy()
# df1829 = df1829[df1829.DayWithMostBurials >= np.datetime64('1829')]
# df1829 = df1829[df1829.DayWithMostBurials <= np.datetime64('1830')]

# df1829 = df1829[df1829.DayWithMostBurials.dt.month > 0]
# df1829 = df1829[df1829.DayWithMostBurials.dt.month <=7]


# if finalResultsFilename == 'AllCrises_Years12_Threshold3_LowerThreshold2_MaxDaysBelow4_minLength4_minCount50_Clustered':
#     df1829 = df1829[df1829.Cluster != 'F']
#     df1829 = df1829[df1829.Cluster != 'G']

# # df1829 = df1829[df1829.Cluster != 'H']
# # df1829 = df1829[df1829.Cluster != 'G']

# # df1829['Frac_1-14'] = df1829['1-14']/df1829['AgeSplitSum']
# # df1829.sort_values(by='FracAll_1-14',ascending=False).iloc[:,-20:]
# # df1829.sort_values(by='Pct_1-14',ascending=False)
# # df1829.sort_values(by='Cluster2_Prob',ascending=False)


# print(df1829.Amt.unique())
# print(f'Total excess: {df1829.Excess.sum()}')
# print(f'Indices: {df1829.index.values}')

# # Update main dataframe
# dfCrises.loc[df1829.index,'Group'] = 'Child mortality 1829'
# # # df1829.iloc[:,:-6]
# display(df1829['Cluster'].value_counts())
# print('Mortality not in cluster D: '+str(df1829[df1829.Cluster != 'D'].Excess.sum()))

# 1891/1892 Pandemic Flu

Everything around new years 1891/1892

All are cluster E, except Åbenrå which gets classified as cluster F

In [32]:
dfFlu1892 = dfCrises.copy()


dfFlu1892 = dfFlu1892[dfFlu1892.DayWithMostDeaths >= np.datetime64('1891-11')]
dfFlu1892 = dfFlu1892[dfFlu1892.DayWithMostDeaths <= np.datetime64('1892-03')]

dfFlu1892  
print(np.sort(dfFlu1892.Amt.unique()))

russfluAmt = list(np.sort(dfFlu1892.Amt.unique()))
print(f'Total excess: {dfFlu1892.Excess.sum()}')
print(f'Indices: {dfFlu1892.index.values}')

# Update main dataframe
dfCrises.loc[dfFlu1892.index,'Group'] = 'Pandemic Flu (1891/1892)'

print('Clusters:')
print(dfFlu1892.Cluster.value_counts())
# dfFlu1892

['Bornholms Amt' 'Frederiksborg Amt' 'Haderslev Amt' 'Hjørring Amt'
 'Holbæk Amt' 'Københavns Amt' 'Maribo Amt' 'Odense Amt' 'Præstø Amt'
 'Randers Amt' 'Ribe Amt' 'Ringkøbing Amt' 'Sorø Amt' 'Staden København'
 'Svendborg Amt' 'Thisted Amt' 'Tønder Amt' 'Vejle Amt' 'Viborg Amt'
 'Åbenrå Amt' 'Ålborg Amt' 'Århus Amt']
Total excess: 6463
Indices: [  7   9  12  19  23  29  30  31  33  34  37  41  45  52  54 104 113 126
 209 212 223 243 301 330 382]
Clusters:
F    24
E     1
Name: Cluster, dtype: int64


In [33]:
# dfFlu1892 = dfCrises.copy()


# dfFlu1892 = dfFlu1892[dfFlu1892.DayWithMostBurials >= np.datetime64('1891-11')]
# dfFlu1892 = dfFlu1892[dfFlu1892.DayWithMostBurials <= np.datetime64('1892-03')]

# dfFlu1892  
# # print(np.sort(dfFlu1892.Amt.unique()))


In [34]:
# dfFlu1892 = dfCrises.copy()


# dfFlu1892 = dfFlu1892[dfFlu1892.DayWithMostBurials >= np.datetime64('1891-11')]
# dfFlu1892 = dfFlu1892[dfFlu1892.DayWithMostBurials <= np.datetime64('1892-03')]

# dfFlu1892  
# print(np.sort(dfFlu1892.Amt.unique()))

# russfluAmt = list(np.sort(dfFlu1892.Amt.unique()))
# print(f'Total excess: {dfFlu1892.Excess.sum()}')
# print(f'Indices: {dfFlu1892.index.values}')

# # Update main dataframe
# dfCrises.loc[dfFlu1892.index,'Group'] = 'Pandemic Flu (1891/1892)'

# print('Clusters:')
# print(dfFlu1892.Cluster.value_counts())
# # dfFlu1892

In [35]:
print('Counties not experiencing flu: ')
print(np.setdiff1d(dfCrises.Amt.unique(),dfFlu1892.Amt.unique()))
print('(i.e. counties that either do not exist or arent Danish at the time)')

Counties not experiencing flu: 
['Nordborg Amt' 'Skanderborg Amt' 'Sønderborg Amt']
(i.e. counties that either do not exist or arent Danish at the time)


# Pandemic flu, spring 1900 (The so-called "Pseudo-pandemic")


In [36]:
dfPseudo = dfCrises.copy()


dfPseudo = dfPseudo[dfPseudo.DayWithMostDeaths >= np.datetime64('1900-01')]
dfPseudo = dfPseudo[dfPseudo.DayWithMostDeaths <= np.datetime64('1900-08')]

# print(np.sort(dfCrises.Amt.unique()))
print(np.sort(dfPseudo.Amt.unique()))
pseuAmt = list(np.sort(dfPseudo.Amt.unique()))
print(f'Total excess: {dfPseudo.Excess.sum()}')
print(f'Indices: {dfPseudo.index.values}')

# Update main dataframe
dfCrises.loc[dfPseudo.index,'Group'] = 'Pandemic flu (1900)'

print('Clusters:')
print(dfPseudo.Cluster.value_counts())
# dfPseudo

['Frederiksborg Amt' 'Haderslev Amt' 'Hjørring Amt' 'Holbæk Amt'
 'Maribo Amt' 'Odense Amt' 'Præstø Amt' 'Sorø Amt' 'Svendborg Amt'
 'Thisted Amt' 'Vejle Amt' 'Ålborg Amt']
Total excess: 1874
Indices: [ 38  47  48  63  88 100 129 132 219 228 270 287 307 416]
Clusters:
F    13
E     1
Name: Cluster, dtype: int64


In [37]:
# dfPseudo = dfCrises.copy()


# dfPseudo = dfPseudo[dfPseudo.DayWithMostBurials >= np.datetime64('1900-01')]
# dfPseudo = dfPseudo[dfPseudo.DayWithMostBurials <= np.datetime64('1900-08')]
# dfPseudo

In [38]:
# dfPseudo = dfCrises.copy()


# dfPseudo = dfPseudo[dfPseudo.DayWithMostBurials >= np.datetime64('1900-01')]
# dfPseudo = dfPseudo[dfPseudo.DayWithMostBurials <= np.datetime64('1900-08')]

# # print(np.sort(dfCrises.Amt.unique()))
# print(np.sort(dfPseudo.Amt.unique()))
# pseuAmt = list(np.sort(dfPseudo.Amt.unique()))
# print(f'Total excess: {dfPseudo.Excess.sum()}')
# print(f'Indices: {dfPseudo.index.values}')

# # Update main dataframe
# dfCrises.loc[dfPseudo.index,'Group'] = 'Pandemic flu (1900)'

# print('Clusters:')
# print(dfPseudo.Cluster.value_counts())
# dfPseudo

In [39]:
dfBothFlu = pd.concat([dfFlu1892,dfPseudo])
print(dfBothFlu.Cluster.value_counts())
dfBothFlu.Excess.sum()

F    37
E     2
Name: Cluster, dtype: int64


8337

# Scarlatina


Criteria:

Age: Mostly cluster B (Almost all 1-14)

Geography: Mid and northern Jutland

Time: Winter 1857/1858

Comment: High infant mortality is observed in the rest of the country in the fall/winter the following years (in 1858/1859: Odense, Svendborg, Præstø, Maribo, and 1859/1860 in Maribo). 

Further investigation of historical records may reveal this to also be scarlet fever, but it is omitted from the count for the winter 1857/1858.

Something also seems to occur on Bornholm, but is also ignored here.

Extra: Three mortality crises are related, but seem to also have a wave of some disease among elderly, so they end up with a strange age-distribution (and not cluster A)
<!-- (232,117,180) -->

In [40]:
amtJutlandMidNorth = ['Thisted Amt','Randers Amt','Århus Amt','Ålborg Amt','Vejle Amt','Ringkøbing Amt','Viborg Amt','Hjørring Amt','Ribe Amt','Åbenrå Amt','Skanderborg Amt']
amtJutlandMidNorth = ['Thisted Amt','Randers Amt','Århus Amt','Ålborg Amt','Ringkøbing Amt','Viborg Amt','Hjørring Amt','Skanderborg Amt']

In [41]:
# dfScar = dfCrises.copy()
# dfScar = dfScar[dfScar.DayWithMostDeaths >= np.datetime64('1857-06-01')]
# dfScar = dfScar[dfScar.DayWithMostDeaths <= np.datetime64('1858-06-01')]

# dfScar = dfScar[dfScar.Amt.isin(amtJutlandMidNorth)]

# dfScar

In [42]:
dfScar = dfCrises.copy()

dfScar = dfScar[dfScar.DayWithMostDeaths >= np.datetime64('1857')]
dfScar = dfScar[dfScar.DayWithMostDeaths <= np.datetime64('1859')]

dfScar = dfScar[dfScar.Amt.isin(amtJutlandMidNorth)]

dfScar = dfScar[dfScar.Cluster != 'E']

print(dfScar.Amt.unique())
print(f'Total excess: {dfScar.Excess.sum()}')
print(f'Indices: {dfScar.index.values}')
 
# Update main dataframe
dfCrises.loc[dfScar.index,'Group'] = 'Scarlatina'

dfScar.Cluster.value_counts()

['Thisted Amt' 'Ålborg Amt' 'Viborg Amt' 'Randers Amt' 'Århus Amt'
 'Skanderborg Amt']
Total excess: 2401
Indices: [  5  22  36  85 109 137 152 171 180 242 249 303 313 327 332]


B    10
C     5
Name: Cluster, dtype: int64

In [43]:
dfScar[dfScar.Cluster != 'B'].Excess.sum()

435

In [44]:
# dfScar = dfCrises.copy()

# dfScar = dfScar[dfScar.DayWithMostDeaths >= np.datetime64('1857')]
# dfScar = dfScar[dfScar.DayWithMostDeaths <= np.datetime64('1859')]

# dfScar = dfScar[dfScar.Amt.isin(amtJutlandMidNorth)]

# print(dfScar.Amt.unique())
# print(f'Total excess: {dfScar.Excess.sum()}')
# print(f'Indices: {dfScar.index.values}')
 
# # Update main dataframe
# dfCrises.loc[dfScar.index,'Group'] = 'Scarlatina'

# dfScar.Cluster.value_counts()

In [45]:
# dfScar = dfCrises.copy()

# dfScar = dfScar[dfScar.DayWithMostBurials >= np.datetime64('1857')]
# dfScar = dfScar[dfScar.DayWithMostBurials <= np.datetime64('1859')]

# dfScar = dfScar[dfScar.Amt.isin(amtJutlandMidNorth)]

# print(dfScar.Amt.unique())
# print(f'Total excess: {dfScar.Excess.sum()}')
# print(f'Indices: {dfScar.index.values}')
 
# # Update main dataframe
# dfCrises.loc[dfScar.index,'Group'] = 'Scarlatina'

# dfScar.Cluster.value_counts()

In [46]:
# dfScar.Cluster

In [47]:
# dfScar['Pct_1-14']

In [48]:
# dfScar

numFound    = dfCrises[dfCrises.Group != 'None']['Excess'].sum()
numNotFound = dfCrises[dfCrises.Group == 'None']['Excess'].sum()
print(str(numFound) + ' burials grouped')
print(str(numNotFound) + ' burials still ungrouped')
print(f'Hence, {100 * numFound/(numFound+numNotFound):2.0f}% of burials has been classified')

30249 burials grouped
29683 burials still ungrouped
Hence, 50% of burials has been classified


In [49]:
numFound + numNotFound

59932

# War

Crises that coincide with wars.

Manual checks of these suggest that many were probably all war-related, as also suggested by the gender ratio, however no actual grouping was done.

In [50]:
# dfWar = dfCrises.copy()

# dfWar = dfWar[(dfWar.DayWithMostDeaths.dt.year == 1864) | (dfWar.DayWithMostDeaths.dt.year == 1849)]
# # dfWar = dfWar[dfWar.DayWithMostBurials <= np.datetime64('1859')]

# dfWar.sort_values(by='GenderRatio',ascending=False)
# # dfWar


# # indexWar = [195,207,20,63,316,12,123,37]
# # dfWar = dfWar.loc[indexWar]


# print(np.sort(dfWar.Amt.unique()))
# print(f'Total excess: {dfWar.Excess.sum()}')
# print(f'Indices: {dfWar.index.values}')

# # Update main dataframe
# dfCrises.loc[dfWar.index,'Group'] = 'Potentially war-related'

# # dfWar
# dfWar.sort_values(by='GenderRatio',ascending=False)

In [51]:
dfWar = dfCrises.copy()

dfWar = dfWar[(dfWar.DayWithMostDeaths.dt.year == 1864) | (dfWar.DayWithMostDeaths.dt.year == 1849)]
# dfWar = dfWar[dfWar.DayWithMostBurials <= np.datetime64('1859')]

dfWar.sort_values(by='GenderRatio',ascending=False)
# dfWar


# indexWar = [195,207,20,63,316,12,123,37]
# dfWar = dfWar.loc[indexWar]


print(np.sort(dfWar.Amt.unique()))
print(f'Total excess: {dfWar.Excess.sum()}')
print(f'Indices: {dfWar.index.values}')

# Update main dataframe
dfCrises.loc[dfWar.index,'Group'] = 'Possibly war-related'

# dfWar
# dfWar.sort_values(by='GenderRatio',ascending=False)

['Haderslev Amt' 'Maribo Amt' 'Nordborg Amt' 'Odense Amt' 'Præstø Amt'
 'Skanderborg Amt' 'Staden København' 'Svendborg Amt' 'Sønderborg Amt'
 'Vejle Amt' 'Åbenrå Amt' 'Ålborg Amt']
Total excess: 3414
Indices: [ 13  14  44  55  57  66  86  89 119 130 143 170 182 224 251 254 261 264
 289 322 347 350 380 403 405]


In [52]:
# dfWar = dfCrises.copy()

# dfWar = dfWar[(dfWar.DayWithMostBurials.dt.year == 1864) | (dfWar.DayWithMostBurials.dt.year == 1849)]
# # dfWar = dfWar[dfWar.DayWithMostBurials <= np.datetime64('1859')]

# dfWar.sort_values(by='GenderRatio',ascending=False)
# # dfWar


# # indexWar = [195,207,20,63,316,12,123,37]
# # dfWar = dfWar.loc[indexWar]


# print(np.sort(dfWar.Amt.unique()))
# print(f'Total excess: {dfWar.Excess.sum()}')
# print(f'Indices: {dfWar.index.values}')

# # Update main dataframe
# dfCrises.loc[dfWar.index,'Group'] = 'Potentially war-related'

# # dfWar
# dfWar.sort_values(by='GenderRatio',ascending=False)

In [53]:

numFound    = dfCrises[dfCrises.Group != 'None']['Excess'].sum()
numNotFound = dfCrises[dfCrises.Group == 'None']['Excess'].sum()
print(str(numFound) + ' burials grouped')
print(str(numNotFound) + ' burials still ungrouped')
print(f'Hence, {100 * numFound/(numFound+numNotFound):2.0f}% of burials has been classified')

33663 burials grouped
26269 burials still ungrouped
Hence, 56% of burials has been classified


In [54]:
numFound + numNotFound

59932

In [55]:
# dfCrises[dfCrises.Group == 'None']

In [56]:
# Save the results
curFileName = finalResultsFilename + '_Grouped'
dfCrises.to_csv(pathData+ curFileName + '.csv',index=False)

# Save main table
Reorder results and save to top directory

In [57]:
# asdfasdfasdf # To stop execution when running all, to avoid overwriting results
# dfCrisesBackup = dfCrises.copy()
# dfCrises = dfCrisesBackup.copy()

In [58]:
saveToMainFile = True
saveToMainFile = False

In [59]:
dfCrisesMainFile = dfCrises.copy()

In [61]:
ageGroupNames = ['Infants_stillborn','1-14','15-39','40-59','60+']
ageExc = ['Exc_'+x for x in ageGroupNames]
agePct = ['Pct_'+x for x in ageGroupNames]
ageDataSum = ['DataSum_'+x for x in ageGroupNames]
ageBaseline = ['Baseline_'+x for x in ageGroupNames]
ageFracAll = ['FracAll_'+x for x in ageGroupNames]


clusterNames = ['ClusterA_Prob','ClusterB_Prob','ClusterC_Prob','ClusterD_Prob','ClusterE_Prob','ClusterF_Prob']


In [62]:
dfCrisesMainFile.replace([np.inf, -np.inf], np.nan, inplace=True) # Replace infinite values with nan

# Round some of the columns to make it easier to work with for others
dfCrisesMainFile['ExcessPct'] = dfCrisesMainFile['ExcessPct'].astype(int) # Is already an integer, so no need to round
dfCrisesMainFile['PopulationEstimate'] = dfCrisesMainFile['PopulationEstimate'].astype('Int64') # Is already an integer, so no need to round. Has to be Int64 as some are NaN 
dfCrisesMainFile['GenderRatio'] = dfCrisesMainFile['GenderRatio'].round(4)

dfCrisesMainFile[ageExc] = dfCrisesMainFile[ageExc].round(2)
dfCrisesMainFile[ageBaseline] = dfCrisesMainFile[ageBaseline].round(2)
dfCrisesMainFile[ageFracAll] = dfCrisesMainFile[ageFracAll].round(4)
dfCrisesMainFile[agePct] = dfCrisesMainFile[agePct].round().astype('Int64')
dfCrisesMainFile[ageDataSum] = dfCrisesMainFile[ageDataSum].round().astype('Int64')

dfCrisesMainFile[clusterNames] = dfCrisesMainFile[clusterNames].round(4)

dfCrisesMainFile['AgeSplitSumExc'] = dfCrisesMainFile['AgeSplitSumExc'].round(2)

In [63]:
# Reorder manually
# print(dfCrisesMainFile.columns)
newOrder = [
    'Amt',
    'Group',
    'Start',
    'End',
    'DayWithMostDeaths',
    'NumberOfDays',
    'Excess',
    'ExcessPct',
    'GenderRatio',
    'Cluster',
    'Season',
    'TimeOfYear',
    'PopulationEstimate',
    'Exc_Infants_stillborn',
    'Exc_1-14',
    'Exc_15-39',
    'Exc_40-59',
    'Exc_60+',
    'Pct_Infants_stillborn',
    'Pct_1-14',
    'Pct_15-39',
    'Pct_40-59',
    'Pct_60+',
    'DataSum_Infants_stillborn',
    'DataSum_1-14',
    'DataSum_15-39',
    'DataSum_40-59',
    'DataSum_60+',
    'Baseline_Infants_stillborn',
    'Baseline_1-14',
    'Baseline_15-39',
    'Baseline_40-59',
    'Baseline_60+',
    'FracAll_Infants_stillborn',
    'FracAll_1-14',
    'FracAll_15-39',
    'FracAll_40-59',
    'FracAll_60+',
    'AgeSplitSumExc',
    'ClusterA_Prob',
    'ClusterB_Prob',
    'ClusterC_Prob',
    'ClusterD_Prob',
    'ClusterE_Prob',
    'ClusterF_Prob',
]


In [64]:
# Reorder
dfCrisesMainFile = dfCrisesMainFile[newOrder]

In [65]:
# Rename columns to more readable names
ageExcNew = ['Excess_'+x for x in ageGroupNames]
agePctNew = ['PercentExcess_'+x for x in ageGroupNames]
ageFracAllNew = ['FractionOfAllExcess_'+x for x in ageGroupNames]
clusterProbsFull = [x+'ability' for x in clusterNames]

dfCrisesMainFile = dfCrisesMainFile.rename(columns=dict(zip(ageExc,ageExcNew)))
dfCrisesMainFile = dfCrisesMainFile.rename(columns=dict(zip(agePct,agePctNew)))
dfCrisesMainFile = dfCrisesMainFile.rename(columns=dict(zip(ageFracAll,ageFracAllNew)))
dfCrisesMainFile = dfCrisesMainFile.rename(columns=dict(zip(clusterNames,clusterProbsFull)))
dfCrisesMainFile = dfCrisesMainFile.rename(columns={'AgeSplitSumExc':'SumOfAgeSpecificExcess'})

In [66]:
dfCrisesMainFile['Amt'] = dfCrisesMainFile.Amt.str.replace('Amt','County')
dfCrisesMainFile['Amt'] = dfCrisesMainFile.Amt.str.replace('Staden København','Copenhagen City')
dfCrisesMainFile['Amt'] = dfCrisesMainFile.Amt.str.replace('Københavns','Copenhagen')


In [68]:
# dfCrisesMainFile.head()
if saveToMainFile:
    dfCrisesMainFile.to_csv('../SupplementaryTable_AllCrises.csv',index=False)

    # Also save file as excel (with dates as strings to avoid excel-problems)
    dfCrisesMainFileExcel = dfCrisesMainFile.copy()
    dfCrisesMainFileExcel['Start'] = dfCrisesMainFileExcel['Start'].astype(str)
    dfCrisesMainFileExcel['End'] = dfCrisesMainFileExcel['End'].astype(str)
    dfCrisesMainFileExcel['DayWithMostDeaths'] = dfCrisesMainFileExcel['DayWithMostDeaths'].astype(str)

    dfCrisesMainFileExcel.to_excel('../SupplementaryTable_AllCrises.xlsx',index=False)

# Various checks for use in article text

In [57]:
dfCrises.groupby('Group').sum()['Excess']
finalResultsFilename

'AllCrises_NonSmoothed_Years12_Threshold3_LowerThreshold2_MaxDaysBelow7_minLength0_minCount50_Clustered'

In [58]:
# dfCrises[dfCrises.DayWithMostBurials.dt.year == 1872]
dfCrises[dfCrises.DayWithMostDeaths.dt.year == 1872]

Unnamed: 0,Amt,Start,End,NumberOfDays,DayWithMostDeaths,Excess,ExcessPct,GenderRatio,TimeOfYear,Season,PopulationEstimate,Exc_Infants_stillborn,Exc_1-14,Exc_15-39,Exc_40-59,Exc_60+,Pct_Infants_stillborn,Pct_1-14,Pct_15-39,Pct_40-59,Pct_60+,DataSum_Infants_stillborn,DataSum_1-14,DataSum_15-39,DataSum_40-59,DataSum_60+,Baseline_Infants_stillborn,Baseline_1-14,Baseline_15-39,Baseline_40-59,Baseline_60+,FracAll_Infants_stillborn,FracAll_1-14,FracAll_15-39,FracAll_40-59,FracAll_60+,AgeSplitSumExc,Cluster,ClusterA_Prob,ClusterB_Prob,ClusterC_Prob,ClusterD_Prob,ClusterE_Prob,ClusterF_Prob,Group
305,Maribo Amt,1872-11-13,1872-11-14,1,1872-11-13,60,700.0,0.5,Q4,Fall,92469.0,1.424242,21.492095,17.227273,12.938735,7.661232,90.0,1425.0,2229.0,1219.0,328.0,3.0,23.0,18.0,14.0,10.0,1.575758,1.507905,0.772727,1.061265,2.338768,0.023447,0.353817,0.283606,0.213006,0.126124,60.743577,D,0.0,0.1466,0.0,0.0,0.8407,0.0127,


In [59]:
# dfCrises[(dfCrises.Cluster == cholCluster)]

# dfCrises[dfCrises.DayWithMostBurials.dt.year == 1872]
dfCrises[dfCrises.DayWithMostDeaths.dt.year == 1877]
dfCrises[dfCrises.DayWithMostDeaths.dt.year == 1874]
# dfCrises[dfCrises.DayWithMostDeaths.dt.year == 1875]

Unnamed: 0,Amt,Start,End,NumberOfDays,DayWithMostDeaths,Excess,ExcessPct,GenderRatio,TimeOfYear,Season,PopulationEstimate,Exc_Infants_stillborn,Exc_1-14,Exc_15-39,Exc_40-59,Exc_60+,Pct_Infants_stillborn,Pct_1-14,Pct_15-39,Pct_40-59,Pct_60+,DataSum_Infants_stillborn,DataSum_1-14,DataSum_15-39,DataSum_40-59,DataSum_60+,Baseline_Infants_stillborn,Baseline_1-14,Baseline_15-39,Baseline_40-59,Baseline_60+,FracAll_Infants_stillborn,FracAll_1-14,FracAll_15-39,FracAll_40-59,FracAll_60+,AgeSplitSumExc,Cluster,ClusterA_Prob,ClusterB_Prob,ClusterC_Prob,ClusterD_Prob,ClusterE_Prob,ClusterF_Prob,Group
239,Københavns Amt,1874-05-02,1874-05-18,16,1874-05-02,69,55.0,0.507692,Q2,Spring,111181.0,16.02388,15.515481,17.108696,13.653656,12.942029,64.0,69.0,133.0,71.0,36.0,41.0,38.0,30.0,33.0,49.0,24.97612,22.484519,12.891304,19.346344,36.057971,0.21296,0.206203,0.227377,0.181459,0.172001,75.243742,C,0.0,0.8456,0.0,0.0,0.1063,0.0481,
267,Københavns Amt,1874-07-26,1874-08-12,17,1874-08-10,65,57.0,0.52514,Q3,Summer,111659.0,47.817029,7.847826,3.956192,5.173395,2.025362,132.0,49.0,30.0,37.0,8.0,84.0,24.0,17.0,19.0,27.0,36.182971,16.152174,13.043808,13.826605,24.974638,0.715612,0.117448,0.059207,0.077423,0.030311,66.819805,A,0.0,0.0,1.0,0.0,0.0,0.0,
413,Århus Amt,1874-06-12,1874-06-30,18,1874-06-15,50,37.0,0.526882,Q2,Summer,132028.0,2.167984,21.459721,13.242095,7.739977,9.82971,12.0,78.0,64.0,48.0,32.0,21.0,49.0,34.0,24.0,41.0,18.832016,27.540279,20.757905,16.260023,31.17029,0.039824,0.394194,0.243244,0.142176,0.180562,54.439488,C,0.0003,0.9984,0.0,0.0,0.0005,0.0008,


In [60]:
dfCrises.Amt.value_counts().index

Index(['Staden København', 'Maribo Amt', 'Odense Amt', 'Københavns Amt',
       'Præstø Amt', 'Århus Amt', 'Sorø Amt', 'Holbæk Amt',
       'Frederiksborg Amt', 'Ringkøbing Amt', 'Svendborg Amt', 'Vejle Amt',
       'Hjørring Amt', 'Ålborg Amt', 'Thisted Amt', 'Ribe Amt',
       'Skanderborg Amt', 'Haderslev Amt', 'Randers Amt', 'Viborg Amt',
       'Bornholms Amt', 'Sønderborg Amt', 'Åbenrå Amt', 'Nordborg Amt',
       'Tønder Amt'],
      dtype='object')

In [61]:
allAmts = [
'Staden København', 
'Maribo Amt', 
'Odense Amt', 
'Københavns Amt',
'Præstø Amt', 
'Århus Amt', 
'Sorø Amt', 
'Holbæk Amt',
'Frederiksborg Amt', 
'Ringkøbing Amt', 
'Svendborg Amt', 
'Vejle Amt',
'Hjørring Amt', 
'Ålborg Amt', 
'Thisted Amt', 
'Ribe Amt',
'Skanderborg Amt', 
'Haderslev Amt', 
'Randers Amt', 
'Viborg Amt',
'Bornholms Amt', 
'Sønderborg Amt', 
'Åbenrå Amt', 
'Nordborg Amt',
'Tønder Amt',
]

In [62]:
allAmts = [
'Staden København', 
'Københavns Amt',
'Frederiksborg Amt', 
'Holbæk Amt',
'Sorø Amt', 
'Præstø Amt', 
'Bornholms Amt', 
'Maribo Amt', 
'Odense Amt', 
'Svendborg Amt', 
'Hjørring Amt',
'Thisted Amt', 
'Ålborg Amt', 
'Viborg Amt',
'Ringkøbing Amt', 
'Ribe Amt',
'Randers Amt', 
'Skanderborg Amt', 
'Århus Amt', 
'Vejle Amt',
'Sønderborg Amt', 
'Haderslev Amt', 
'Åbenrå Amt', 
'Nordborg Amt',
'Tønder Amt',
]

In [63]:
# for amt in dfCrises.Amt.unique():
for amt in allAmts:

    curdfAmt = dfCrises[dfCrises.Amt == amt]
    print(f'{amt:18} , {len(curdfAmt):2.0f} , {curdfAmt.Excess.sum():5.0f}')

Staden København   , 53 , 11090
Københavns Amt     , 28 ,  3823
Frederiksborg Amt  , 19 ,  1763
Holbæk Amt         , 21 ,  4305
Sorø Amt           , 22 ,  4511
Præstø Amt         , 25 ,  4532
Bornholms Amt      ,  7 ,   886
Maribo Amt         , 41 ,  6796
Odense Amt         , 33 ,  4068
Svendborg Amt      , 17 ,  1755
Hjørring Amt       , 15 ,  1375
Thisted Amt        , 11 ,  1448
Ålborg Amt         , 13 ,  1901
Viborg Amt         ,  9 ,  1131
Ringkøbing Amt     , 19 ,  1757
Ribe Amt           , 11 ,   746
Randers Amt        ,  9 ,  1039
Skanderborg Amt    ,  9 ,   607
Århus Amt          , 23 ,  2513
Vejle Amt          , 16 ,  1624
Sønderborg Amt     ,  3 ,   696
Haderslev Amt      ,  9 ,  1083
Åbenrå Amt         ,  2 ,   285
Nordborg Amt       ,  2 ,   140
Tønder Amt         ,  1 ,    58


In [64]:
# Define the agegroups analyzed
ageGroups = [
    ['Total'],
    ['Stillborn','0'],
    ['1-4','5-9', '10-14'],
    ['15-19', '20-24', '25-29', '30-34', '35-39'],
    ['40-44', '45-49', '50-54', '55-59'],
    ['60-64', '65-69', '70-74', '75-79', '80+']
]

# And the names used for directories and filenames
ageGroupNames = [
    'Total',
    'Infants_stillborn',
    '1-14',
    '15-39',
    '40-59',
    '60+'
]

ageGroupNamesBaseline = ['Baseline_'+x for x in ageGroupNames[1:]]
ageGroupNamesDataSum = ['DataSum_'+x for x in ageGroupNames[1:]]


In [65]:
dfHarvest[ageGroupNamesDataSum].sum().sum() / dfHarvest[ageGroupNamesBaseline].sum().sum()

2.987753250893402

In [66]:
dfChol[dfChol.Amt.str.contains('Køben')].Excess.sum()
# dfChol[dfChol.Amt.str.contains('Staden ')]

4411

In [67]:
dfChol.Excess.sum()

5154

In [68]:
# dfChol2

In [69]:
# dfScar