# Notebook for calculating the total of the groups presented in the main paper
Note that the groups themselves were decided through a combination of inspection of the quantitative measures calculated in the analysis and manual inspection of data from individual crises (as well as checks with historical records)

Hence, this notebook primarily exists to group crises together for calculating the numbers reported in the text.

In [204]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib widget

# Load style
plt.style.use('PlotStyle.mplstyle')
import matplotlib.colors as colors
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=plt.cm.Dark2.colors)

from datetime import datetime
from tqdm import tqdm

import os

# Load functions
import sys
sys.path.append("../../ExcessMortality")
import ExcessMortalityFunctions as emf
import AdditionalFunctions as ps


# Let pandas render wide and long tables (almost) in full when displayed.
for optionName, optionValue in (
    ('display.max_columns', 500),
    ('display.max_rows', 500),
    ('display.min_rows', 50),
):
    pd.set_option(optionName, optionValue)

# Flag reported below; presumably controls whether figures are written to disk
# (it is not used by any of the cells visible in this notebook). Set to True to enable.
saveFigures = False
print(f'saveFigures is set to: {saveFigures}')
print('Done loading packages')

saveFigures is set to: False
Done loading packages


In [205]:
# Locations of input data, analysis results and figures (relative to this notebook)
pathData = '../Data/'
pathFigs = '../Figures/'
# Prefix of the analysis-results directory, located inside the data directory
pathResults = pathData + 'AnalysisResults'

In [206]:
# Parameters of the main analysis. They are only used here to rebuild the name
# of the results file, so they must match the run whose results should be loaded.

# Baseline window
numYears = 12                # Number of years on both sides of a date used for baseline calculations
numYearsTot = 2 * numYears   # The "name" of the baseline (+/- 5 years is a 10-year baseline, +/- 12 a 24-year baseline)

# Thresholds, in terms of Z-scores
thresholdExcess = 3          # A day counts as having increased excess above this threshold
thresholdLower = 2           # Lower threshold determining the start and end of periods

# Period definition
maxDaysBelowThreshold = 4    # Number of days below thresholdLower before a period of excess is "stopped"
minimumLengthOfEpidemic = 4  # Minimal number of days above thresholdExcess counted as a period of excess
excessCountThreshold = 50    # Only mortality crises with more than this number of excess deaths were saved

# Alternative values used for sensitivity tests (uncomment to load those results)
# numYears = 6
# maxDaysBelowThreshold = 7
# excessCountThreshold = 20

# Name (without extension) of the file holding the final, clustered results
finalResultsFilename = '_'.join([
    'AllCrises',
    f'Years{numYears}',
    f'Threshold{thresholdExcess}',
    f'LowerThreshold{thresholdLower}',
    f'MaxDaysBelow{maxDaysBelowThreshold}',
    f'minLength{minimumLengthOfEpidemic}',
    f'minCount{excessCountThreshold}',
    'Clustered',
])
finalResultsFilename

'AllCrises_Years12_Threshold3_LowerThreshold2_MaxDaysBelow4_minLength4_minCount50_Clustered'

In [207]:
# Read the table of clustered crises and turn its date columns into datetimes
dfCrises = pd.read_csv(f'{pathData}{finalResultsFilename}.csv')
for dateColumn in ('Start', 'End', 'DayWithMostBurials'):
    dfCrises[dateColumn] = pd.to_datetime(dfCrises[dateColumn])

In [208]:
# Every crisis starts out ungrouped; the sections below overwrite this label
# for the crises they select.
ungroupedLabel = 'None'
dfCrises['Group'] = ungroupedLabel

# Cholera

In [209]:
# The deadliest crisis in 1853 is cholera; use it to look up the cholera cluster-ID.
# The crisis is picked explicitly as the one with the largest Excess, so the result
# does not depend on the row order of the loaded table.
is1853 = dfCrises.DayWithMostBurials.dt.year == 1853
deadliest1853 = dfCrises.loc[is1853, 'Excess'].idxmax()
cholCluster = dfCrises.loc[deadliest1853, 'Cluster']

# All crises with the same age-pattern (cluster) in 1853 appear to be cholera
isChol1853 = is1853 & (dfCrises.Cluster == cholCluster)
dfChol = dfCrises[isChol1853].copy()

# Add label to main dataframe
dfCrises.loc[isChol1853, 'Group'] = 'Cholera (1853)'

# Print results
print(dfChol.Amt.values)
print(f'Total excess: {dfChol.Excess.sum()}')
print(f'Indices: {dfChol.index.values}')
print(dfChol.Cluster.value_counts())
dfChol

['Staden København' 'Københavns Amt' 'Ålborg Amt' 'Århus Amt'
 'Hjørring Amt' 'Frederiksborg Amt']
Total excess: 4148
Indices: [  0  11  23  58 219 309]
F    6
Name: Cluster, dtype: int64


Unnamed: 0,Amt,Start,End,NumberOfDays,DayWithMostBurials,Excess,ExcessPct,GenderRatio,TimeOfYear,Season,PopulationEstimate,Exc_Infants_stillborn,Exc_1-14,Exc_15-39,Exc_40-59,Exc_60+,Pct_Infants_stillborn,Pct_1-14,Pct_15-39,Pct_40-59,Pct_60+,DataSum_Infants_stillborn,DataSum_1-14,DataSum_15-39,DataSum_40-59,DataSum_60+,Baseline_Infants_stillborn,Baseline_1-14,Baseline_15-39,Baseline_40-59,Baseline_60+,FracAll_Infants_stillborn,FracAll_1-14,FracAll_15-39,FracAll_40-59,FracAll_60+,AgeSplitSumExc,Cluster,ClusterA_Prob,ClusterB_Prob,ClusterC_Prob,ClusterD_Prob,ClusterE_Prob,ClusterF_Prob,ClusterG_Prob,ClusterH_Prob,Group
0,Staden København,1853-07-02,1853-09-06,66,1853-07-20,2835,417,0.487624,Q3,Summer,139411.0,47.978641,353.796466,683.11406,993.815002,727.211227,38,326,527,890,689,175.857143,462.428571,812.714286,1105.428571,832.714286,127.878502,108.632105,129.600226,111.613569,105.503059,0.017099,0.126089,0.243455,0.354186,0.259171,2805.915396,F,0.0,0.0,0.0,0.0,0.0001,0.9423,0.0,0.0576,
11,Københavns Amt,1853-07-17,1853-09-15,60,1853-08-02,575,237,0.485993,Q3,Summer,76794.0,15.908126,96.419772,142.90295,161.278891,146.094979,28,239,434,461,258,73.285714,136.714286,175.857143,196.285714,202.714286,57.377588,40.294513,32.954193,35.006823,56.619306,0.028276,0.171381,0.254002,0.286665,0.259676,562.60472,F,0.0,0.0,0.0,0.0001,0.0018,0.9725,0.0,0.0256,
23,Ålborg Amt,1853-08-09,1853-09-20,42,1853-08-29,421,292,0.482394,Q3,Summer,69508.0,15.711957,53.901562,119.72005,122.187112,107.483124,34,233,665,721,308,61.714286,77.0,137.714286,139.142857,142.428571,46.002329,23.098438,17.994236,16.955745,34.945447,0.037498,0.128642,0.285725,0.291613,0.256521,419.003805,F,0.0,0.0,0.0,0.0,0.0004,0.9892,0.0,0.0104,
58,Århus Amt,1853-07-26,1853-09-13,49,1853-08-21,200,173,0.474843,Q3,Summer,43285.0,9.176242,20.262069,49.995153,71.505835,39.074934,46,90,299,466,163,29.285714,42.857143,66.714286,86.857143,63.0,20.109472,22.595073,16.719132,15.351308,23.925066,0.048292,0.106634,0.263113,0.376318,0.205642,190.014234,F,0.0,0.0,0.0,0.0,0.0,0.9966,0.0,0.0034,
219,Hjørring Amt,1853-08-01,1853-08-24,23,1853-08-07,66,86,0.55,Q3,Summer,74926.0,5.819358,8.238166,5.836204,18.192029,12.512234,61,67,73,313,76,15.428571,20.571429,13.857143,24.0,29.0,9.609213,12.333263,8.020939,5.807971,16.487766,0.115012,0.162816,0.115345,0.359541,0.247287,50.597991,F,0.0,0.0,0.0,0.0003,0.0044,0.8,0.0,0.1953,
309,Frederiksborg Amt,1853-07-21,1853-08-13,23,1853-07-24,51,63,0.549618,Q3,Summer,77708.0,7.951605,2.727538,13.044702,10.831263,12.091262,78,19,110,85,57,18.142857,17.142857,24.857143,23.571429,33.285714,10.191253,14.415319,11.812441,12.740166,21.194452,0.170466,0.058473,0.279651,0.232199,0.259211,46.646369,F,0.0,0.0,0.0,0.0001,0.0078,0.9901,0.0,0.002,


In [210]:

# Crises of the cholera cluster whose deadliest day falls in 1857.
# As expected, the only one is the cholera epidemic in Korsør.
isChol1857 = (dfCrises.DayWithMostBurials.dt.year == 1857) & (dfCrises.Cluster == cholCluster)
dfChol2 = dfCrises[isChol1857].copy()

# Add label to main dataframe
dfCrises.loc[isChol1857, 'Group'] = 'Cholera (1857)'

# Print results
print(dfChol2.Amt.values)
print(f'Total excess: {dfChol2.Excess.sum()}')
print(f'Indices: {dfChol2.index.values}')
print(dfChol2.Cluster.value_counts())
dfChol2


['Sorø Amt']
Total excess: 440
Indices: [22]
F    1
Name: Cluster, dtype: int64


Unnamed: 0,Amt,Start,End,NumberOfDays,DayWithMostBurials,Excess,ExcessPct,GenderRatio,TimeOfYear,Season,PopulationEstimate,Exc_Infants_stillborn,Exc_1-14,Exc_15-39,Exc_40-59,Exc_60+,Pct_Infants_stillborn,Pct_1-14,Pct_15-39,Pct_40-59,Pct_60+,DataSum_Infants_stillborn,DataSum_1-14,DataSum_15-39,DataSum_40-59,DataSum_60+,Baseline_Infants_stillborn,Baseline_1-14,Baseline_15-39,Baseline_40-59,Baseline_60+,FracAll_Infants_stillborn,FracAll_1-14,FracAll_15-39,FracAll_40-59,FracAll_60+,AgeSplitSumExc,Cluster,ClusterA_Prob,ClusterB_Prob,ClusterC_Prob,ClusterD_Prob,ClusterE_Prob,ClusterF_Prob,ClusterG_Prob,ClusterH_Prob,Group
22,Sorø Amt,1857-09-02,1857-10-29,57,1857-09-18,440,250,0.504039,Q3,Fall,78378.0,62.213015,94.44779,92.893728,113.137856,73.985248,126,291,431,445,180,111.571429,126.857143,114.428571,138.571429,115.142857,49.358413,32.409353,21.534844,25.433573,41.157609,0.142469,0.216287,0.212728,0.259088,0.169428,436.677637,F,0.0,0.0,0.0001,0.0137,0.0068,0.979,0.0,0.0003,


# Harvest epidemics, 1826-1832
Between 1825 and 1833, most deadly between July to October, mostly cluster H

(Exceptions to cluster G is Holbæk in 1830, and smaller parts in Maribo amt)

A number of crises in 1832 are also included, although the age-cluster differs a little (cluster F instead of H).

In [211]:

# Main selection: crises whose deadliest day lies after the start of 1825 and no
# later than the start of 1833, and falls in months 8-10.
# NOTE(review): months 8-10 are August-October, while the section text says
# "July to October" - confirm which one is intended.
peakDay = dfCrises.DayWithMostBurials
inPeriod = (peakDay > np.datetime64('1825')) & (peakDay <= np.datetime64('1833'))
inSeason = (peakDay.dt.month > 7) & (peakDay.dt.month <= 10)
isHarvestMain = inPeriod & inSeason

# Add some extra crises from 1832 in the same amts. Crises already part of the
# main selection are excluded here, so no crisis can end up in dfHarvest twice
# (which would double-count its excess in the totals printed below).
harvestAmts = dfCrises.loc[isHarvestMain, 'Amt'].unique()
isHarvestExtra = (peakDay.dt.year == 1832) & dfCrises.Amt.isin(harvestAmts) & ~isHarvestMain
dfHarvestExtra = dfCrises[isHarvestExtra].copy()

# Full group: main selection plus the extra 1832 crises
dfHarvest = dfCrises[isHarvestMain | isHarvestExtra].sort_index()

print(dfHarvest.Amt.unique())
print(f'Total excess: {dfHarvest.Excess.sum()}')
print(f'Part of that which is from excess in extra crises in 1832: {dfHarvestExtra.Excess.sum()}')
print(f'Indices: {dfHarvest.index.values}')

print(dfHarvest.Cluster.value_counts())
# Update main dataframe
dfCrises.loc[dfHarvest.index,'Group'] = 'Harvest-epidemics'

# dfHarvest

['Maribo Amt' 'Præstø Amt' 'Sorø Amt' 'Holbæk Amt' 'Københavns Amt'
 'Frederiksborg Amt' 'Svendborg Amt' 'Odense Amt' 'Sønderborg Amt']
Total excess: 11539
Part of that which is from excess in extra crises in 1832: 1133
Indices: [  1   2   3   4   6   9  25  30  34  37  41  42  44  53  56  62  67  95
 105 111 142 153 172 179 180 190 196 197 229 245]
H    20
F     7
E     1
G     1
D     1
Name: Cluster, dtype: int64


# Child mortality in 1829

Rough estimate: High mortality among 1-14 in first half of 1829. (Cluster D, perhaps E and F, but for sure not G and H) 

In [212]:
# dfCrises.columns 

In [213]:

# Crises whose deadliest day lies between the start of 1829 and the start of 1830
# (both bounds inclusive) and falls in months 1-7.
# NOTE(review): the section text rules out clusters G and H, but no cluster filter
# is applied here (earlier `Cluster != 'H'` / `Cluster != 'G'` filters were left
# disabled), so crises from any cluster are included in this group.
peakDay = dfCrises.DayWithMostBurials
peakMonth = peakDay.dt.month
inWindow1829 = (peakDay >= np.datetime64('1829')) & (peakDay <= np.datetime64('1830'))
inFirstMonths = (peakMonth >= 1) & (peakMonth <= 7)
df1829 = dfCrises[inWindow1829 & inFirstMonths].copy()

print(df1829.Amt.unique())
print(f'Total excess: {df1829.Excess.sum()}')
print(f'Indices: {df1829.index.values}')

# Update main dataframe
dfCrises.loc[df1829.index, 'Group'] = 'Child mortality 1829'

# Cluster composition of the group, and the excess that falls outside cluster D
display(df1829['Cluster'].value_counts())
excessNotD = df1829.loc[df1829.Cluster != 'D', 'Excess'].sum()
print(f'Mortality not in cluster D: {excessNotD}')

['Sorø Amt' 'Holbæk Amt' 'Maribo Amt' 'Præstø Amt' 'Svendborg Amt'
 'Københavns Amt' 'Odense Amt' 'Ringkøbing Amt' 'Haderslev Amt'
 'Frederiksborg Amt']
Total excess: 3036
Indices: [ 10  20  21  50  66  70  93  97 131 159 184 217 247 248 280 282 287 293
 319]


D    11
E     3
G     2
F     2
H     1
Name: Cluster, dtype: int64

Mortality not in cluster D: 714


# 1891/1892 Pandemic Flu

Everything around new years 1891/1892

All are cluster G, except Copenhagen City and Hjørring Amt (containing part of Aalborg city) which gets classified as cluster E

In [214]:
# Crises around New Year 1891/1892: deadliest day between the start of
# November 1891 and the start of March 1892 (both bounds inclusive)
peakDay = dfCrises.DayWithMostBurials
inFluWindow = (peakDay >= np.datetime64('1891-11')) & (peakDay <= np.datetime64('1892-03'))
dfFlu1892 = dfCrises[inFluWindow].copy()

# Alphabetically sorted counties with a crisis in this window
sortedFluAmts = np.sort(dfFlu1892.Amt.unique())
print(sortedFluAmts)
russfluAmt = list(sortedFluAmts)

print(f'Total excess: {dfFlu1892.Excess.sum()}')
print(f'Indices: {dfFlu1892.index.values}')

# Update main dataframe
dfCrises.loc[dfFlu1892.index, 'Group'] = 'Pandemic Flu (1891/1892)'

print('Clusters:')
print(dfFlu1892.Cluster.value_counts())
# dfFlu1892

['Bornholms Amt' 'Frederiksborg Amt' 'Haderslev Amt' 'Hjørring Amt'
 'Holbæk Amt' 'Københavns Amt' 'Maribo Amt' 'Odense Amt' 'Præstø Amt'
 'Randers Amt' 'Ribe Amt' 'Ringkøbing Amt' 'Sorø Amt' 'Staden København'
 'Svendborg Amt' 'Thisted Amt' 'Vejle Amt' 'Viborg Amt' 'Ålborg Amt'
 'Århus Amt']
Total excess: 7481
Indices: [  5   8  13  14  16  18  19  26  27  28  29  33  35  38  45  55  69  82
  91 168 292]
Clusters:
G    19
E     2
Name: Cluster, dtype: int64


In [215]:
# Counties that appear somewhere in the crises table but have no crisis in the
# 1891/1892 flu selection
amtsWithoutFlu = np.setdiff1d(dfCrises.Amt.unique(), dfFlu1892.Amt.unique())
print('Counties not experiencing flu: ')
print(amtsWithoutFlu)
print('(i.e. counties that either do not exist or arent Danish at the time)')

Counties not experiencing flu: 
['Nordborg Amt' 'Skanderborg Amt' 'Sønderborg Amt' 'Åbenrå Amt']
(i.e. counties that either do not exist or arent Danish at the time)


# Pandemic flu, spring 1900 (The so-called "Pseudo-pandemic")


In [216]:
# Crises in 1900: deadliest day between the start of January 1900 and the
# start of August 1900 (both bounds inclusive)
peakDay = dfCrises.DayWithMostBurials
inPseudoWindow = (peakDay >= np.datetime64('1900-01')) & (peakDay <= np.datetime64('1900-08'))
dfPseudo = dfCrises[inPseudoWindow].copy()

# Alphabetically sorted counties with a crisis in this window
sortedPseudoAmts = np.sort(dfPseudo.Amt.unique())
print(sortedPseudoAmts)
pseuAmt = list(sortedPseudoAmts)

print(f'Total excess: {dfPseudo.Excess.sum()}')
print(f'Indices: {dfPseudo.index.values}')

# Update main dataframe
dfCrises.loc[dfPseudo.index, 'Group'] = 'Pandemic flu (1900)'

print('Clusters:')
print(dfPseudo.Cluster.value_counts())
dfPseudo

['Frederiksborg Amt' 'Hjørring Amt' 'Holbæk Amt' 'Maribo Amt' 'Odense Amt'
 'Præstø Amt' 'Randers Amt' 'Sorø Amt' 'Staden København' 'Svendborg Amt'
 'Thisted Amt' 'Vejle Amt' 'Ålborg Amt']
Total excess: 2051
Indices: [ 40  43  47  54  68  71  76  89 122 174 189 258 273]
Clusters:
G    10
E     3
Name: Cluster, dtype: int64


Unnamed: 0,Amt,Start,End,NumberOfDays,DayWithMostBurials,Excess,ExcessPct,GenderRatio,TimeOfYear,Season,PopulationEstimate,Exc_Infants_stillborn,Exc_1-14,Exc_15-39,Exc_40-59,Exc_60+,Pct_Infants_stillborn,Pct_1-14,Pct_15-39,Pct_40-59,Pct_60+,DataSum_Infants_stillborn,DataSum_1-14,DataSum_15-39,DataSum_40-59,DataSum_60+,Baseline_Infants_stillborn,Baseline_1-14,Baseline_15-39,Baseline_40-59,Baseline_60+,FracAll_Infants_stillborn,FracAll_1-14,FracAll_15-39,FracAll_40-59,FracAll_60+,AgeSplitSumExc,Cluster,ClusterA_Prob,ClusterB_Prob,ClusterC_Prob,ClusterD_Prob,ClusterE_Prob,ClusterF_Prob,ClusterG_Prob,ClusterH_Prob,Group
40,Odense Amt,1900-03-14,1900-04-28,45,1900-04-11,304,84,0.479228,Q2,Spring,150474.0,35.202555,13.287479,19.752117,24.908103,213.931418,45,30,52,61,135,113.714286,57.0,57.857143,65.428571,372.0,78.511731,43.712521,38.105025,40.520469,158.068582,0.114636,0.04327,0.064322,0.081112,0.69666,307.081672,G,0.0,0.0,0.0,0.0,0.0009,0.0,0.999,0.0001,
43,Maribo Amt,1900-03-07,1900-04-26,50,1900-03-24,267,90,0.495606,Q1,Spring,104700.0,37.707863,7.191138,12.449628,27.91382,185.093403,52,23,44,80,147,110.0,38.571429,40.714286,62.857143,311.142857,72.292137,31.38029,28.264657,34.943323,126.049454,0.139475,0.026599,0.046049,0.103248,0.684629,270.355852,G,0.0,0.0,0.0,0.0,0.0021,0.0,0.9977,0.0003,
47,Hjørring Amt,1900-04-09,1900-06-07,59,1900-05-27,251,61,0.51682,Q2,Spring,118875.0,82.436853,15.755294,15.784279,28.39888,117.391728,95,23,29,69,84,169.428571,83.857143,70.428571,69.428571,257.0,86.991718,68.101849,54.644292,41.029691,139.608272,0.317349,0.060652,0.060763,0.109324,0.451912,259.767034,E,0.0,0.0,0.0052,0.0004,0.9924,0.0,0.0018,0.0002,
54,Svendborg Amt,1900-03-07,1900-05-01,55,1900-04-14,213,58,0.469775,Q2,Spring,127736.0,30.483155,-3.344273,21.144696,12.101473,154.714924,41,-9,53,29,91,104.142857,34.428571,60.857143,54.142857,324.142857,73.659703,37.772845,39.712447,42.041384,169.427933,0.139547,0.0,0.096797,0.055398,0.708258,218.444247,G,0.0,0.0,0.0,0.0,0.0007,0.0,0.9992,0.0,
68,Staden København,1900-03-01,1900-03-23,22,1900-03-04,172,43,0.518784,Q1,Spring,393177.0,14.346014,4.55823,6.640269,28.29529,124.020704,16,7,13,40,113,102.285714,69.142857,59.714286,99.285714,233.428571,87.9397,64.584627,53.074017,70.990424,109.407867,0.080659,0.025628,0.037334,0.159087,0.697292,177.860507,G,0.0,0.0,0.0,0.0,0.0003,0.0,0.997,0.0027,
71,Præstø Amt,1900-03-15,1900-04-28,44,1900-04-14,165,63,0.444965,Q2,Spring,103118.0,24.163043,-4.689347,6.123377,34.083592,107.889657,49,-14,22,108,92,73.428571,27.714286,34.142857,65.571429,224.714286,49.265528,32.403633,28.019481,31.487836,116.824628,0.140271,0.0,0.035547,0.197862,0.62632,172.25967,G,0.0,0.0,0.0,0.0,0.0105,0.0,0.8982,0.0912,
76,Holbæk Amt,1900-03-18,1900-04-14,27,1900-03-30,160,102,0.46875,Q1,Spring,98037.0,14.06597,-1.727706,-4.157867,14.080816,140.853355,41,-9,-26,81,212,48.714286,17.0,11.857143,31.571429,207.142857,34.648315,18.727706,16.01501,17.490613,66.289502,0.083231,0.0,0.0,0.083318,0.833451,169.000141,G,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,
89,Sorø Amt,1900-03-09,1900-04-23,45,1900-04-20,139,56,0.522959,Q2,Spring,94066.0,27.470709,2.927358,10.257505,13.307006,89.518657,53,10,44,48,82,79.142857,32.714286,33.428571,41.142857,198.142857,51.672149,29.786928,23.171066,27.835851,108.6242,0.191459,0.020402,0.07149,0.092744,0.623905,143.481235,G,0.0,0.0,0.0,0.0,0.0506,0.0,0.9487,0.0008,
122,Vejle Amt,1900-03-14,1900-04-04,21,1900-03-23,109,67,0.464945,Q1,Spring,124508.0,12.314182,-7.947464,7.296325,21.00647,73.959886,39,-35,35,108,120,44.0,14.714286,28.142857,40.428571,135.571429,31.685818,22.661749,20.846532,19.422101,61.611542,0.107475,0.0,0.063681,0.18334,0.645505,114.576863,G,0.0,0.0,0.0,0.0,0.0028,0.0,0.9633,0.0339,
174,Thisted Amt,1900-03-17,1900-04-17,31,1900-03-18,81,62,0.448113,Q1,Spring,71290.0,13.952428,-3.275934,10.477814,3.32604,58.940994,64,-17,68,25,108,35.714286,16.142857,25.857143,16.714286,113.428571,21.761858,19.418791,15.379329,13.388246,54.487578,0.160933,0.0,0.120855,0.038364,0.679848,86.697276,G,0.0,0.0,0.0,0.0,0.0033,0.0,0.9967,0.0,


# Scarlatina


Criteria:

Age: Mostly cluster A (Almost all 1-14)

Geography: Mid and northern Jutland

Time: Winter 1857/1858

Comment: High infant mortality is observed in the rest of the country in the fall/winter the following years (in 1858/1859: Odense, Svendborg, Præstø, Maribo, and 1859/1860 in Maribo). Should also be calculated separately
A comment about Bornholm may also be good

Extra: Three mortality crises are related, but seem to also have a wave of some disease among elderly, so they end up with a strange age-distribution (and not cluster A)
<!-- (232,117,180) -->

In [217]:
# Counties (amter) used as the geographic criterion of the scarlatina group
# ("Mid and northern Jutland")
amtJutlandMidNorth = [
    'Thisted Amt',
    'Randers Amt',
    'Århus Amt',
    'Ålborg Amt',
    'Vejle Amt',
    'Ringkøbing Amt',
    'Viborg Amt',
    'Hjørring Amt',
    'Ribe Amt',
    'Åbenrå Amt',
    'Skanderborg Amt',
]

In [218]:
# Crises in the mid/northern Jutland counties whose deadliest day lies between
# the start of 1857 and the start of 1859 (both bounds inclusive).
# NOTE(review): this window is wider than the "Winter 1857/1858" stated in the
# criteria above - confirm that this is intended.
peakDay = dfCrises.DayWithMostBurials
inScarWindow = (peakDay >= np.datetime64('1857')) & (peakDay <= np.datetime64('1859'))
inJutlandMidNorth = dfCrises.Amt.isin(amtJutlandMidNorth)
dfScar = dfCrises[inScarWindow & inJutlandMidNorth].copy()

print(dfScar.Amt.unique())
print(f'Total excess: {dfScar.Excess.sum()}')
print(f'Indices: {dfScar.index.values}')

# Update main dataframe
dfCrises.loc[dfScar.index, 'Group'] = 'Scarlatina'

# Cluster composition of the group
dfScar.Cluster.value_counts()

['Thisted Amt' 'Ålborg Amt' 'Viborg Amt' 'Randers Amt' 'Århus Amt'
 'Ringkøbing Amt' 'Skanderborg Amt' 'Hjørring Amt']
Total excess: 2323
Indices: [  7  31  73 100 109 115 133 156 175 178 183 187 222 232 235 240 318]


A    13
E     3
C     1
Name: Cluster, dtype: int64

In [219]:
# Inspect the 'Pct_1-14' column (ages 1-14) for each crisis in the scarlatina group
dfScar.loc[:, 'Pct_1-14']

7      814
31     333
73     269
100    289
109    263
115    183
133    295
156    342
175    227
178    184
183    278
187    386
222    451
232    280
235    194
240     99
318    170
Name: Pct_1-14, dtype: int64

In [220]:
# dfScar

# War

Crises that coincide with wars.

Manual checks of these suggest that many (possibly all) were probably war-related, as also suggested by the gender ratio; however, no actual grouping was done.

In [221]:
# All crises whose deadliest day falls in one of the war years 1849 or 1864.
# (An earlier manual subset, indexWar = [195,207,20,63,316,12,123,37], is left disabled.)
peakYear = dfCrises.DayWithMostBurials.dt.year
inWarYear = (peakYear == 1849) | (peakYear == 1864)
dfWar = dfCrises[inWarYear].copy()

print(np.sort(dfWar.Amt.unique()))
print(f'Total excess: {dfWar.Excess.sum()}')
print(f'Indices: {dfWar.index.values}')

# Update main dataframe
dfCrises.loc[dfWar.index, 'Group'] = 'Potentially war-related'

# Show the crises ordered by gender ratio, highest first
dfWar.sort_values(by='GenderRatio', ascending=False)

['Haderslev Amt' 'Nordborg Amt' 'Odense Amt' 'Randers Amt'
 'Skanderborg Amt' 'Staden København' 'Svendborg Amt' 'Sønderborg Amt'
 'Thisted Amt' 'Vejle Amt' 'Åbenrå Amt']
Total excess: 3183
Indices: [ 12  17  36  60  75  84 114 119 132 135 143 144 151 191 193 200 203 230
 269 271 296 298 310]


Unnamed: 0,Amt,Start,End,NumberOfDays,DayWithMostBurials,Excess,ExcessPct,GenderRatio,TimeOfYear,Season,PopulationEstimate,Exc_Infants_stillborn,Exc_1-14,Exc_15-39,Exc_40-59,Exc_60+,Pct_Infants_stillborn,Pct_1-14,Pct_15-39,Pct_40-59,Pct_60+,DataSum_Infants_stillborn,DataSum_1-14,DataSum_15-39,DataSum_40-59,DataSum_60+,Baseline_Infants_stillborn,Baseline_1-14,Baseline_15-39,Baseline_40-59,Baseline_60+,FracAll_Infants_stillborn,FracAll_1-14,FracAll_15-39,FracAll_40-59,FracAll_60+,AgeSplitSumExc,Cluster,ClusterA_Prob,ClusterB_Prob,ClusterC_Prob,ClusterD_Prob,ClusterE_Prob,ClusterF_Prob,ClusterG_Prob,ClusterH_Prob,Group
191,Staden København,1864-04-24,1864-05-04,10,1864-05-01,75,64,0.715736,Q2,Spring,165969.0,7.653468,1.078487,27.97619,10.151398,-3.024586,40,5,148,52,-13,27.0,23.428571,46.857143,29.571429,20.142857,19.346532,22.350085,18.880952,19.420031,23.167443,0.163328,0.023015,0.597022,0.216635,0.0,46.859543,F,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,
203,Nordborg Amt,1864-04-12,1864-05-22,40,1864-04-19,70,179,0.642202,Q2,Spring,,11.587921,14.812182,9.04497,2.839944,19.980667,169,229,229,54,143,18.428571,21.285714,13.0,8.142857,34.0,6.84065,6.473532,3.95503,5.302913,14.019333,0.198881,0.254218,0.155237,0.048741,0.342923,58.265685,E,0.0,0.0,0.0026,0.1118,0.8855,0.0,0.0001,0.0,
60,Åbenrå Amt,1864-02-21,1864-05-21,90,1864-03-18,197,130,0.632184,Q1,Spring,,45.714481,44.425126,38.211898,13.500271,41.749224,170,150,210,72,99,72.571429,74.0,56.428571,32.285714,83.714286,26.856948,29.574874,18.216674,18.785444,41.965062,0.248988,0.241966,0.208125,0.07353,0.227391,183.600999,D,0.0001,0.0,0.2734,0.5167,0.2093,0.0006,0.0,0.0,
17,Odense Amt,1849-05-12,1849-09-01,112,1849-07-19,491,88,0.631429,Q3,Summer,101359.0,22.500776,110.209863,77.823052,26.460568,20.128623,20,106,87,33,12,135.428571,214.0,167.428571,106.285714,181.285714,112.927795,103.790137,89.605519,79.825146,161.157091,0.08751,0.428627,0.302669,0.10291,0.078284,257.122883,D,0.0076,0.0,0.0002,0.9878,0.0009,0.0034,0.0,0.0,
230,Skanderborg Amt,1864-11-06,1864-12-07,31,1864-11-20,63,72,0.609272,Q4,Fall,63266.0,11.149115,38.365596,8.914596,-0.642884,1.366813,94,243,69,-5,7,23.0,54.142857,21.857143,12.0,22.285714,11.850885,15.777261,12.942547,12.642884,20.918902,0.186452,0.641607,0.149083,0.0,0.022858,59.79612,A,0.9848,0.0,0.0,0.0152,0.0,0.0,0.0,0.0,
36,Odense Amt,1864-03-28,1864-06-17,81,1864-05-23,310,58,0.591017,Q2,Spring,116127.0,42.497718,48.663302,80.742633,19.783926,60.709063,40,46,107,27,36,147.714286,153.571429,156.0,93.428571,228.285714,105.216568,104.908126,75.257367,73.644645,167.576652,0.168377,0.192805,0.319904,0.078384,0.24053,252.396642,E,0.0,0.0,0.0056,0.205,0.6621,0.1273,0.0,0.0,
298,Svendborg Amt,1864-05-11,1864-05-26,15,1864-05-14,54,68,0.583942,Q2,Spring,109270.0,16.461439,11.985766,12.345779,1.059524,3.815476,105,81,117,10,15,32.142857,26.857143,22.857143,11.428571,29.571429,15.681418,14.871377,10.511364,10.369048,25.755952,0.360459,0.262454,0.270338,0.023201,0.083548,45.667984,C,0.0,0.0,0.9935,0.0064,0.0,0.0,0.0,0.0,
12,Sønderborg Amt,1864-02-29,1864-08-20,173,1864-04-03,550,219,0.578882,Q2,Spring,,73.341079,161.707377,68.827687,73.61149,110.65268,155,340,224,220,149,120.714286,209.285714,99.571429,107.0,185.142857,47.373207,47.578338,30.743741,33.38851,74.490177,0.150246,0.331272,0.141,0.1508,0.226682,488.140313,D,0.0005,0.0,0.0023,0.8619,0.134,0.0012,0.0,0.0,
132,Staden København,1864-05-10,1864-06-01,22,1864-05-22,101,43,0.578171,Q2,Spring,166118.0,-3.10999,29.332298,26.446429,12.708333,6.329193,-8,68,64,33,14,35.714286,72.428571,67.571429,51.142857,52.714286,38.824275,43.096273,41.125,38.434524,46.385093,0.0,0.392058,0.353485,0.169861,0.084596,74.816253,F,0.0001,0.0,0.0,0.2021,0.0011,0.7967,0.0,0.0,
193,Odense Amt,1864-02-29,1864-03-23,23,1864-03-20,74,44,0.559671,Q1,Spring,115943.0,27.00427,14.927254,9.682606,-1.955487,14.241071,79,48,48,-8,26,61.285714,46.142857,30.0,21.428571,69.285714,34.281444,31.215603,20.317394,23.384058,55.044643,0.410055,0.226668,0.147029,0.0,0.216248,65.855202,C,0.0,0.0,0.9984,0.0015,0.0001,0.0,0.0,0.0,


In [222]:

# Excess in crises that received a group label versus those still labelled 'None'
isGrouped = dfCrises.Group != 'None'
numFound = dfCrises.loc[isGrouped, 'Excess'].sum()
numNotFound = dfCrises.loc[~isGrouped, 'Excess'].sum()

# Share of the total excess that belongs to a group
pctClassified = 100 * numFound / (numFound + numNotFound)

print(f'{numFound} burials grouped')
print(f'{numNotFound} burials still ungrouped')
print(f'Hence, {pctClassified:2.0f}% of burials has been classified')

34201 burials grouped
18090 burials still ungrouped
Hence, 65% of burials has been classified


In [223]:
# Write the table, now including the 'Group' column, next to the input file
# under the same name with '_Grouped' appended
curFileName = f'{finalResultsFilename}_Grouped'
dfCrises.to_csv(f'{pathData}{curFileName}.csv', index=False)

In [224]:
# Total excess per group. 'Excess' is selected before aggregating so that only
# this column is summed; summing the whole frame also tries to sum the datetime
# and string columns, which recent pandas versions reject for datetimes.
dfCrises.groupby('Group')['Excess'].sum()

Group
Child mortality 1829         3036
Cholera (1853)               4148
Cholera (1857)                440
Harvest-epidemics           11539
None                        18090
Pandemic Flu (1891/1892)     7481
Pandemic flu (1900)          2051
Potentially war-related      3183
Scarlatina                   2323
Name: Excess, dtype: int64