# Imports and reading data

In [None]:
import numpy as np
import sys
import pandas as pd
import os
import matplotlib.pyplot as plt
import datetime

# Official mortality table: semicolon-separated, column names taken from the first row.
MortalityFilename = 'mortality/Mortality_AGBLSterbewoche.csv'
MortalityData = pd.read_csv(MortalityFilename, sep=";", header=0)

# Crawler output: comma-separated; the column names are supplied explicitly here.
CrawlerFilename = 'mortality/DataMerged.csv'
CrawlerColumns = ["Deathdate", "Name", "Age"]
CrawlerData = pd.read_csv(CrawlerFilename, sep=",", names=CrawlerColumns)

# Normalize Numbers by Inhabitants of State

In [None]:
# Rows belonging to Berlin, selected before the table is re-sorted in place.
Berlin = MortalityData.loc[MortalityData['Bundesland'] == 'Berlin']
MortalityData.sort_values('Bundesland', inplace=True)

# Distinct state names in ascending (alphabetical) order.
States = sorted(set(MortalityData['Bundesland']))

# Inhabitants per state; the figures are matched to the state names purely by
# position, i.e. they must stay in the same order as the sorted States list.
# NOTE(review): the magnitudes suggest thousands of inhabitants -- confirm unit and source.
InhabitantCounts = [
    10952, 12931, 3575, 2495,
    679, 1810, 6213, 1611,
    7946, 17890, 4066, 997,
    4082, 2236, 2882, 2158,
]
Inhabitants = pd.Series(InhabitantCounts, index=States)
print(Inhabitants)


In [None]:
# Normalize the death counts with respect to the inhabitants of each state.
#
# NormalMort is a copy of MortalityData in which every 'Todesfälle' value has
# been divided by the Inhabitants entry of that row's 'Bundesland'.
# MortalityData itself is not modified.

NormalMort = MortalityData.copy()

# Look up the divisor for every row in one step instead of looping over the
# states. Replacing the whole column at once lets pandas pick a float dtype
# for the result; writing fractional values slice by slice through .loc into
# a column that was read as integers is deprecated on recent pandas versions.
InhabitantsPerRow = MortalityData['Bundesland'].map(Inhabitants)
NormalMort['Todesfälle'] = MortalityData['Todesfälle'].div(InhabitantsPerRow)



# Mortality rates over the year for each state

In [None]:
# Per state and age group: collect mean, sum and median of the normalized
# deaths for every week number 1..53, and plot the weekly mean in a 4x4 grid
# of panels (one panel per state, one line per age group).

#AgeGroups = set(MortalityData['Altersgruppe'])
AgeGroups = ['0-64', '65-74', '75-84', '85+']

#States = ['Hamburg', 'Bremen', 'Berlin']

# Line style per age group, in the same order as AgeGroups.
Colors = ['g', 'c', 'b', 'r']

Fig, Axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(15,15))
Weeks = range(53)

# All three arrays are indexed as [state index, age-group index, week - 1].
MeanPerWeek = np.zeros((16, 4, 53))
SumPerWeek = np.zeros((16, 4, 53))
MedianPerWeek = np.zeros((16, 4, 53))

for StateIndex, State in enumerate(States):
    # Panels are filled starting top-left, going down a column first and then
    # continuing with the next column to the right.
    Axis = Axes[StateIndex%4, StateIndex//4]
    StateCases = NormalMort[NormalMort['Bundesland']==State]

    for Num, AgeGroup in enumerate(AgeGroups):
        # Only contains cases for this State and AgeGroup.
        FilteredCases = StateCases[StateCases['Altersgruppe']==AgeGroup]

        for i in range(1,54):
            # Select the week once and reuse it for all three statistics.
            WeekDeaths = FilteredCases.loc[FilteredCases['Sterbewoche']==i, 'Todesfälle']
            MeanPerWeek[StateIndex, Num, i-1] = WeekDeaths.mean()
            SumPerWeek[StateIndex, Num, i-1] = WeekDeaths.sum()
            MedianPerWeek[StateIndex, Num, i-1] = WeekDeaths.median()

        Axis.plot(Weeks, MeanPerWeek[StateIndex, Num, :], Colors[Num])
        #Linestyle = Colors[Num] + '--'
        #Kurz = FilteredCases[FilteredCases.Sterbejahr==2004]['Todesfälle']
        #Axis.plot(Weeks, Kurz, Linestyle)

    Axis.set_title(State)

# Adjust the spacing only now that all titles and lines exist, so that the
# layout takes them into account.
Fig.tight_layout()
plt.show()

# Germany as a whole: How many people are dying at what age?

In [None]:
# How many people die per age group, all states combined?
# SumPerWeek was filled from the inhabitant-normalized table, so these totals
# are sums of normalized values rather than raw head counts.
print(SumPerWeek.shape)
# Collapse the state axis first, then the week axis: one total per age group.
Summiert = np.sum(np.sum(SumPerWeek, axis=0), axis=1)
print(Summiert)
plt.figure()
plt.title("Ganz Deutschland: Summe pro Altersgruppe")
# Centre the bars on their x positions explicitly and place the tick labels at
# those same positions, so each label sits under its bar regardless of the
# installed matplotlib's default bar alignment.
BarPositions = [0,1,2,3]
plt.bar(BarPositions, Summiert, align='center')
plt.xticks(BarPositions, ['0-64', '65-74', '75-84', '85+'])
plt.show()

# TODO: Diese Zahlen über die Jahre beschaut: 

In [None]:
# The same totals, broken down by state: one group of four bars per state,
# one bar per age group.
LandSummiert = np.sum(SumPerWeek, axis=2)  # sum over weeks -> [state, age group]
x = np.arange(16)


width = 0.2
Fig = plt.figure()
Ax = Fig.add_subplot(111)#,figsize=(10,10))

plt.suptitle("Pro Bundesland: Vergleich der Altersgruppen")

# Horizontal offset and colour of each age group's bar within a state's group.
BarOffsets = [-width, 0, width, 2*width]
BarColors = ['g', 'c', 'b', 'r']
for GroupIndex, (Offset, Color) in enumerate(zip(BarOffsets, BarColors)):
    Ax.bar(x+Offset, LandSummiert[:,GroupIndex], width, color=Color)

Ax.set_xlim([-1,17])
# First three letters of every state name serve as tick labels.
ShortNames = [Name[:3] for Name in States]

print(States)
# Set tick positions and labels in two separate calls: a second positional
# argument to set_xticks is not interpreted as labels by every matplotlib
# version.
Ax.set_xticks(x)
xtickNames = Ax.set_xticklabels(ShortNames)
plt.setp(xtickNames, rotation=45, fontsize=9)

plt.show()


# Zum Vergleich: Wie ist das in unseren Berlin-Daten?

In [None]:
# Compare the age distribution of the crawler records with the official
# totals for all states and for Berlin.

# Keep only rows with a non-negative age
# (presumably negative values mark an unknown age -- TODO confirm).
NotZero = CrawlerData[CrawlerData['Age'] > -1]
Over64 = NotZero[NotZero['Age'] > 64]
Over74 = Over64[Over64['Age'] > 74]
Over84 = Over74[Over74['Age'] > 84]
# Record counts per bracket: 0-64, 65-74, 75-84, 85+.
Age0 = len(NotZero) - len(Over64)
Age1 = len(Over64) - len(Over74)
Age2 = len(Over74) - len(Over84)
Age3 = len(Over84)
CrawlerCounts = np.array([Age0, Age1, Age2, Age3])

BarPositions = [0,1,2,3]

Fig, Axes = plt.subplots(2, 3, sharex=True)
Axes[0,0].set_title("Todesanzeigen")
Axes[0,0].bar(BarPositions, CrawlerCounts, align='center')

Axes[0,1].bar(BarPositions, Summiert, align='center')
Axes[0,1].set_title("Ganz D")

# NOTE(review): Summiert and SumPerWeek come from the inhabitant-normalized
# table, while CrawlerCounts are raw record counts -- confirm that the ratios
# below are meant to mix the two scales.
Anteil = CrawlerCounts / Summiert
Axes[1,1].bar(BarPositions, Anteil, align='center')
Axes[1,1].set_title("TA Berlin / Fälle ganz D")

# Look Berlin up by name: the first axis of SumPerWeek follows the order of
# States, and a hard-coded position can silently select a different state.
BerlinIndex = States.index('Berlin')
BerlinSum = np.sum(SumPerWeek[BerlinIndex,:,:], axis=1)
Axes[0,2].bar(BarPositions, BerlinSum, align='center')
Axes[0,2].set_title("Offiziell Berlin")

Axes[1,2].bar(BarPositions, CrawlerCounts/BerlinSum, align='center')
Axes[1,2].set_title("Anteil Berlin")


# Label and rotate the ticks on every panel of the bottom row; the bars are
# centred on their positions, so the labels go at those same positions.
TickLabels = ['0-64', '65-74', '75-84', '85+']
for BottomAxis in Axes[1, :]:
    BottomAxis.set_xticks(BarPositions)
    BottomAxis.set_xticklabels(TickLabels, rotation=70)

# Adjust the spacing once titles and rotated labels are in place.
Fig.tight_layout()

plt.show()


