In [76]:
# Load packages and settings
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.min_rows', 50)
import seaborn as sns


import matplotlib.pyplot as plt
%matplotlib widget
plt.rcParams['figure.figsize'] = (12,8)
plt.rcParams["image.cmap"] = "tab10"
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=plt.cm.tab10.colors)
fs_label = 16
parameters = {
                'axes.labelsize': fs_label,
                'axes.titlesize': fs_label+4,
                'xtick.labelsize': fs_label,
                'ytick.labelsize': fs_label, 
                'legend.fontsize': fs_label, 
                'lines.markersize': 10,
                'lines.linewidth': 3
             }
plt.rcParams.update(parameters)
%matplotlib widget
from matplotlib import cm # Colormaps
import matplotlib.colors as colors
# cmap = plt.cm.get_cmap('Dark2',len(ageGroups))

import locale
import matplotlib.dates as mdates
locale.setlocale(locale.LC_TIME,"Danish")
# ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
# ax1.spines['top'].set_visible(False) 

import os
# import csv
import math

import datetime
from datetime import date


# ax.legend(bbox_to_anchor=(1.04,0.5), loc="center left")

saveFigures = True
# saveFigures = False
print('saveFigures is set to: '+str(saveFigures))

print('Done loading packages')

# Define running mean functions
def rnMean(data,meanWidth):
    return np.convolve(data, np.ones(meanWidth)/meanWidth, mode='valid')
def rnTime(t,meanWidth):
    return t[math.floor(meanWidth/2):-math.ceil(meanWidth/2)+1]

saveFigures is set to: True
Done loading packages


In [77]:
# Define paths
rootdir_data = os.getcwd() +"\\..\\DanskeData\\" 

path_data = rootdir_data + "ssi_data\\"
path_dash = rootdir_data + "ssi_dashboard\\"
path_vacc = rootdir_data + "ssi_vacc\\"

path_figs = os.getcwd() +"\\..\\Figures\\" 

In [78]:
latestsubdir = list(os.walk(path_dash))[0][1][-1]
latestdir = path_dash + latestsubdir

df = pd.read_csv(latestdir+'/Regionalt_DB/03_bekraeftede_tilfaelde_doede_indlagte_pr_dag_pr_koen.csv',encoding='latin1',delimiter = ';')
df['Prøvetagningsdato'] = pd.to_datetime(df['Prøvetagningsdato'])

kortdf  = pd.read_csv(latestdir+'/Regionalt_DB/13_regionale_kort.csv',encoding='latin1',delimiter = ';')
# kortdf

In [79]:
latestsubdir

'SSI_dashboard_2022-02-08'

In [80]:


# Data is (only) in the file from the most recent tuesday. 
# Should be made smarter, but here hardcoded
# tuePath = 'SSI_data_2022-01-25'
# Now automatic finding of latest tuesday:
for k in range(0,7):
    dayToCheck = np.datetime64('today')-np.timedelta64(k,'D')
    thisWeekDay = (dayToCheck).astype(datetime.datetime).isoweekday()    
    if (thisWeekDay == 2):
        tuePath = 'SSI_data_'+str(dayToCheck)

print(f'Path to latest Tuesday was {tuePath}')

dirPath = path_data + tuePath + '\\'

df1 = pd.read_csv(dirPath+'gennembrudsinfektioner_table1.csv',delimiter=';')

df2_C = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_antal_cases.csv',delimiter=';')
df2_H = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_antal_indlagte.csv',delimiter=';')
df2_D = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_antal_dode.csv',delimiter=';')
df2_R = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_antal_repositive.csv',delimiter=';')
df2_Int = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_antal_intensiv.csv',delimiter=';')
df2_T = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_antal_tests.csv',delimiter=';')

df3 = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_incidence_alle.csv',delimiter=';',decimal=",")
df3_C = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_incidence_cases.csv',delimiter=';',decimal=",")
df3_H = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_incidence_indlagte.csv',delimiter=';',decimal=",")
df3_D = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_incidence_dode.csv',delimiter=';',decimal=",")
df3_Int = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_incidence_intensiv.csv',delimiter=';',decimal=",")
df3_T = pd.read_csv(dirPath+'gennembrudsinfektioner_table2_incidence_tests.csv',delimiter=';',decimal=",")

Path to latest Tuesday was SSI_data_2022-02-08


In [81]:
# weekDTs
# df2_D

In [82]:

weekDTs = [np.datetime64(datetime.datetime.strptime(d[-4:] + '-W'+d[4:6]+'-1', "%Y-W%W-%w")) for d in df1.Ugenummer]

curOrder = np.argsort(weekDTs)
weekDTs = np.array(weekDTs)
weekDTs = weekDTs[curOrder]
        
sNone = 'Ingen vaccination'        
sOne = 'Første vaccination'  
sTwo = 'Anden vaccination'
sFull = 'Fuld effekt efter primært forløb'   
sReva = 'Fuld effekt efter revaccination'


ageGroups = df2_C.Aldersgruppe.values
# print(ageGroups)
# weekNames = df1.Ugenummer
weekNames = df1.Ugenummer.values[curOrder]
weekNamesShort = [x[4:6] for x in weekNames]
wInt = [int(x[4:6]) for x in weekNames]
wIntRange = np.arange(len(wInt))

allDates = np.array(weekDTs)[curOrder]
print(weekNames)

# Make function for gettings particular parts
def getTimeSeriesSingle(thisdf=df2_C,curStatus='Ingen vaccination',curAge='Alle',weekNames=weekNames):
    
    agedf = thisdf[thisdf.Aldersgruppe==curAge]
    allVals = []
    for curWeek in weekNames:
        toAdd = agedf[curWeek+'_'+curStatus].values[0]
        allVals.append(toAdd)
    allVals = np.array(allVals)

    return allVals


def getTimeSeries(thisdf=df2_C,curStatus='Ingen vaccination',curAge='Alle',weekNames=weekNames):
    # If curAge is just a string, return the given array
    if (type(curAge)==str):
        agedf = thisdf[thisdf.Aldersgruppe==curAge]
        allVals = []
        for curWeek in weekNames:
            toAdd = agedf[curWeek+'_'+curStatus].values[0]
            allVals.append(toAdd)
        allVals = np.array(allVals)
        
        return allVals 
        
    else:
        # If curAge is not a string, it is assumed to be a list of strings
        totCount = []
        for thisAge in curAge:
            agedf = thisdf[thisdf.Aldersgruppe==thisAge]
            allVals = []
            for curWeek in weekNames:
                toAdd = agedf[curWeek+'_'+curStatus].values[0]
                allVals.append(toAdd)
            allVals = np.array(allVals)
            if len(totCount) == 0:
                totCount = allVals 
            else:
                totCount = totCount + allVals
        return totCount

def getTimeSeriesAll(thisdf=df2_C,curAge='Alle',weekNames=weekNames):
    return getTimeSeries(thisdf,sNone,curAge,weekNames)+getTimeSeries(thisdf,sOne,curAge,weekNames)+getTimeSeries(thisdf,sTwo,curAge,weekNames)

['Uge 42-2021' 'Uge 43-2021' 'Uge 44-2021' 'Uge 45-2021' 'Uge 46-2021'
 'Uge 47-2021' 'Uge 48-2021' 'Uge 49-2021' 'Uge 50-2021' 'Uge 51-2021'
 'Uge 52-2021' 'Uge 01-2022' 'Uge 02-2022' 'Uge 03-2022' 'Uge 04-2022'
 'Uge 05-2022']


In [83]:
print(ageGroups)

print(getTimeSeriesAll(df2_D,curAge=['70-79','80+']))
print(getTimeSeriesAll(df2_D,curAge='70-79'))
print(getTimeSeriesAll(df2_D,curAge='80+'))

['0-5' '6-11' '12-15' '16-19' '20-29' '30-39' '40-49' '50-59' '60-64'
 '65-69' '70-79' '80+' '12+' 'Alle']
[  8  14  18  25  33  62  60  54  63  73  56  82  91  88 103 120]
[ 3  4  3  8  9 23 18 22 21 27 23 27 25 20 38 26]
[ 5 10 15 17 24 39 42 32 42 46 33 55 66 68 65 94]


# Compare "gennembruds" files with regional files

In [84]:
df_tot = df.groupby('Prøvetagningsdato').sum()
datesDaily = df_tot.index
deathsDaily = df_tot['Døde'].values

In [96]:
fig,ax1 = plt.subplots()

ax2 = ax1.twinx()

ax1.plot(datesDaily,deathsDaily,'k.-',markersize=3,linewidth=0.5,label='Daglig data, regionalt')
ax1.plot(rnTime(datesDaily,7),rnMean(deathsDaily,7),'k',label='7-dages gennemsnit, regionalt')
weekTotDeaths = getTimeSeriesAll(df2_D)
# ax2.step(weekDTs,weekTotDeaths,'b',label='Ugentlige data, gennembrudsfiler')
ax1.step(weekDTs,weekTotDeaths/7,'b',where='pre',label='Ugentlig data, gennembrudsfiler')

ax1.set_xlim(left=weekDTs[0],right=datesDaily[-1]+np.timedelta64(5,'D'))

ax1.set_ylabel('Daglige dødsfald')
ax2.set_ylabel('Ugentlige dødsfald')

ax1.set_ylim(bottom=0)
ax1.set_ylim(top=np.max(deathsDaily[-50:])*1.1)

ax1.legend()

curylim = ax1.get_ylim()
ax2.set_ylim(0,curylim[1]*7)

ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d\n%b'))

# Draw weekends
firstSunday = np.datetime64('2021-10-03')
numWeeks = 52
for k in range(0,numWeeks):
    curSunday = firstSunday + np.timedelta64(7*k,'D')
    ax1.axvspan(curSunday-np.timedelta64(1,'D')-np.timedelta64(12,'h'),curSunday+np.timedelta64(12,'h'),zorder=-1,facecolor='lightgrey',label=int(k==0)*'Weekend')

ax1.grid(axis='y')

if saveFigures:
    plt.savefig(path_figs+'Regionalt\\DeathsComparison')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Ugentlige dødsfald, fordelt på alder

In [86]:
ageGroupsGrouped = [ageGroups[0:5],ageGroups[5:7],ageGroups[7:10],ageGroups[10],ageGroups[11]]
ageGroupsGroupedNames = ['0-29','30-49','50-69','70-79','80+']
print(ageGroupsGrouped)
print(ageGroupsGroupedNames)

[array(['0-5', '6-11', '12-15', '16-19', '20-29'], dtype=object), array(['30-39', '40-49'], dtype=object), array(['50-59', '60-64', '65-69'], dtype=object), '70-79', '80+']
['0-29', '30-49', '50-69', '70-79', '80+']


In [87]:
firstDate = weekDTs[0]

In [98]:
fig,ax1 = plt.subplots(figsize=(13,6.5))
fig.patch.set_facecolor('xkcd:off white')
ax1.set_facecolor('xkcd:off white')

# curAge = ageGroups[-3]
# curAge
# for curAge in ageGroupsGrouped:
for i in range(len(ageGroupsGrouped)):
    curAge = ageGroupsGrouped[i]
    curName = ageGroupsGroupedNames[i]

    curDeaths = getTimeSeriesAll(df2_D,curAge=curAge)
    ax1.plot(weekDTs,curDeaths,'.-',label=curName)
    # if (np.sum(curDeaths) > 0):
    #     ax1.plot(weekDTs,curDeaths,label=curAge)

# Draw weekends
firstSunday = np.datetime64('2021-10-03')
numWeeks = 52
for k in range(0,numWeeks):
    curSunday = firstSunday + np.timedelta64(7*k,'D')
    ax1.axvspan(curSunday-np.timedelta64(1,'D')-np.timedelta64(12,'h'),curSunday+np.timedelta64(12,'h'),zorder=-1,facecolor='lightgrey',label=int(k==0)*'Weekend')

ax1.set_ylabel('Ugentlige nye dødsfald')

ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d\n%b'))
# ax1.set_xlim(left=firstDate,right=thisdf.Dato.max()+np.timedelta64(5,'D'))
ax1.set_xlim(left=firstDate,right=weekDTs[-1]+np.timedelta64(5,'D'))
ax1.set_ylim(bottom=0)
# ax2.set_ylim(bottom=0)
ax1.legend(loc='upper left')
# ax2.legend(loc='upper left')
ax1.grid(axis='y')
# ax2.grid(axis='y')

# smalltxt = 'Rasmus Kristoffer Pedersen, PandemiX Center, Roskilde Universitet.\nData fra SSI overvågningsdata, dashboard-fil. Filnavn: Regionalt_DB/18_fnkt_alder_uge_testede_positive_nyindlagte.csv'
# ax1.text(1,-0.15,smalltxt,
#      horizontalalignment='right',
#      verticalalignment='top',
#      transform = ax1.transAxes,
#      fontsize=10)
     
fig.tight_layout()


fig.savefig(path_figs+'AgeDistribution\\Agedistribution_DeathsWeekly')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [89]:
fig,ax1 = plt.subplots(figsize=(13,6.5))
# fig,(ax1,ax2) = plt.subplots(2,1,sharex=True,figsize=(13,13))

fig.patch.set_facecolor('xkcd:off white')
ax1.set_facecolor('xkcd:off white')
# ax2.set_facecolor('xkcd:off white')

for curAge in dfAge.Aldersgruppe.unique():
    thisdf = dfAge[dfAge.Aldersgruppe == curAge]
    ax1.plot(thisdf.Dato,thisdf['Antal positive'],'.-',label=curAge)
    # ax2.plot(thisdf.Dato,thisdf['Positive pr. 100.000 borgere'],'.-',label=curAge)
    # ax2.plot(thisdf.Dato,thisdf['Positive pr. 100.000 borgere']/1000,'.-',label=curAge)

# Draw weekends
# firstSunday = np.datetime64('2021-01-03')
firstSunday = np.datetime64('2021-10-03')
numWeeks = 52
for k in range(0,numWeeks):
    curSunday = firstSunday + np.timedelta64(7*k,'D')
    ax1.axvspan(curSunday-np.timedelta64(1,'D')-np.timedelta64(12,'h'),curSunday+np.timedelta64(12,'h'),zorder=-1,facecolor='lightgrey',label=int(k==0)*'Weekend')
    # ax2.axvspan(curSunday-np.timedelta64(1,'D')-np.timedelta64(12,'h'),curSunday+np.timedelta64(12,'h'),zorder=-1,facecolor='lightgrey',label=int(k==0)*'Weekend')

ax1.set_ylabel('Ugentlige nye smittetilfælde')
# ax2.set_ylabel('Ugentlige nye smittetilfælde per 100.000')
# ax2.set_ylabel('Andel ud af alle borgere i aldersgruppen [%]')

ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d\n%b'))
ax1.set_xlim(left=firstDate,right=thisdf.Dato.max()+np.timedelta64(5,'D'))
ax1.set_ylim(bottom=0)
# ax2.set_ylim(bottom=0)
ax1.legend(loc='upper left')
# ax2.legend(loc='upper left')
ax1.grid(axis='y')
# ax2.grid(axis='y')

smalltxt = 'Rasmus Kristoffer Pedersen, PandemiX Center, Roskilde Universitet.\nData fra SSI overvågningsdata, dashboard-fil. Filnavn: Regionalt_DB/18_fnkt_alder_uge_testede_positive_nyindlagte.csv'
ax1.text(1,-0.15,smalltxt,
     horizontalalignment='right',
     verticalalignment='top',
     transform = ax1.transAxes,
     fontsize=10)
     
fig.tight_layout()


fig.savefig(path_figs+'Agedistribution_CasesWeekly')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

NameError: name 'dfAge' is not defined