In [1]:
# Notebook for investigating dashboard data
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.min_rows', 50)


import matplotlib.pyplot as plt
%matplotlib widget
# Global matplotlib defaults for every figure in this notebook.
plt.rcParams['figure.figsize'] = (12,8)
# Use the tab10 palette both as image colormap and as the line colour cycle.
plt.rcParams["image.cmap"] = "tab10"
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=plt.cm.tab10.colors)
# Font sizes and line/marker sizes; everything is derived from fs_label,
# with axes titles 4 points larger than the other labels.
fs_label = 16
parameters = {
                'axes.labelsize': fs_label,
                'axes.titlesize': fs_label+4,
                'xtick.labelsize': fs_label,
                'ytick.labelsize': fs_label, 
                'legend.fontsize': fs_label, 
                'lines.markersize': 10,
                'lines.linewidth': 3
             }
# Apply all of the above in one call.
plt.rcParams.update(parameters)

%matplotlib widget
import matplotlib.colors as colors
from matplotlib import cm # Colormaps

import locale
import matplotlib.dates as mdates
locale.setlocale(locale.LC_TIME,"Danish")
locale.setlocale(locale.LC_ALL,"Danish")

## Useful commands to copy:
# cmap = plt.cm.get_cmap('Dark2',len(ageGroups))
# ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
# ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d\n%b'))
# plt.rcParams["image.cmap"] = "Dark2"
# plt.rcParams['axes.prop_cycle'] = plt.cycler(color=plt.cm.Dark2.colors)

import os
import math

from datetime import date


saveFigures = True
print('saveFigures is set to: '+str(saveFigures))

print('Done loading packages')

def rnMean(data,meanWidth):
    """Running mean of `data` over a window of `meanWidth` samples.

    The mean is computed as a convolution with a uniform kernel and only
    windows lying completely inside `data` are kept (``mode='valid'``), so
    for ``len(data) >= meanWidth`` the result holds
    ``len(data) - meanWidth + 1`` values.
    """
    kernel = np.full(meanWidth, 1.0/meanWidth)
    return np.convolve(data, kernel, mode='valid')
def rnTime(t,meanWidth):
    """Return the entries of `t` that line up with the output of `rnMean`.

    `rnMean` drops ``meanWidth - 1`` samples in total; this trims the same
    number from `t`: ``floor(meanWidth/2)`` at the start and
    ``ceil(meanWidth/2) - 1`` at the end, so each remaining entry sits at
    (or just after, for even widths) the centre of its averaging window.

    Parameters
    ----------
    t : sliceable sequence (list, numpy array, pandas Series/Index)
        Time stamps belonging to the data passed to `rnMean`.
    meanWidth : int
        Window length used in `rnMean`.

    Returns
    -------
    A slice of `t`, of the same type as `t[a:b]`.
    """
    first = math.floor(meanWidth/2)
    # Number of trailing samples to drop. An explicit end index is used
    # because a negative-slice end of ``-0`` (meanWidth of 1 or 2) would
    # select nothing instead of "up to the last element".
    trailing = math.ceil(meanWidth/2) - 1
    return t[first:len(t)-trailing]

saveFigures is set to: True
Done loading packages


In [2]:
# Locate the most recent dashboard snapshot: the last directory (in sorted
# traversal order) below `rootdir` that contains any files.
ssidatapath = "ssi_dashboard"
rootdir = os.getcwd() +"/" + ssidatapath

latestdir = None
latestDate = None

for subdir, dirs, files in os.walk(rootdir):
    # os.walk visits sub-directories in whatever order the file system
    # reports them. Sorting `dirs` in place makes the traversal
    # lexicographic, so folders named 'SSI_dashboard_YYYY-MM-DD' are visited
    # chronologically and the last hit really is the newest one.
    dirs.sort()
    if files:
        latestdir = subdir
        # The 10 characters following 'SSI_dashboard_' (14 chars) are the date.
        linkIni = latestdir.find('SSI_dashboard_')
        latestDate = latestdir[linkIni+14:linkIni+24]

if latestdir is None:
    # Fail with a clear message instead of a NameError in the prints below.
    raise FileNotFoundError('No data files found below ' + rootdir)

print(latestdir)
print(latestDate)

d:\Pandemix\Github\DanskeData/ssi_dashboard\SSI_dashboard_2021-05-25\Regionalt_DB
2021-05-25


In [3]:

maindf = pd.read_csv(latestdir+'/01_noegle_tal.csv',delimiter = ';',dtype=str,encoding='latin-1') 

In [4]:
# Load the data
# fulldf = pd.read_csv(latestdir+'/01_noegle_tal.csv',delimiter = ';',dtype=str,encoding='latin-1') 
# fulldf = pd.read_csv(latestdir+'/05_bekraeftede_tilfaelde_doede_pr_region_pr_alders_grp.csv',delimiter = ';',dtype=str,encoding='latin-1') 
# fulldf = pd.read_csv(latestdir+'/11_noegletal_pr_region_pr_aldersgruppe.csv',delimiter = ';',encoding='latin-1') 

fulldf = pd.read_csv(latestdir+'/03_bekraeftede_tilfaelde_doede_indlagte_pr_dag_pr_koen.csv',delimiter = ';',encoding='latin-1') 

In [5]:
# fulldf[fulldf.Aldersgruppe == '20-29']['Bekræftede tilfælde'].sum()
# fulldf['Bekræftede tilfælde'].sum()

In [6]:
# Daily confirmed cases, deaths and admissions per region and sex.
df = pd.read_csv(latestdir+'/03_bekraeftede_tilfaelde_doede_indlagte_pr_dag_pr_koen.csv',delimiter = ';',encoding='latin-1') 
# Parse the date strings with pd.to_datetime. The previous
# astype('datetime64[D]') asks for a day resolution that pandas does not
# support (older versions silently fell back to nanoseconds, pandas >= 2.0
# raises), whereas to_datetime works on both.
df["Prøvetagningsdato"] = pd.to_datetime(df["Prøvetagningsdato"])

In [7]:
# Total confirmed cases per sampling date, summed over all rows
# (i.e. over every region and sex) that share the date.
allDates = df['Prøvetagningsdato'].unique()

# A single grouped sum replaces one full-frame boolean filter per date.
# sort=False keeps the dates in order of first appearance, which is the
# order .unique() returns them in. Note that groupby skips rows whose date
# is missing.
casesPerDate = df.groupby('Prøvetagningsdato', sort=False)['Bekræftede tilfælde'].sum()

# Plain lists of numpy scalars, as consumed by the plotting cells below.
curDates = list(casesPerDate.index.values)
curCases = list(casesPerDate.values)

In [8]:
# Daily confirmed cases (all regions) with a 7-day running mean and the
# weekends shaded in grey.
fig,ax1 = plt.subplots()

# # Show mondays
# testMonday = np.datetime64('2021-05-17')
# numMondays = 30
# for m in range(0,numMondays):
#     ax1.axvline(x=testMonday-np.timedelta64(m*7,'D'),color='lightgrey')

# Upper y-limit: 10% above the largest of the last 100 daily counts.
curMax = max(curCases[-100:])*1.1

# Show weekend: one 48 hour wide bar per week, centred at noon on Saturday so
# that it covers the Saturday and Sunday data points (plotted at midnight).
testSat = np.datetime64('2021-01-02')
numWeeks = 30
for m in range(0,numWeeks):
    # Only the first bar carries the legend label; labels starting with an
    # underscore are ignored by the legend. This avoids drawing an extra,
    # duplicate bar after the loop just to obtain a single legend entry.
    weekendLabel = 'Weekend' if m == 0 else '_nolegend_'
    ax1.bar(testSat+np.timedelta64(m*7,'D') + np.timedelta64(12,'h'),curMax,color='grey',width=np.timedelta64(48,'h'),label=weekendLabel)

ax1.plot(curDates,curCases,'k.:',linewidth=0.5,label='Data')
meanWidth = 7
ax1.plot(rnTime(curDates,meanWidth),rnMean(curCases,meanWidth),'k',label='7 dages løbende gennemsnit')

# Tick labels: day of month on the first line, abbreviated month below.
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d\n%b'))

ax1.set_xlim(left=np.datetime64('2021-02-01'), right=np.datetime64(date.today())+np.timedelta64(1,'D'))
ax1.set_ylim([0,curMax])

ax1.set_ylabel('Antal bekræftede tilfælde')

ax1.legend()

plt.tight_layout()

if saveFigures:
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs('figs', exist_ok=True)
    plt.savefig('figs/CasesFromDashboard_Total')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
sum(curCases)

272421

In [10]:
curDates[-6:]

[numpy.datetime64('2021-05-20T00:00:00.000000000'),
 numpy.datetime64('2021-05-21T00:00:00.000000000'),
 numpy.datetime64('2021-05-22T00:00:00.000000000'),
 numpy.datetime64('2021-05-23T00:00:00.000000000'),
 numpy.datetime64('2021-05-24T00:00:00.000000000'),
 numpy.datetime64('2021-05-25T00:00:00.000000000')]

In [11]:
curCases[-6:]

[1009, 822, 781, 793, 276, 2]

In [12]:
df

Unnamed: 0,Region,Prøvetagningsdato,Køn,Bekræftede tilfælde,Døde,Indlæggelser,Kummuleret antal døde,Kummuleret antal bekræftede tilfælde,Kummuleret antal indlæggelser
0,Sjælland,2020-02-26,M,1,0,0,0,1,0
1,Hovedstaden,2020-02-27,M,1,0,0,0,2,0
2,Midtjylland,2020-02-28,M,1,0,0,0,3,0
3,Hovedstaden,2020-03-01,M,1,0,1,0,4,1
4,Hovedstaden,2020-03-02,M,2,0,1,0,6,2
5,Sjælland,2020-03-02,M,1,0,0,0,7,2
6,Syddanmark,2020-03-02,M,1,0,0,0,8,2
7,Hovedstaden,2020-03-03,F,1,0,0,0,9,2
8,Hovedstaden,2020-03-03,M,3,0,1,0,12,3
9,Hovedstaden,2020-03-04,M,2,0,0,0,14,3


# Nye indlæggelser

In [13]:
admdf = pd.read_csv(latestdir+'/06_nye_indlaeggelser_pr_region_pr_dag.csv',delimiter = ';',encoding='latin-1') 

In [14]:
# Parse the date strings with pd.to_datetime; astype('datetime64[D]') requests
# a day resolution that pandas does not support (it raises on pandas >= 2.0).
admdf["Dato"] = pd.to_datetime(admdf["Dato"])

In [15]:
admdf.tail()

Unnamed: 0,Region,Dato,Indlæggelser
1779,Midtjylland,2021-05-24,2
1780,Nordjylland,2021-05-24,2
1781,Sjælland,2021-05-24,3
1782,Syddanmark,2021-05-24,3
1783,Hovedstaden,2021-05-25,2


In [16]:

# Total new admissions per date, summed over all rows (regions) sharing it.
allAdmDates = admdf['Dato'].unique()

# One grouped sum instead of filtering the whole frame once per date.
# sort=False keeps the dates in order of first appearance, matching the
# order of .unique(). Rows with a missing date are skipped by groupby.
admPerDate = admdf.groupby('Dato', sort=False)['Indlæggelser'].sum()

admDates = list(admPerDate.index.values)
curAdm = list(admPerDate.values)



In [17]:
admDates = np.array(admDates).astype('datetime64[D]')

In [18]:
# Daily new admissions (dotted) together with their 7-day running mean (solid).
fig,ax1 = plt.subplots()

meanWidth = 7
ax1.plot(admDates,curAdm,'k.:',linewidth=0.5)

smoothDates = rnTime(admDates,meanWidth)
smoothAdm = rnMean(curAdm,meanWidth)
ax1.plot(smoothDates,smoothAdm,'k')

# Tick labels: abbreviated month on the first line, year below.
monthYear = mdates.DateFormatter('%b\n%Y')
ax1.xaxis.set_major_formatter(monthYear)

# Show February 2021 onwards, with a fixed y-range of 0 to 70.
ax1.set_xlim(left=np.datetime64('2021-02-01'))
ax1.set_ylim(bottom=0,top=70)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [19]:
# Load the manually maintained admissions spreadsheet.
# os.path.join builds the path portably; the previous literal
# 'Admitted\Admitted.xlsx' relied on '\A' not being a recognised escape
# sequence (a warning in recent Python versions) and only worked on Windows.
dfAdm = pd.read_excel(os.path.join('Admitted','Admitted.xlsx'))
# Transpose so that every original column becomes a row.
dfAdm = dfAdm.transpose()
# After transposing, the first row ('Aldersgruppe') holds the labels that
# become the column names; it is dropped afterwards so only data rows remain.
dfAdm.columns = dfAdm.iloc[0]
dfAdm = dfAdm.drop(['Aldersgruppe']) 
# The remaining index labels are dates written as day_month_year.
# NOTE: this re-uses the name `curDates`, replacing the list of case dates
# built earlier in the notebook.
curDates =  pd.to_datetime(dfAdm.index,format='%d_%m_%Y')
# Change between consecutive rows; the first row has no predecessor.
dfAdmDiff = dfAdm.diff().iloc[1:]
plotDates = curDates[1:]

In [20]:
# sumUp = []
# sumDates = []

# for k in np.arange(0,len(curAdm),7):

#     # print(np.sum(curAdm[k:k+7]))
#     curSum = np.sum(curAdm[k-6:k+1])
#     sumUp.append(curSum)
#     sumDates.append(admDates[k])

In [21]:
# Rolling 7-day sum of daily admissions via a difference of cumulative sums:
# toPlot[i] = res[i+7] - res[i] = curAdm[i+1] + ... + curAdm[i+7].
res = np.cumsum(curAdm)
toPlot = res[7:] - res[:-7]
# Each sum is paired with the date of the last day included in it.
toPlotDays = admDates[7:]

In [22]:
# Daily new admissions from the dashboard (dotted: raw, solid: 7-day running
# mean) compared with the differences taken from the Admitted.xlsx sheet.
fig,ax1 = plt.subplots()

ax1.plot(admDates,curAdm,'k.:',linewidth=0.5)
meanWidth = 7
ax1.plot(rnTime(admDates,meanWidth),rnMean(curAdm,meanWidth),'k')

# ax1.plot(sumDates,np.divide(sumUp,7),'rs')
# ax1.plot(toPlotDays,np.divide(toPlot,7),'m')
# Spreadsheet differences ('I alt' column) divided by 7.
# NOTE(review): the division by 7 presumably assumes the spreadsheet rows are
# one week apart, turning each difference into a per-day average - confirm.
ax1.plot(plotDates,np.divide(dfAdmDiff['I alt'],7),'*')

# Alternative: plot weekly sums instead of per-day averages.
# ax1.plot(toPlotDays,toPlot,'m')
# ax1.plot(plotDates,dfAdmDiff['I alt'],'*')


# Tick labels: abbreviated month on the first line, year below.
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))

ax1.set_ylim(bottom=0,top=70)
# ax1.set_ylim(bottom=0,top=70*7)
ax1.set_xlim(left=np.datetime64('2021-02-01'))
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
# Cumulative dashboard admissions, also drawn shifted 3, 4 and 7 days back in
# time, compared with the cumulative totals from the spreadsheet ('I alt').
fig,ax1 = plt.subplots()

cumAdm = np.cumsum(curAdm)
ax1.plot(admDates,cumAdm)
# Shifting a date needs a time *difference* (np.timedelta64). The previous
# np.datetime64(k,'D') is the calendar date k days after 1970-01-01, and
# subtracting it turns the x-values into durations instead of shifted dates.
for shiftDays in (3,4,7):
    ax1.plot(admDates-np.timedelta64(shiftDays,'D'),cumAdm)
ax1.plot(curDates,dfAdm['I alt'],'k*')


# Tick labels: abbreviated month on the first line, year below.
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))

# ax1.set_ylim(bottom=0,top=70*7)
# ax1.set_xlim(left=np.datetime64('2021-01-01'))
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [24]:
df = pd.read_csv(latestdir+'/08_bekraeftede_tilfaelde_pr_dag_pr_regions.csv',delimiter = ';',encoding='latin-1') 
df.tail()

Unnamed: 0,Region,Dato,Bekræftede tilfælde
2150,Midtjylland,2021-05-24,42
2151,Nordjylland,2021-05-24,32
2152,Sjælland,2021-05-24,40
2153,Syddanmark,2021-05-24,33
2154,Hovedstaden,2021-05-25,2


In [25]:
df = pd.read_csv(latestdir+'/01_noegle_tal.csv',delimiter = ';',encoding='latin-1') 
print(df['Overstået infektion'].sum())
print(df['Bekræftede tilfælde'].sum())
df

259313
275207


Unnamed: 0,Dato,Region,Køn,Bekræftede tilfælde,Døde,Overstået infektion,Indlæggelser,Testede personer,Ændring i antal bekræftede tilfælde,Ændring i antal døde,Ændring i antal overstået infektion,Ændring i antal indlagte,Ændring i antallet af testede personer,Antallet af prøver,Ændring i antallet af prøver,test_AG,test_AG_diff
0,2021-05-25,Hovedstaden,F,66305,635,62773,3637,812652,144,1,231,9,312,6227088,21859,3556674,66516
1,2021-05-25,Hovedstaden,M,63291,699,59442,3771,758304,153,0,231,3,373,4712555,18701,3300826,61784
2,2021-05-25,Midtjylland,F,24048,156,22764,990,549621,84,0,135,0,249,3832043,18601,2145796,39866
3,2021-05-25,Midtjylland,M,23661,198,22230,1216,522111,86,0,148,2,323,2852530,14690,2037801,38648
4,2021-05-25,,,0,0,0,0,23,0,0,0,0,0,35,0,0,0
5,2021-05-25,,F,846,5,813,54,21798,3,0,4,0,50,78762,242,107706,1920
6,2021-05-25,,M,1940,7,1868,130,38600,4,0,9,0,83,136603,434,215907,3962
7,2021-05-25,Nordjylland,F,9437,73,8695,425,253434,61,0,28,0,94,1906512,8890,925757,17669
8,2021-05-25,Nordjylland,M,9757,112,8872,521,246074,62,0,45,2,152,1420609,7514,884718,17125
9,2021-05-25,Sjælland,F,19096,181,18026,1166,338666,50,0,81,0,135,2481070,8049,1394834,26529


# Go through each day's nøgletal (key figures)


In [26]:
# Collect the totals (confirmed cases, 'Overstået infektion', deaths and
# admissions) from the key-figures file '01_noegle_tal.csv' of every
# 'Regionalt' dashboard snapshot, one row per snapshot.
ssidatapath = "ssi_dashboard"
rootdir = os.getcwd() +"/" + ssidatapath

# Rows are gathered in a list and turned into a DataFrame once at the end,
# instead of growing the frame one .loc assignment at a time.
records = []

for subdir, dirs, files in os.walk(rootdir):
    # os.walk does not guarantee any order; sorting `dirs` in place makes the
    # traversal lexicographic so folders named 'SSI_dashboard_YYYY-MM-DD' are
    # visited chronologically. The .diff() calls further down rely on the
    # rows being in date order.
    dirs.sort()
    if not files:
        continue

    latestdir = subdir
    # The 10 characters following 'SSI_dashboard_' (14 chars) are the date.
    linkIni = latestdir.find('SSI_dashboard_')
    latestDate = latestdir[linkIni+14:linkIni+24]

    if (latestdir.find('Regionalt') != -1):
        curdf = pd.read_csv(latestdir+'/01_noegle_tal.csv',delimiter = ';',encoding='latin-1')

        # Each column is summed over all rows of the snapshot file.
        records.append({
            'Date': latestDate,
            'Case': curdf['Bekræftede tilfælde'].sum(),
            'Done': curdf['Overstået infektion'].sum(),
            'Dead': curdf['Døde'].sum(),
            'Admitted': curdf['Indlæggelser'].sum(),
        })

# Passing `columns` keeps the expected layout even when no snapshot was found.
collectdf = pd.DataFrame(records, columns=['Date','Case','Done','Dead','Admitted'])
# Number of snapshots collected (kept for compatibility with earlier versions).
counter = len(records)


In [27]:
collectdf.tail()

Unnamed: 0,Date,Case,Done,Dead,Admitted
47,2021-05-18,268255,252686,2503,14894
48,2021-05-19,269343,253678,2505,14920
49,2021-05-20,270557,254739,2506,14955
50,2021-05-21,271908,255624,2506,14980
51,2021-05-25,275207,259313,2510,15091


In [28]:
# Parse the snapshot date strings with pd.to_datetime; astype('datetime64[D]')
# requests a day resolution that pandas does not support (it raises on
# pandas >= 2.0).
collectdf['Date'] = pd.to_datetime(collectdf['Date'])

In [29]:
# Three stacked panels built from the per-snapshot totals in `collectdf`:
#   ax1: cumulative 'Case' and 'Done' totals,
#   ax2: Case - Done, and Case - Done - Dead (red),
#   ax3: snapshot-to-snapshot changes of Case (blue) and Done (red),
#        raw (dotted) and as a 7-point running mean (solid).
fig,(ax1,ax2,ax3) = plt.subplots(3,1,figsize=(12,18))

# x-values for the differences: every snapshot date except the last one.
# NOTE(review): .diff()[1:] holds value[i] - value[i-1], so each change is
# drawn at the date of the *earlier* snapshot - confirm this is intended.
# The collectdf.tail() output also shows snapshots that are not one day
# apart (2021-05-21 followed by 2021-05-25), so some "daily" changes
# actually span several days.
diffDate = collectdf.Date[:-1]


ax1.plot(collectdf.Date,collectdf.Case,'*-')
ax1.plot(collectdf.Date,collectdf.Done,'*-')


ax2.plot(collectdf.Date,collectdf.Case-collectdf.Done)
ax2.plot(collectdf.Date,collectdf.Case-collectdf.Done-collectdf.Dead,'r')



ax3.plot(diffDate,collectdf.Case.diff()[1:],'b.:',markersize=3,linewidth=0.5)
ax3.plot(diffDate,collectdf.Done.diff()[1:],'r.:',markersize=3,linewidth=0.5)

# Running mean over 7 consecutive snapshots (not necessarily 7 calendar days).
meanWidth = 7
ax3.plot(rnTime(diffDate,meanWidth),rnMean(collectdf.Case.diff()[1:],meanWidth),'b')
ax3.plot(rnTime(diffDate,meanWidth),rnMean(collectdf.Done.diff()[1:],meanWidth),'r')

ax2.set_ylim(bottom=0)

ax1.grid()
ax2.grid()
ax3.grid()

plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [30]:
# Snapshot-to-snapshot changes of the admission total (top) and the death
# total (bottom). Relies on `diffDate` defined in the previous plotting cell.
fig,(ax1,ax2) = plt.subplots(2,1)


ax1.plot(diffDate,collectdf.Admitted.diff()[1:])


ax2.plot(diffDate,collectdf.Dead.diff()[1:])

# Anchor both y-axes at zero.
ax1.set_ylim(bottom=0)
ax2.set_ylim(bottom=0)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0.0, 9.45)