In [1]:
# Notebook for investigating dashboard data
import numpy as np
import pandas as pd
# Display settings for DataFrames: allow up to 500 rows before a frame is
# truncated, and show 50 rows when a frame is truncated.
pd.options.display.max_rows = 500
pd.options.display.min_rows = 50


import matplotlib.pyplot as plt
%matplotlib widget
plt.rcParams['figure.figsize'] = (12,8)
plt.rcParams["image.cmap"] = "tab10"
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=plt.cm.tab10.colors)
#Settings for plotting
fs_label = 16
parameters = {
                'axes.labelsize': fs_label,
                'axes.titlesize': fs_label+4,
                'xtick.labelsize': fs_label,
                'ytick.labelsize': fs_label, 
                'legend.fontsize': fs_label, 
                'lines.markersize': 10,
                'lines.linewidth': 3
             }
plt.rcParams.update(parameters)

%matplotlib widget
import matplotlib.colors as colors
from matplotlib import cm # Colormaps

import locale
import matplotlib.dates as mdates
# Switch to a Danish locale so that formatted dates (e.g. month abbreviations
# on plot axes) come out in Danish. LC_ALL already covers LC_TIME, so a single
# call is enough. "Danish" is the Windows name of the locale; the other names
# are the usual spellings on Linux/macOS. If none is installed, keep the
# default locale instead of aborting the notebook.
for localeName in ("Danish", "da_DK.UTF-8", "da_DK"):
    try:
        locale.setlocale(locale.LC_ALL, localeName)
        break
    except locale.Error:
        continue
else:
    print('Warning: no Danish locale available, keeping the default locale')

## Useful commands to copy:
# cmap = plt.cm.get_cmap('Dark2',len(ageGroups))
# ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
# ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d\n%b'))
# plt.rcParams["image.cmap"] = "Dark2"
# plt.rcParams['axes.prop_cycle'] = plt.cycler(color=plt.cm.Dark2.colors)

import os
import math

from datetime import date


# Global switch: when True, plotting cells also write their figure to disk
saveFigures = True
print(f'saveFigures is set to: {saveFigures}')

# Marks the end of the setup cell in the output
print('Done loading packages')

def rnMean(data,meanWidth):
    """Return the running mean of `data` over windows of `meanWidth` samples.

    Only windows that lie completely inside `data` are used, so the result
    has ``len(data) - meanWidth + 1`` entries. Use `rnTime` with the same
    `meanWidth` to get the matching time axis.

    Parameters:
        data: one-dimensional sequence of numbers.
        meanWidth: window length in samples (positive integer).

    Returns:
        numpy array of window averages; empty when `data` is shorter than
        the window.

    Raises:
        ValueError: if `meanWidth` is smaller than 1.
    """
    if meanWidth < 1:
        raise ValueError('meanWidth must be a positive integer')
    values = np.asarray(data)
    # np.convolve swaps its operands when the kernel is longer than the data,
    # which would return a non-empty array that is not a running mean.
    if meanWidth > len(values):
        return np.empty(0)
    return np.convolve(values, np.ones(meanWidth)/meanWidth, mode='valid')
def rnTime(t,meanWidth):
    """Return the part of the time axis `t` that matches `rnMean(data, meanWidth)`.

    Drops ``floor(meanWidth/2)`` entries at the start and
    ``ceil(meanWidth/2) - 1`` entries at the end, leaving
    ``len(t) - meanWidth + 1`` entries.

    Parameters:
        t: sliceable sequence of time stamps (list, numpy array, ...).
        meanWidth: window length in samples used for the running mean.

    Returns:
        A slice of `t`; empty when `t` is shorter than the window.
    """
    start = math.floor(meanWidth/2)
    # Use an explicit, non-negative stop index: the negative form
    # `-ceil(meanWidth/2)+1` becomes 0 for meanWidth 1 or 2 and would
    # silently produce an empty slice.
    stop = max(start, len(t) - math.ceil(meanWidth/2) + 1)
    return t[start:stop]

saveFigures is set to: True
Done loading packages


In [2]:
# Get the most recent data directory below ./ssi_dashboard
ssidatapath = "ssi_dashboard"
rootdir = os.path.join(os.getcwd(), ssidatapath)

# Folder names look like 'SSI_dashboard_YYYY-MM-DD'; the date follows this prefix
datePrefix = 'SSI_dashboard_'

latestdir = None
for subdir, dirs, files in os.walk(rootdir):
    # os.walk gives no ordering guarantee. Sorting the sub-directories in place
    # makes the traversal alphabetical, so the last directory that contains
    # files belongs to the newest ISO-dated folder.
    dirs.sort()
    if files:
        latestdir = subdir

if latestdir is None:
    raise FileNotFoundError('No data files found below ' + rootdir)

# Extract the 10-character date that follows the prefix in the path
linkIni = latestdir.find(datePrefix)
if linkIni == -1:
    # Prefix not present in the path: report no date instead of a random slice
    latestDate = ''
else:
    dateStart = linkIni + len(datePrefix)
    latestDate = latestdir[dateStart:dateStart+10]

print(latestdir)
print(latestDate)

d:\Pandemix\Github\DanskeData/ssi_dashboard\SSI_dashboard_2021-05-17\Regionalt_DB
2021-05-17


In [3]:
# Read the table of confirmed cases, deaths and admissions per day and sex
# from the most recent dashboard folder (semicolon separated, latin-1 encoded).
#
# Other tables from the same folder that have been inspected with this cell:
#   '/01_noegle_tal.csv'                                              (read with dtype=str)
#   '/05_bekraeftede_tilfaelde_doede_pr_region_pr_alders_grp.csv'    (read with dtype=str)
#   '/11_noegletal_pr_region_pr_aldersgruppe.csv'
fulldf = pd.read_csv(
    latestdir+'/03_bekraeftede_tilfaelde_doede_indlagte_pr_dag_pr_koen.csv',
    delimiter=';',
    encoding='latin-1',
)

In [4]:
# fulldf[fulldf.Aldersgruppe == '20-29']['Bekræftede tilfælde'].sum()
# fulldf['Bekræftede tilfælde'].sum()



In [5]:
# Working copy of the per-day/per-sex table, read fresh from disk
df = pd.read_csv(latestdir+'/03_bekraeftede_tilfaelde_doede_indlagte_pr_dag_pr_koen.csv',delimiter = ';',encoding='latin-1')
# Parse the sampling date with pd.to_datetime: casting with
# astype('datetime64[D]') is not a supported conversion in newer pandas
# releases (only s/ms/us/ns units are accepted), while to_datetime works on
# all versions and gives the same datetime64[ns] column.
df["Prøvetagningsdato"] = pd.to_datetime(df["Prøvetagningsdato"])

In [6]:
# Total number of confirmed cases per sampling date, summed over all rows
# (i.e. over every region and sex) that share the date.
allDates = df['Prøvetagningsdato'].unique()

# A single grouped sum replaces one full-frame scan per date. sort=False keeps
# the dates in order of first appearance, the same order that unique() gives.
# Rows without a date are skipped by groupby.
dailyCases = df.groupby('Prøvetagningsdato', sort=False)['Bekræftede tilfælde'].sum()

# Plain lists of numpy scalars, as the later cells expect
curDates = list(dailyCases.index.values)
curCases = list(dailyCases.values)

In [22]:
fig,ax1 = plt.subplots()

# Upper y-limit: 10 % headroom above the largest of the last 100 daily counts
curMax = max(curCases[-100:])*1.1

# Show weekend: one grey bar per week, starting from Saturday 2021-01-02.
# Each bar is centred on Saturday 12:00 and is 48 h wide, so it surrounds the
# Saturday and Sunday data points (which sit at midnight of their date).
testSat = np.datetime64('2021-01-02')
numWeeks = 30
weekendWidth = np.timedelta64(48,'h')
for m in range(0,numWeeks):
    weekendCenter = testSat+np.timedelta64(m*7,'D') + np.timedelta64(12,'h')
    # Label only the first bar so the legend gets exactly one 'Weekend' entry;
    # labels starting with an underscore are left out of the legend. This
    # replaces drawing the last bar a second time after the loop.
    weekendLabel = 'Weekend' if m == 0 else '_nolegend_'
    ax1.bar(weekendCenter,curMax,color='grey',width=weekendWidth,label=weekendLabel)

# Raw daily counts plus the centred 7-day running mean
ax1.plot(curDates,curCases,'k.:',linewidth=0.5,label='Data')
meanWidth = 7
ax1.plot(rnTime(curDates,meanWidth),rnMean(curCases,meanWidth),'k',label='7 dages løbende gennemsnit')

# Tick labels: day of month above abbreviated month name
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d\n%b'))

# Show data from 1 February 2021 up to and including today
ax1.set_xlim(left=np.datetime64('2021-02-01'), right=np.datetime64(date.today())+np.timedelta64(1,'D'))
ax1.set_ylim([0,curMax])

ax1.set_ylabel('Antal bekræftede tilfælde')

ax1.legend()

plt.tight_layout()

if saveFigures:
    # Make sure the output folder exists; savefig does not create it
    os.makedirs('figs', exist_ok=True)
    plt.savefig('figs/CasesFromDashboard_Total')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [8]:
sum(curCases)

264609

In [9]:
curDates[-6:]

[numpy.datetime64('2021-05-11T00:00:00.000000000'),
 numpy.datetime64('2021-05-12T00:00:00.000000000'),
 numpy.datetime64('2021-05-13T00:00:00.000000000'),
 numpy.datetime64('2021-05-14T00:00:00.000000000'),
 numpy.datetime64('2021-05-15T00:00:00.000000000'),
 numpy.datetime64('2021-05-16T00:00:00.000000000')]

In [10]:
curCases[-6:]

[1286, 1144, 895, 867, 932, 272]

In [11]:
df

Unnamed: 0,Region,Prøvetagningsdato,Køn,Bekræftede tilfælde,Døde,Indlæggelser,Kummuleret antal døde,Kummuleret antal bekræftede tilfælde,Kummuleret antal indlæggelser
0,Sjælland,2020-02-26,M,1,0,0,0,1,0
1,Hovedstaden,2020-02-27,M,1,0,0,0,2,0
2,Midtjylland,2020-02-28,M,1,0,0,0,3,0
3,Hovedstaden,2020-03-01,M,1,0,1,0,4,1
4,Hovedstaden,2020-03-02,M,2,0,1,0,6,2
5,Sjælland,2020-03-02,M,1,0,0,0,7,2
6,Syddanmark,2020-03-02,M,1,0,0,0,8,2
7,Hovedstaden,2020-03-03,F,1,0,0,0,9,2
8,Hovedstaden,2020-03-03,M,3,0,1,0,12,3
9,Hovedstaden,2020-03-04,M,2,0,0,0,14,3
