In [None]:
# Plot configuration: time window and data-quality threshold
N_days = 60          # number of days (counted back from today) to include
bad_cutoff = 20000   # minimum number of tests for a day to be considered good

In [None]:
%matplotlib notebook
import re
import zipfile
from datetime import date, timedelta
from pathlib import Path
from urllib.request import Request, urlopen

import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# To use a specific report instead of scraping, define today's URL manually:
#url = 'https://files.ssi.dk/Data-Epidemiologiske-Rapport-30102020-pc29'


# Find the link to the report zip file on the SSI surveillance web page.
# Assumes a stable initial part of the file name and a stable web page location.
req = Request("https://www.ssi.dk/sygdomme-beredskab-og-forskning/sygdomsovervaagning/c/covid19-overvaagning")
# Close the HTTP response deterministically once the page has been parsed.
with urlopen(req) as html_page:
    soup = BeautifulSoup(html_page, "html")

# Collect every anchor whose href points at an epidemiological report archive.
report_links = [
    href
    for href in (link.get('href') for link in soup.find_all('a'))
    if isinstance(href, str) and "Data-Epidemiologiske-Rapport" in href
]

# Without this check a page layout change would only surface later as a
# NameError on `url` in the download cell.
if not report_links:
    raise RuntimeError(
        "No link containing 'Data-Epidemiologiske-Rapport' was found on the SSI page; "
        "set `url` manually instead."
    )

for href in report_links:
    print("url for zip file: " + href)

# When several links match, the last one on the page is used.
url = report_links[-1]
        


In [None]:
# Download the zip archive directly and unpack it
r = requests.get(url, allow_redirects=True)
# Stop here on an HTTP error instead of saving an error page as "data.zip",
# which would only fail later with a confusing BadZipFile error.
r.raise_for_status()

# Use a context manager so the file is flushed and closed before it is re-opened as a zip.
with open('data.zip', 'wb') as zip_file:
    zip_file.write(r.content)

with zipfile.ZipFile("data.zip", "r") as zip_ref:
    zip_ref.extractall("data/")

# Define the file to read
datafolder = Path("data/")
datafile = datafolder / "Test_pos_over_time.csv"

In [None]:
# Read the data file.
# Skips the last two lines (which do not convert to a date) and uses the parsed
# 'Date' column as the index.
# Note the handling of the Danish number format (',' as decimal mark, '.' as
# thousands separator).
# `on_bad_lines='skip'` replaces `error_bad_lines=False`, which was deprecated in
# pandas 1.3 and removed in pandas 2.0 (requires pandas >= 1.3).
df = pd.read_csv(
    datafile,
    sep=';',
    parse_dates=['Date'],
    index_col=['Date'],
    on_bad_lines='skip',
    engine='python',   # needed for skipfooter
    skipfooter=2,
    decimal=',',
    thousands='.',
)


In [None]:
# calculate some more numbers

# Number of positives scaled empirically by the number of tests to the power of 0.7.
# This scaling is based on results in
# SSI "Ekspertrapport af d. 23. oktober 2020 Incidens og fremskrivning af COVID-19 tilfælde"
# https://www.ssi.dk/-/media/ssi-files/ekspertrapport-af-den-23-oktober-2020-incidens-og-fremskrivning-af-covid19-tilflde.pdf?la=da
def calcScaledNumber (row):
    """Return NewPositive / NotPrevPos**0.7 for one row.

    `row` is any object exposing `NewPositive` and `NotPrevPos` attributes.
    Returns 0 when `NotPrevPos` is not greater than zero.
    """
    tested = row.NotPrevPos
    # Written as "not > 0" so that anything failing the comparison yields 0.
    if not tested > 0:
        return 0
    return row.NewPositive / (tested ** 0.7)
    
# Apply the scaling row by row (axis=1 passes each row to the function)
df['ScaledNumber'] = df.apply(calcScaledNumber, axis=1)

# Recompute the positive percentage to get more decimals for plotting
def calcPosPct (row):
    """Return NewPositive as a percentage of NotPrevPos for one row.

    `row` is any object exposing `NewPositive` and `NotPrevPos` attributes.
    Returns 0 when `NotPrevPos` is not greater than zero.
    """
    tested = row.NotPrevPos
    return row.NewPositive / tested * 100 if tested > 0 else 0
# Apply the percentage calculation row by row (axis=1 passes each row to the function)
df['PosPct'] = df.apply(calcPosPct, axis=1)


In [None]:
# For easy plotting, make a sub data frame holding only the rows from
# N_days before today onwards (label-based slice on the date index)
df_sel = df.loc[date.today() - timedelta(days=N_days):]

# Boolean mask marking the "bad" data points: days with fewer tests than bad_cutoff
bad_idx = df_sel['NotPrevPos'].lt(bad_cutoff)

In [None]:
# Define a common plot title that includes the report date taken from the file URL
title_str = 'SSI COVID-19 data, tilfælde opgjort på prøvetagningsdato \n'

# Find the 8-digit date by pattern rather than by fixed character position, so
# the title stays correct if the host or path in front of the file name changes.
date_match = re.search(r'Data-Epidemiologiske-Rapport-(\d{8})', url)
# Fall back to the original fixed-position slice if the pattern is not present.
title_str += date_match.group(1) if date_match else url[50:58]

In [None]:
# New positives per day; days below the test cutoff are drawn again in red on the same axes
ax = df_sel.plot(y='NewPositive', style='.', title=title_str);
df_sel.loc[bad_idx].plot(
    y='NewPositive',
    ax=ax,
    style='.',
    color='red',
    label=f'NewPositive (Tested<{bad_cutoff})',
);


In [None]:
# Number tested per day; days below the test cutoff are drawn again in red on the same axes
ax = df_sel.plot(
    y='NotPrevPos',
    style='.',
    title=title_str,
    label='Tested (NotPrevPos)',
);
df_sel.loc[bad_idx].plot(
    y='NotPrevPos',
    ax=ax,
    style='.',
    color='red',
    label=f'Tested<{bad_cutoff}',
);

In [None]:
# Positive percentage per day; days below the test cutoff are drawn again in red on the same axes
ax = df_sel.plot(
    y='PosPct',
    style='.',
    title=title_str,
    label='NewPositive / NotPrevPosTested * 100',
);
df_sel.loc[bad_idx].plot(
    y='PosPct',
    ax=ax,
    style='.',
    color='red',
    label=f'NewPositive / NotPrevPosTested * 100 (Tested<{bad_cutoff})',
);
ax.set_ylabel("%");
#plt.savefig('plot.png', dpi=300, bbox_inches='tight')


In [None]:
# Scaled number per day; days below the test cutoff are drawn again in red on the same axes
ax = df_sel.plot(
    y='ScaledNumber',
    style='.',
    title=title_str,
    label='NewPositive/NotPrevPosTested^0.7',
);
df_sel.loc[bad_idx].plot(
    y='ScaledNumber',
    ax=ax,
    style='.',
    color='red',
    label=f'NewPositive/NotPrevPosTested^0.7 (Tested<{bad_cutoff})',
);

#plt.savefig('plot2.png', dpi=300, bbox_inches='tight')
#plt.show()
# This scaling is based on 
# SSI "Ekspertrapport af d. 23. oktober 2020 Incidens og fremskrivning af COVID-19 tilfælde"
# https://www.ssi.dk/-/media/ssi-files/ekspertrapport-af-den-23-oktober-2020-incidens-og-fremskrivning-af-covid19-tilflde.pdf?la=da