# Utility functions

Some functions required by this and other modules.


In [1]:
import datetime
import json
import os
from datetime import date, timedelta
from pathlib import Path

import requests

def ensure_path_exists(thePath):
    """Make sure that the given directory exists and create it if necessary.

    An absolute path is used as it is. A relative path is resolved against a
    base directory: when running in Jupyter/IPython (detected by the presence
    of the ``get_ipython`` name) the base is the parent directory of the
    current working directory, otherwise it is the directory of this file.

    Args:
        thePath (string): the given relative or absolute path

    Returns:
        string: The absolute path that exists after the call or an empty string if
        the directory could not be created (the error is printed, not raised)
    """
    # if the data directory is given as an absolute path it's all fine
    if Path(thePath).is_absolute():
        result = thePath
    else:
        try:
            # only defined when running in Jupyter/IPython, otherwise this
            # lookup raises a NameError
            get_ipython
            # NOTE(review): this is the PARENT of the working directory, exactly
            # as in the original code - confirm that this is intended
            currentDirectory = os.path.dirname(os.path.abspath(''))
        except NameError:
            # not running in Jupyter: the absolute directory of this python file
            currentDirectory = os.path.dirname(os.path.abspath(__file__))
        # the directory is not given as an absolute path so add it to the base directory
        result = currentDirectory + '/' + thePath
    if not os.path.exists(result):
        try:
            # exist_ok avoids a failure if the directory appears between the
            # check above and this call
            os.makedirs(result, exist_ok=True)
        except (OSError, ValueError) as e:
            # best effort: report the problem and signal the failure to the caller
            print(e)
            return ''
    return result
    
def download_JSON_file(endpoint, filename):
    """Download a JSON document from the given endpoint and store it in the given file.

    If the directory of the file doesn't exist it will be created. The JSON
    is written compactly (no pretty printing) followed by a newline.

    Args:
        endpoint (string): the full endpoint that is referring to a JSON file
        filename (string): the full filename of the file to be created

    Raises:
        requests.HTTPError: In case the server answered with an error status
        IOError: In case it can't save the data
    """
    # contact the server, an error status raises an exception
    res = requests.get(endpoint)
    res.raise_for_status()
    # get the json
    data = res.json()
    try:
        # create the directory of the file (not the file itself) if it doesn't
        # exist; a bare filename has no directory part and needs nothing
        path = os.path.dirname(filename)
        if path:
            os.makedirs(path, exist_ok=True)
        # write it to the file
        with open(filename, 'w', encoding='utf-8') as f:
            # use dumps as we don't care about formatting
            f.write(json.dumps(data) + "\n")
    except Exception as err:
        # keep the documented IOError contract but preserve the original cause
        msg = 'Error writing file ' + filename
        raise IOError(msg) from err

def this_or_last_weekday(the_date, the_weekday):
    """Return the most recent occurrence of a weekday on or before the given date.

    If the given date already falls on the requested weekday it is returned
    unchanged. Otherwise the result is a datetime at 9:00 on the last day
    before the given date that has the requested weekday.
        Example: the_date = 2022.01.19 that was a Wednesday (weekday=2),
                 called with the_weekday=4 (Friday) the function returns 2022.01.14 9:00
                 called with the_weekday=2 (Wednesday) the function returns 2022.01.19

    Args:
        the_date (Date): the date to be checked
        the_weekday (int): the requested day of the week ranging from 0 (Monday) to 6 (Sunday)

    Returns:
        Date or DateTime: the_date itself if it has the requested weekday, otherwise
        a DateTime at 9:00 of the most recent day having that weekday
    """
    # nothing to do if the given date already has the requested weekday
    if the_weekday == the_date.weekday():
        return the_date

    week = datetime.timedelta(weeks=1)
    # the given day at 9:00 is the reference for all comparisons
    reference = datetime.datetime(the_date.year, the_date.month, the_date.day, 9, 0)
    # Monday of the reference week
    monday = reference.date() - datetime.timedelta(days=reference.weekday())
    # the requested weekday in the week before the reference week, at 9:00
    candidate_day = monday + datetime.timedelta(days=the_weekday, weeks=-1)
    candidate = datetime.datetime.combine(candidate_day, datetime.time(9))

    # if the candidate is a full week or more in the past, the requested
    # weekday already occurred in the reference week, so use that one
    if reference - candidate >= week:
        return candidate + week
    return candidate


# The GeoInformationWorld class

A class to handle ISO 3166 country codes and names, including basic information about the population of the countries of the world.


In [2]:
from abc import ABC, abstractmethod
import pandas as pd
import numpy as np
import math
import re

class GeoInformationWorld():
    """Lookup helper for ISO 3166 country codes, country names, population and continent.

    All lookups are done on a data frame that the constructor loads from a CSV file.
    """

    def __init__(self):
        """The constructor loads a CSV with the geo information of the countries of the world.
            ATTENTION: The GeoID and alpha-2 code of Namibia is 'NA'. The pandas csv reader
            would turn that into NaN by default, so the file is read with keep_default_na=False.

        Raises:
            Any exception raised by pandas.read_csv in case the file couldn't be loaded

        """
        # the geo information for the world is loaded from GitHub
        csvLocation = 'https://raw.githubusercontent.com/1c3t3a/Covid-19-analysis/master/data/GeoInformationWorld.csv'
        self.__dfGeoInformationWorld = pd.read_csv(csvLocation, keep_default_na=False)

    def __rows_matching(self, keyColumn, key):
        """Return all rows of the geo information whose value in keyColumn equals key.

        Args:
            keyColumn (str): the name of the column to search in
            key (str): the value to search for

        Returns:
            DataFrame: the matching rows, empty if there is no match
        """
        world = self.get_geo_information_world()
        return world.loc[world[keyColumn] == key]

    def get_geo_information_world(self):
        """Return the data frame holding the information of all countries.

        Returns:
            DataFrame: The data frame that has been loaded by the constructor
        """
        return self.__dfGeoInformationWorld

    def geo_name_from_geoid (self, geoID):
        """Return the country name for a given ISO-3166-alpha_2 geoid.

        Args:
            geoID (str):  a string of a ISO-3166-alpha_2 geoid

        Raises:
            IndexError: In case the geoid is not in the list

        Returns:
            str: the country name
        """
        return self.__rows_matching('GeoID', geoID)['GeoName'].values[0]

    def geo_name_from_ISO3166_alpha_3 (self, geoID):
        """Return the country name for a given ISO-3166-alpha_3 geoid.

        Args:
            geoID (str):  a string of a ISO-3166-alpha_3 geoid

        Returns:
            str: the country name or 'Unknown' if the geoid is not in the list
        """
        matches = self.__rows_matching('ISO-3166-alpha_3', geoID)
        # a geoid that is not in the list is reported as 'Unknown'
        return 'Unknown' if matches.empty else matches['GeoName'].values[0]

    def geoID_from_ISO3166_alpha_3 (self, geoID):
        """Return the ISO-3166-alpha_2 geoid for a given ISO-3166-alpha_3 geoid.

        Args:
            geoID (str):  a string of a ISO-3166-alpha_3 geoid

        Returns:
            str: the ISO-3166-alpha_2 geoid or 'Unknown' if the given geoid is not in the list
        """
        matches = self.__rows_matching('ISO-3166-alpha_3', geoID)
        # a geoid that is not in the list is reported as 'Unknown'
        return 'Unknown' if matches.empty else matches['GeoID'].values[0]

    def ISO3166_alpha_3_from_geoID (self, geoID):
        """Return the ISO-3166-alpha_3 geoid for a given ISO-3166-alpha_2 geoid.

        Args:
            geoID (str):  a string of a ISO-3166-alpha_2 geoid

        Raises:
            IndexError: In case the geoid is not in the list

        Returns:
            str: the ISO-3166-alpha_3 geoid
        """
        return self.__rows_matching('GeoID', geoID)['ISO-3166-alpha_3'].values[0]

    def population_from_geoid(self, geoID):
        """Return the population of a country for a given ISO-3166-alpha_2 geoid.

        Args:
            geoID (str):  a string of a ISO 3166 alpha_2 geoid

        Raises:
            IndexError: In case the geoid is not in the list

        Returns:
            int: the value of the 'Population2019' column of the country
        """
        population = self.__rows_matching('GeoID', geoID)['Population2019'].values[0]
        return int(population)

    def continent_from_geoid(self, geoID):
        """Return the continent of a country for a given ISO-3166-alpha_2 geoid.

        Args:
            geoID (str):  a string of a ISO 3166 alpha_2 geoid

        Raises:
            IndexError: In case the geoid is not in the list

        Returns:
            str: the continent of the country
        """
        return self.__rows_matching('GeoID', geoID)['Continent'].values[0]




# The CovidCases class and its subclasses

This abstract base class will expose data attributes in form of a DataFrame. It also provides methods to process the data which will end up in additional columns in the DataFrame. Please refer to [http://mb.cmbt.de/python-class-documentation/the-covidcases-class52/](http://mb.cmbt.de/python-class-documentation/the-covidcases-class52/) for a complete documentation of the class.  

So far there are three sub-classes handling three different data sources: 
  
```CovidCasesWHO```  
the data is provided by the [WHO website](https://covid19.who.int/WHO-COVID-19-global-data.csv).   

```CovidCasesOWID```  
gets data from [Our World In Data](https://covid.ourworldindata.org/data/owid-covid-data.csv). A drawback of this data is its quality, in particular the fact that not all countries' data is generated by official agencies. On the other hand, OWID provides much more data, such as vaccination numbers.  

```CovidCasesECDC```  
handles the data provided by the [European Center of Disease Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide) until December, 14th. 2020.  

The documentation for these classes is available at [http://mb.cmbt.de/python-class-documentation/the-covidcases-world-sub-classes52/](http://mb.cmbt.de/python-class-documentation/the-covidcases-world-sub-classes52/). 
Please note: The ECDC subclass is a legacy class and can't be used anymore as the ECDC is not publishing daily updated data since December 2020.  

The [blog post in this link](http://mb.cmbt.de/covid-19-analysis/data-source-comparison/) compares the quality of the different datasets.

**ATTENTION**  
These classes have been modified compared to the files on GitHub to be able
to be executed in colab and they store the data in **/content/data/**  


In [3]:
class CovidCases(ABC):
    """This abstract base class will expose data attributes in form of a DataFrame. It also provides methods to process 
    the data which will end up in additional columns in the DataFrame.  
    These are the names of the seven columns that have to be generated by ALL subclasses.

    Date
    The date of the data 
    
    GeoID
    The ISO-3166-alpha_2 GeoID of the area such as 'FR' for France or 'DE' for Germany

    GeoName
    The name of the area such as 'England' or 'Italy'

    Population
    The population of the country

    Continent
    E.g. The continent of the country. But it also may be grouping value for e.g. the states of a federal republic such as Bavaria
    
    DailyCases
    The number of new cases on a given day

    DailyDeaths
    The number of new deaths on the given date

    Beside these fields a subclass might also define additional columns.
    Based on the mandatory columns the class will generate the following additional columns (attributes):
    
    Cases
    The accumulated number of cases (cumulative sum of DailyCases)

    Deaths
    The accumulated number of deaths (cumulative sum of DailyDeaths)

    CasesPerMillionPopulation
    The number of cumulative cases divided by the population of the country in million

    DeathsPerMillionPopulation
    The number of cumulative deaths divided by the population of the country in million

    PercentDeaths
    The number of deaths in % of the cases. This is the Case Fatality Rate (CFR), an approximation for the
    Infection Fatality Rate (IFR) that includes also 'hidden' cases.

    DoublingTime
    The number of days in which the number of cases will be doubled

    Incidence7DayPer100Kpopulation
    The accumulated 7 day incidence. That is the sum of daily cases of the last 7 days divided by the 
    population in 100000. It is generated on demand by calling add_incidence_7day_per_100Kpopulation.

    R
    This is an estimation of the reproduction number R0. As the calculation takes some time it is 
    generated on demand by calling the add_r0 method.

    Beside that sub-class may add additional attributes. Please refer to the documentation of the 
    specific sub-class that you want to use.

    Returns:
        You can't create an instance of this class. Instead create an instance of a subclass
    """

    def __init__(self, df):
        """The constructor takes a dataframe loaded by any sub-class containing the data published by the
        website that is handled in the sub-classes individually.  
        To retrieve the data for individual countries you can use the public methods
        get_data_by_geoid_list or get_data_by_geoid_string_list. These functions take ISO 3166 alpha_2 
        (2 characters long) GeoIDs.

        Args:
            df (dataframe): The dataframe containing the mandatory columns described in the
                            documentation of the class. It is expected to hold the newest date
                            of a country first as the rows are reversed on retrieval.
        """
        # keep the data frame
        self.__df = df
        # load the geo information for the world via GitHub, keep_default_na=False
        # prevents the GeoID 'NA' from being read as NaN
        targetFilename = 'https://raw.githubusercontent.com/1c3t3a/Covid-19-analysis/master/data/GeoInformationWorld.csv'
        self.__dfGeoInformationWorld = pd.read_csv(targetFilename, keep_default_na=False)
 
    @staticmethod
    def __compute_doubling_time(dfSingleCountry):
        """Computes the doubling time for every day with the formula:
                ln(2) / ln(Conf[n] / Conf[n - 1])
        The result is NaN whenever the formula can't be evaluated: for the first
        day, if the previous value is zero or if the quotient is not positive or 
        equal to one (no change from one day to the other).
        
        Args:
            dfSingleCountry (DataFrame): A dataframe holding only one country

        Returns:
            DataFrame: A data frame holding only one column to be appended to another data frame
        """
        result = []
        previous = None
        for value in dfSingleCountry['Cases'].tolist():
            doublingTime = math.nan
            # the quotient conf[n] / conf[n-1] needs a previous value that is not zero
            if previous is not None and previous != 0:
                quotient = value / previous
                # the logarithm is only defined for positive numbers and it is zero 
                # for a quotient of one; a NaN quotient fails the first comparison
                if quotient > 0 and quotient != 1:
                    doublingTime = math.log(2) / math.log(quotient)
            result.append(doublingTime)
            previous = value
        # return the dataframe
        return pd.DataFrame(np.asarray(result))

    @staticmethod
    def create_combined_dataframe_by_geoid_string_list(dfList, geoIDs, lastNdays=0, sinceNcases=0): 
        """Creates a combined dataframe from a list of data source objects. To avoid
        duplicate country names the method will add a '-DATASOURCE' string behind the 
        country name (e.g. 'Germany-OWID'). 

        Args:
            dfList (tuple of CovidCases objects): The objects of the sub-classes to get the data from
            geoIDs (str): A string of comma separated GeoIds that have to be included in all given objects
            lastNdays (int, optional): Get the data only for the last N days. Defaults to 0.
            sinceNcases (int, optional): Get the data since the Nth. case has been exceeded. Defaults to 0.

        Returns:
            [DataFrame]: The combined data frame
        """         
        # a final array of dataframes containing the data of all sources
        dfs = []
        # loop through all data source objects
        for obj in dfList:
            # get the data frame
            df = obj.get_data_by_geoid_string_list(geoIDs, lastNdays, sinceNcases)
            # the short name of the data source is the same for all countries
            suffix = '-' + obj.get_data_source_info()[1]
            # rename the country and add the source info to the name
            for name in df['GeoName'].unique():
                df.replace(name, name + suffix, inplace=True)
            # add it to the list
            dfs.append(df)  
        # finally concatenate all dfs together
        return pd.concat(dfs)

    def __add_additional_attributes(self, dfSingleCountry):
        """Adds additional attributes to a dataframe of a single country. The given 
        data frame is modified in place. An empty data frame is returned unchanged.

        Args:
            dfSingleCountry (DataFrame): A dataframe holding only one country

        Returns:
            DataFrame: The modified data frame of the country
        """
        if dfSingleCountry.empty:
            return dfSingleCountry
        # reset the index on the dataframe (if the argument is just a slice)
        dfSingleCountry.reset_index(inplace=True, drop=True)
        # the population is taken from the first row of the country
        population = dfSingleCountry['Population'].iloc[0]

        # the cumulative cases
        dfSingleCountry['Cases'] = dfSingleCountry['DailyCases'].cumsum()
        # the cumulative deaths
        dfSingleCountry['Deaths'] = dfSingleCountry['DailyDeaths'].cumsum()
        # the percentage of deaths of the cumulative cases
        dfSingleCountry['PercentDeaths'] = dfSingleCountry['Deaths'] * 100 / dfSingleCountry['Cases']
        # the cumulative cases per 1 million population
        dfSingleCountry['CasesPerMillionPopulation'] = dfSingleCountry['Cases'].div(population / 1000000)
        # the cumulative deaths per 1 million population
        dfSingleCountry['DeathsPerMillionPopulation'] = dfSingleCountry['Deaths'].div(population / 1000000)
        
        # the vaccination columns are only evaluated for the OWID data source
        if self.get_data_source_info()[1] == 'OWID':
            # the percentage of people that received the first vaccination dose
            dfSingleCountry['PercentPeopleReceivedFirstDose'] = dfSingleCountry['PeopleReceivedFirstDose'] * 100 / population
            # the percentage of people that are fully vaccinated
            dfSingleCountry['PercentPeopleReceivedAllDoses'] = dfSingleCountry['PeopleReceivedAllDoses'] * 100 / population
        
        # adds the extra attributes
        dfSingleCountry['DoublingTime'] = self.__compute_doubling_time(dfSingleCountry)
        # return the manipulated dataframe
        return dfSingleCountry

    def __add_column_per_country(self, df, requestedAttribute, compute):
        """Adds a column to the df that is calculated separately for each country.
        If the column already exists the function will return the given df.

        Args:
            df (DataFrame): The data frame holding all countries and all columns
            requestedAttribute (str): The name of the column to be added
            compute (callable): A function that takes the data frame of a single country 
                                (index starting at 0) and returns the new column

        Returns:
            DataFrame: A data frame that includes the newly generated column
        """
        # check if the attribute already exists
        if requestedAttribute in df.columns:
            return df
        # our result data frames
        dfs = []
        for geoID in df['GeoID'].unique():
            # get the country dataframe
            dfSingleCountry = df.loc[df['GeoID'] == geoID].copy()
            # reset the index to start from index = 0
            dfSingleCountry.reset_index(inplace=True, drop=True)
            # add the calculated attribute
            dfSingleCountry[requestedAttribute] = compute(dfSingleCountry)
            # add the country to the result
            dfs.append(dfSingleCountry)
        return pd.concat(dfs)

    def __apply_lowpass_filter(self, dfAttribute, n):
        """Returns a dataframe containing the lowpass filtered (with depth n)
        data of the given column. Each value is the mean of the value itself 
        and up to n - 1 values before it. A NaN stays a NaN.

        Args:
            dfAttribute (Series): The column of a single country to be filtered
            n (int): Width of the lowpass filter, has to be at least 1

        Raises:
            ValueError: In case the width is smaller than 1

        Returns:
            DataFrame: A data frame holding only one column to be appended to another data frame
        """
        if n < 1:
            raise ValueError("The width of the lowpass filter has to be at least 1!")
        values = dfAttribute.tolist()
        result = []
        for position, value in enumerate(values):
            # if the data contains NaN, leave it untouched
            if math.isnan(value):
                result.append(math.nan)
                continue
            # the window is shorter than n for the first n - 1 rows
            window = values[max(0, position - n + 1):position + 1]
            result.append(sum(window) / len(window))
        # return the calculated data
        return pd.DataFrame(np.asarray(result))

    def add_lowpass_filter_for_attribute(self, df, attribute, n):
        """Adds a attribute to the df of each country that is the lowpass filtered
        data of the given attribute. The width of the lowpass is given by the
        number n. The name of the newly created attribute is the given name
        with a tailing number n. E.g. 'DailyCases' with n = 7 will add a newly
        created attribute named 'DailyCases7'.
        If the attribute already exists the function will return the given df.

        Args:
            df (DataFrame): The data frame holding all countries and all columns
            attribute (str): The name of the column to be processed
            n (int): The width of the lowpass filter

        Returns:
            DataFrame: A data frame that includes the newly generated column
        """ 
        return self.__add_column_per_country(
            df,
            attribute + str(n),
            lambda dfSingleCountry: self.__apply_lowpass_filter(dfSingleCountry[attribute], n))

    def __apply_r0(self, dfCases):
        """Returns a dataframe containing an estimation for the reproduction
        number R0 of the daily cases given. The estimation is the sum of the 
        last 4 days divided by the sum of the 4 days before them. It is NaN
        for the first 7 rows and if the divisor is zero.

        Args:
            dfCases (Series): The 'DailyCases' column of a single country
            
        Returns:
            DataFrame: A data frame holding only one column to be appended to another data frame
        """
        # we will create 2 blocks and sum the data of each block
        blockSize = 4
        values = dfCases.tolist()
        result = []
        for position in range(len(values)):
            # both blocks have to be completely available
            if position < 2 * blockSize - 1:
                result.append(math.nan)
                continue
            # block 1 ends at the current row, block 0 ends right before block 1
            split = position - blockSize + 1
            sum0 = sum(values[split - blockSize:split])
            sum1 = sum(values[split:position + 1])
            # and R
            result.append(math.nan if sum0 == 0 else sum1 / sum0)
        # return the calculated data
        return pd.DataFrame(np.asarray(result))

    def add_r0(self, df):
        """Adds a attribute to the df of each country that is an estimation of the
        reproduction number R0. Here the number is called 'R'. The returned
        dataframe should finally lowpassed filtered with a kernel size of 1x7.
        If the attribute already exists the function will return the given df.
        
        Args:
            df (DataFrame): The data frame holding all countries and all columns

        Returns:
            DataFrame: A data frame that includes the newly generated column
        """ 
        return self.__add_column_per_country(
            df,
            'R',
            lambda dfSingleCountry: self.__apply_r0(dfSingleCountry['DailyCases']))

    def __apply_incidence_7day_per_100Kpopulation(self, dfAttribute, dfPopulation):
        """Returns a dataframe containing the accumulated 7 day incidence
        of the given columns containing only one country. For the first 
        rows having less than 7 days available the sum is scaled up to 7 days.
        
        Args:
            dfAttribute (Series): The column holding the daily new cases
            dfPopulation (Series): The column holding the population
            
        Returns:
            DataFrame: A data frame holding only one column to be appended to another data frame
        """
        values = dfAttribute.tolist()
        # numpy scalars are kept on purpose so that a population of zero
        # results in inf/NaN instead of raising a ZeroDivisionError
        population = dfPopulation.values
        result = []
        for position in range(len(values)):
            if position < 7:
                # not enough days yet: scale the available days up to 7 days
                daysSum7 = sum(values[0:position + 1]) * 7 / (position + 1)
            else:
                # the sum of the current day and the 6 days before
                daysSum7 = sum(values[position - 6:position + 1])
            result.append(daysSum7 / (population[position] / 100000))
        # return the calculated data
        return pd.DataFrame(np.asarray(result))

    def add_incidence_7day_per_100Kpopulation(self, df):
        """Adds a attribute to the df of each country that is representing the
        accumulated 7-day incidence. That is the sum of the daily cases of 
        the last 7 days divided by the population in 100000 people.
        If the attribute already exists the function will return the given df.
        
        Args:
            df (DataFrame): The data frame holding all countries and all columns

        Returns:
            DataFrame: A data frame that includes the newly generated column
        """ 
        return self.__add_column_per_country(
            df,
            'Incidence7DayPer100Kpopulation',
            lambda dfSingleCountry: self.__apply_incidence_7day_per_100Kpopulation(
                dfSingleCountry['DailyCases'], dfSingleCountry['Population']))

    def save_df_to_csv(self, df, filename):
        """Saves a df to a CSV file

        Args:
            df (DataFrame): The data frame holding all countries and all columns
            filename (str): The name of the output file
        """       
        df.to_csv(filename)

    def get_data_by_geoid_list(self, geoIDs, lastNdays=0, sinceNcases=0):
        """Return the dataframe by a list of geoIDs. Refer to the CSV
        file for a list of available GeoIDs and CountryNames. GeoIDs 
        without any data are skipped.

        Args:
            geoIDs (list): A list of strings holding the GeoIds
            lastNdays (int, optional): Get the data only for the last N days. Defaults to 0.
            sinceNcases (int, optional): Get the data since the Nth. case has been exceeded. Defaults to 0.

        Raises:
            ValueError: In case that both optional arguments have been used (>0), in case 
                        a country never reached sinceNcases cases or in case there is no 
                        data for any of the given GeoIDs 

        Returns:
            DataFrame: A data frame holding the information of the selected countries
        """
        # correct potentially incorrect lists
        geoIDs = self.review_geoid_list(geoIDs)
        # check if only one optional parameter is used
        if lastNdays > 0 and sinceNcases > 0:
            raise ValueError("Only one optional parameter allowed!")
        # our result data frames
        dfs = []
        # get data for each country
        for geoID in geoIDs:
            # get the data for a country
            df = self.__df.loc[self.__df['GeoID'] == geoID].copy()
            # there is nothing to calculate for a GeoID without data
            if df.empty:
                continue
            # reverse the data frame to the newest date in the bottom
            df = df.reindex(index=df.index[::-1])
            # add the additional columns
            df = self.__add_additional_attributes(df)
            # if lastNdays is specified just return these last n days
            if lastNdays > 0:
                df = df.tail(lastNdays)
            # if sinceNcases is specified calculate the start position
            if sinceNcases > 0:
                reached = (df['Cases'] >= sinceNcases).values
                # an illegal input will cause an exception
                if not reached.any():
                    raise ValueError("Number of cases wasn't that high!")
                # copy the data starting at the first row that reached the number
                df = df.iloc[reached.argmax():].copy()
                # reset the index on the remaining data points so that they
                # start at zero
                df.reset_index(inplace=True, drop=True)
            # append this dataframe to our result
            dfs.append(df)
        # return the concatenated dataframe
        return pd.concat(dfs)

    def get_data_by_geoid_string_list(self, geoIDstringList, lastNdays=0, sinceNcases=0):
        """Return the dataframe by a comma separated list of geoIDs. Refer to the CSV
        file for a list of available GeoIDs and CountryNames.

        Args:
            geoIDstringList (str): A string of comma separated GeoIds, it is converted to upper case
            lastNdays (int, optional): Get the data only for the last N days. Defaults to 0.
            sinceNcases (int, optional): Get the data since the Nth. case has been exceeded. Defaults to 0.

        Raises:
            ValueError: In case that both optional arguments have been used (>0) 

        Returns:
            DataFrame: A data frame holding the information of the selected countries
        """
        # split the string
        geoIDs = re.split(r',\s*', geoIDstringList.upper())
        # return the concatenated dataframe
        return self.get_data_by_geoid_list(geoIDs, lastNdays, sinceNcases)

    def get_all_data(self):
        """Return the dataframe of all countries in the database.
        
        Returns:
            DataFrame: A data frame holding the information of all countries in the file
        """
        # return all countries, but first add the extra columns
        return self.get_data_by_geoid_list(self.__df['GeoID'].unique())

    @abstractmethod
    def get_available_GeoID_list(self):
        """
        Returns a dataframe having just two columns for the GeoID and region/country or whatever name.  
        Needs to be implemented by all sub-classes derived from this.

        Returns:
            Dataframe: A dataframe having two columns: The country name and GeoID
        """
        pass 

    @abstractmethod
    def get_data_source_info(self):
        """
        Returns the information about the data source. It holds 3 entries:
        InfoFullName: The full name of the data source
        InfoShortName: A shortname for the data source
        InfoLink: The link to get the data
        This class reads the short name (e.g. 'OWID') using the index [1] of the result.
        Needs to be implemented by all sub-classes derived from this.

        Returns:
            The information about the data source
        """
        pass 

    @abstractmethod
    def review_geoid_list(self, geoIDs):
        """
        Returns a corrected version of the given geoID list to ensure that cases of mismatches like UK-GB are corrected by the sub-class.  
        geoIDs: The list holding the geoIDs as requested such as ['DE', 'UK']

        Returns:
            list: A corrected list such as ['DE', 'GB'] that translates incorrect country codes to corrected codes 
        """
        pass 

    
class CovidCasesWHO(CovidCases):
    """The class loads the CSV file published by the WHO into a DataFrame and passes it to 
    its base class, which provides the methods to process the data. The constructor generates 
    a DataFrame with the following columns:

    Date
    The date of the data 

    GeoName
    The name of the country, taken from the internal GeoInformationWorld list

    GeoID
    The GeoID of the country such as FR for France or DE for Germany

    Population
    The population of the country, taken from the internal GeoInformationWorld list

    Continent
    The continent of the country, taken from the internal GeoInformationWorld list

    DailyCases
    The number of new cases on a given day

    DailyDeaths
    The number of new deaths on the given date

    ATTENTION (carried over from the original documentation): The CovidCasesWHOv1 class is 
    an older version of this class and it will load 50% slower. Both classes produce the 
    same results.
    """

    # GeoIDs of the WHO file that are not taken over into the DataFrame.
    # NOTE(review): presumably these are missing in the GeoInformationWorld list - confirm
    _SKIPPED_GEOIDS = frozenset({
        ' ',   # 'other'
        'XC',  # Saba
        'XB',  # Sint Eustatius
        'AS',  # American Samoa
        'KP',  # Korea, Democratic People's Republic
        'GF',  # French Guiana
        'GP',  # Guadeloupe
        'KI',  # Kiribati
        'MQ',  # Martinique
        'YT',  # Mayotte
        'FM',  # Micronesia
        'NR',  # Nauru
        'NU',  # Niue
        'PW',  # Palau
        'PN',  # Pitcairn Islands
        'RE',  # Réunion
        'BL',  # Saint Barthélemy
        'SH',  # Saint Helena
        'MF',  # Saint Martin
        'PM',  # Saint Pierre and Miquelon
        'TM',  # Turkmenistan
        'TK',  # Tokelau
        'TO',  # Tonga
        'TV',  # Tuvalu
    })

    # requested GeoIDs that have to be translated before they are looked up in the data
    _GEOID_CORRECTIONS = {'UK': 'GB', 'EL': 'GR', 'TW': 'CN'}

    def __init__(self, filename):
        """The constructor takes a string containing the full filename of a CSV
        database you can download from the WHO website:
        https://covid19.who.int/WHO-COVID-19-global-data.csv
        The file is loaded, reduced to the date, GeoID, name, daily cases and daily 
        deaths, enriched country by country with the name, continent and population of
        the internal GeoInformationWorld list and finally passed to the constructor of 
        the base class. The GeoIDs are ISO 3166 alpha_2 (2 characters long) codes.

        Args:
            filename (str): The full path and name of the csv file. 
        """
        # some benchmarking
        start = time.time()
        # open the file, keep_default_na=False keeps a GeoID such as 'NA' as a string
        # instead of parsing it as a missing value
        rawData = pd.read_csv(filename, keep_default_na=False)
        # drop the columns that are not needed
        rawData = rawData.drop(columns=['WHO_region',
                                        'Cumulative_cases',
                                        'Cumulative_deaths'])
        # rename the remaining columns to be more readable (by position)
        rawData.columns = ['Date',
                           'GeoID',
                           'GeoName',
                           'DailyCases',
                           'DailyDeaths']

        # the names, continents and populations come from our internal list
        giw = GeoInformationWorld()
        # NOTE(review): the result was never used, the call is only kept in case it 
        # has a side effect such as loading the list - confirm and remove
        giw.get_geo_information_world()
        # the dataframes of the individual countries
        perCountry = []
        for geoID in rawData['GeoID'].unique():
            if geoID in self._SKIPPED_GEOIDS:
                continue
            # get the data of this country with an index starting at 0
            country = rawData.loc[rawData['GeoID'] == geoID].copy()
            country.reset_index(inplace=True, drop=True)
            rowCount = len(country)
            # Bonaire workaround: the file uses XA, we use BQ
            if geoID == 'XA':
                geoID = 'BQ'
                country['GeoID'] = [geoID] * rowCount
            # replace the name of the file by the name of our internal list if they differ
            geoName = giw.geo_name_from_geoid(geoID)
            if geoName != country['GeoName'][0]:
                country['GeoName'] = [geoName] * rowCount
            # add the continent and the population of our internal list
            country['Continent'] = [giw.continent_from_geoid(geoID)] * rowCount
            country['Population'] = [giw.population_from_geoid(geoID)] * rowCount
            # re-order it from newest to oldest (top-bottom)
            perCountry.append(country.reindex(index=country.index[::-1]))
        # keep the concatenated dataframe
        self.__df = pd.concat(perCountry)
        # re-order the columns to be similar for all sub-classes                                   
        self.__df = self.__df[['Date', 
                              'GeoName', 
                              'GeoID', 
                              'Population', 
                              'Continent', 
                              'DailyCases',
                              'DailyDeaths']]
        # change the type of the 'date' field to a pandas date
        self.__df['Date'] = pd.to_datetime(self.__df['Date'],
                                           format='%Y-%m-%d')
        # some benchmarking
        end = time.time()
        print('Pandas loading the WHO CSV: ' + str(end - start) + 's')
        # pass the dataframe to the base class
        super().__init__(self.__df)

    @staticmethod
    def download_CSV_file():
        """Automatically downloads the database file if it doesn't exist yet. The file 
        is stored as '/content/data/<today as Y-m-d>-WHO-db.csv', the directory is created 
        if it is missing. Needs to be called in a try-catch block as it may throw 
        FileNotFoundError or IOError errors.

        Raises:
            FileNotFoundError: In case the server didn't answer with a success status code

        Returns:
            str: The filename of the database whether it has been downloaded or not.
        """
        # the prefix of the CSV file is Y-m-d of today
        preFix = date.today().strftime('%Y-%m-%d') + "-WHO"
        # the target file
        targetFilename = '/content/data/' + preFix + '-db.csv'
        # nothing to download if today's file exists already
        if os.path.exists(targetFilename):
            print('using existing file: ' + targetFilename)
            return targetFilename
        # download the file from the WHO server
        url = 'https://covid19.who.int/WHO-COVID-19-global-data.csv'
        r = requests.get(url, timeout=1.0)
        if r.status_code != requests.codes.ok:
            raise FileNotFoundError('Error getting CSV file. Error code: ' + str(r.status_code))
        # make sure the target directory exists, otherwise open() would fail
        os.makedirs(os.path.dirname(targetFilename), exist_ok=True)
        with open(targetFilename, 'wb') as f:
            f.write(r.content)
        return targetFilename

    def get_available_GeoID_list(self):
        """Returns a dataframe having just two columns for the GeoID and Country name. 
        Each distinct GeoID/GeoName pair of the data shows up once, in the order of its
        first appearance.

        Returns:
            Dataframe: A dataframe having two columns: The GeoID and the country name
        """ 
        # de-duplicate the pairs instead of the two columns individually, that keeps a
        # GeoID and its name together even if two GeoIDs share the same name
        pairs = self.__df[['GeoID', 'GeoName']].drop_duplicates()
        # number the result starting from 0
        return pairs.reset_index(drop=True)

    def get_data_source_info(self):
        """
        Returns a list containing information about the data source. The list holds 3 strings:
        InfoFullName: The full name of the data source
        InfoShortName: A shortname for the data source
        InfoLink: The link to get the data

        Returns:
            list: The three strings in the order given above
        """
        return ["World Health Organization", 
                "WHO",
                "https://covid19.who.int/WHO-COVID-19-global-data.csv"]

    def review_geoid_list(self, geoIDs):
        """
        Returns a corrected version of the given geoID list. UK is translated to GB, EL to GR
        and TW to CN, all other geoIDs are taken over as they are.  

        Args:
            geoIDs (list): The list holding the geoIDs as requested such as ['DE', 'UK']

        Returns:
            list: A corrected list such as ['DE', 'GB'] that translates incorrect country codes to corrected codes 
        """
        corrections = self._GEOID_CORRECTIONS
        return [corrections.get(geoID, geoID) for geoID in geoIDs]

    @staticmethod
    def get_pygal_european_geoid_list():
        """Returns a list of GeoIDs of European countries for a PyGal map. The list is
        created by splitting the upper-cased string that is returned by 
        CovidCases.get_pygal_european_geoid_string_list.

        NOTE(review): the string is taken from the CovidCases base class and not from the 
        method of the same name in this class - confirm that this is intended.

        Returns:
            list: List of strings of GeoID's
        """
        return re.split(r',\s*', CovidCases.get_pygal_european_geoid_string_list().upper())

    @staticmethod
    def get_pygal_european_geoid_string_list():
        """
        Returns a comma separated list of GeoIDs of European countries. According to the 
        original documentation these are countries that are available in PyGal and the 
        WHO data. The list uses upper case GeoIDs and the codes GB and GR (not UK and EL).

        Returns:
            str: A comma separated list of GeoID's
        """
        # just the main european countries for a map, according to the original comment 
        # pygal doesn't contain e.g. Andorra, Kosovo (XK)
        return ('AM, AL, AZ, AT, BA, BE, BG, BY, CH, CY, CZ, '
                'DE, DK, EE, GR, ES, FI, FR, GE, GL, '
                'HU, HR, IE, IS, IT, LV, LI, LT, '
                'MD, ME, MK, MT, NL, NO, PL, PT, '
                'RU, SE, SI, SK, RO, UA, GB, RS')

    @staticmethod
    def get_pygal_american_geoid_list():
        """Returns a list of GeoIDs of American countries for a PyGal map. The list is
        created by splitting the upper-cased string that is returned by 
        CovidCases.get_pygal_american_geoid_string_list.

        NOTE(review): the string is taken from the CovidCases base class and not from the 
        method of the same name in this class - confirm that this is intended.

        Returns:
            list: List of strings of GeoID's
        """
        return re.split(r',\s*', CovidCases.get_pygal_american_geoid_string_list().upper())
        
    @staticmethod
    def get_pygal_american_geoid_string_list():
        """
        Returns a comma separated list of GeoIDs of American countries. According to the 
        original documentation these are countries that are available in PyGal and the 
        WHO data. The list uses upper case GeoIDs.

        Returns:
            str: A comma separated list of GeoID's
        """
        # just the main american countries for a map
        # NOTE(review): an older comment stated that pygal doesn't contain Bahamas (BS), 
        # Barbados (BB), Bermuda (BM) and Falkland Island (FK), but BS, BB and BM are 
        # part of the list - verify
        # 2022-01-22 added BZ
        return ('AR, BB, BM, BO, BR, BS, CA, CL, CO, '
                'CR, CU, DO, EC, SV, GT, GY, HN, HT, '
                'JM, MX, NI, PA, PE, PR, PY, SR, US, UY, VE, BZ')

    @staticmethod
    def get_pygal_asian_geoid_list():
        """Returns a list of GeoIDs of Asian countries for a PyGal map. The list is
        created by splitting the upper-cased string that is returned by 
        CovidCases.get_pygal_asian_geoid_string_list.

        NOTE(review): the string is taken from the CovidCases base class and not from the 
        method of the same name in this class - confirm that this is intended.

        Returns:
            list: List of strings of GeoID's
        """
        return re.split(r',\s*', CovidCases.get_pygal_asian_geoid_string_list().upper())
        
    @staticmethod
    def get_pygal_asian_geoid_string_list():
        """
        Returns a comma separated list of GeoIDs of Asian countries. According to the 
        original documentation these are countries that are available in PyGal and the 
        WHO data. The list uses upper case GeoIDs.

        Returns:
            str: A comma separated list of GeoID's
        """
        # just the main asian countries for a map
        # NOTE(review): an older comment stated that pygal doesn't contain Qatar (QA),
        # but QA is part of the list - verify
        return ('AF, BH, BD, BT, BN, KH, CN, IR, IQ, IL, JP, JO, '
                'KZ, KW, KG, LA, LB, MY, MV, MN, MM, NP, OM, PK, PS, PH, '
                'QA, SA, SG, KR, LK, SY, TJ, TH, TL, TR, AE, UZ, VN, YE, IN, ID')
    
    @staticmethod
    def get_pygal_african_geoid_list():
        """Returns a list of GeoIDs of African countries for a PyGal map. The list is
        created by splitting the upper-cased string that is returned by 
        CovidCases.get_pygal_african_geoid_string_list.

        NOTE(review): the string is taken from the CovidCases base class and not from the 
        method of the same name in this class - confirm that this is intended.

        Returns:
            list: List of strings of GeoID's
        """
        return re.split(r',\s*', CovidCases.get_pygal_african_geoid_string_list().upper())
        
    @staticmethod
    def get_pygal_african_geoid_string_list():
        """
        Returns a comma separated list of GeoIDs of African countries. According to the 
        original documentation these are countries that are available in PyGal and the 
        WHO data. The list uses upper case GeoIDs.

        Returns:
            str: A comma separated list of GeoID's
        """
        # just the main african countries for a map
        # NOTE(review): an older comment stated that pygal doesn't contain Comoros (KM),
        # but KM is part of the list - verify
        # 2022-01-22 added NA
        return ('DZ, AO, BJ, BW, BF, BI, CM, CV, CF, TD, KM, CG, CI, CD, '
                'DJ, EG, GQ, ER, SZ, ET, GA, GM, GH, GN, GW, KE, LS, LR, '
                'LY, MG, MW, ML, MR, MU, MA, MZ, NE, NG, RW, ST, SN, SC, '
                'SL, SO, ZA, SS, SD, TG, TN, UG, TZ, EH, ZM, ZW, NA')

    @staticmethod
    def get_pygal_oceania_geoid_list():
        """Returns a list of GeoIDs of Oceanian countries for a PyGal map. The list is
        created by splitting the upper-cased string that is returned by 
        CovidCases.get_pygal_oceania_geoid_string_list.

        NOTE(review): the string is taken from the CovidCases base class and not from the 
        method of the same name in this class - confirm that this is intended.

        Returns:
            list: List of strings of GeoID's
        """
        return re.split(r',\s*', CovidCases.get_pygal_oceania_geoid_string_list().upper())

    @staticmethod
    def get_pygal_oceania_geoid_string_list():
        """
        Returns a comma separated list of GeoIDs of Oceanian countries. According to the 
        original documentation these are countries that are available in PyGal and the 
        WHO data. The list uses upper case GeoIDs.

        Returns:
            str: A comma separated list of GeoID's
        """
        # just the main oceania countries for a map
        return 'AU, FJ, PF, GU, NC, NZ, MP, PG'

    
class CovidCasesOWID(CovidCases):
    """The class will expose data attributes in form of a DataFrame. Its base class also provides methods to process 
    the data which will end up in additional columns in the DataFrame. These are the names of the columns
    that are generated. Notice: The 'Continent' column is additional and specific to this sub class.

    Date
    The date of the data 
    
    GeoID
    The GeoID of the country such as FR for France or DE for Germany

    GeoName
    The name of the country

    Population
    The population of the country

    Continent
    The continent of the country

    DailyCases
    The number of new cases on a given day

    DailyDeaths
    The number of new deaths on the given date

    DailyVaccineDosesAdministered7DayAverage
    New COVID-19 vaccination doses administered (7-day smoothed). For countries that 
    don't report vaccination data on a daily basis, we assume that vaccination 
    changed equally on a daily basis over any periods in which no data was reported. 
    This produces a complete series of daily figures, which is then averaged over a 
    rolling 7-day window. 
    In OWID words this is the new_vaccinations_smoothed value.
                              
    PeopleReceivedFirstDose
    Total number of people who received at least one vaccine dose.
    In OWID words this is the people_vaccinated value.

    PeopleReceivedAllDoses
    Total number of people who received all doses prescribed by the vaccination protocol.
    In OWID words this is the people_fully_vaccinated value.

    VaccineDosesAdministered
    Total number of COVID-19 vaccination doses administered. It's the sum of 
    PeopleReceivedFirstDose and PeopleReceivedAllDoses.
    In OWID words this is the total_vaccinations value.

    Continent
    The continent of the country as an additional column.
    
    Returns:
        CovidCasesOWID: A class to provide access to some data based on the OWID file.
    """

    def __init__(self, filename):
        """The constructor takes a string containing the full filename of a CSV
        database you can download from the OWID website:
        https://covid.ourworldindata.org/data/owid-covid-data.csv
        The file is loaded and reduced to 11 columns which are renamed by position. 
        Country by country the name is replaced by the name of the internal 
        GeoInformationWorld list and the ISO 3166 alpha_3 GeoID of the file is replaced 
        by the alpha_2 (2 characters long) GeoID. The result is passed to the 
        constructor of the base class.

        Args:
            filename (str): The full path and name of the csv file. 
        """
        # some benchmarking
        start = time.time()
        # open the file
        rawData = pd.read_csv(filename)
        # the columns that we don't need
        unusedColumns = ['total_cases', 
                         'new_cases_smoothed', 
                         'total_deaths', 
                         'new_deaths_smoothed', 
                         'total_cases_per_million',
                         'new_cases_per_million',
                         'new_cases_smoothed_per_million',
                         'total_deaths_per_million',
                         'new_deaths_per_million',
                         'new_deaths_smoothed_per_million',
                         'reproduction_rate',
                         'icu_patients',
                         'icu_patients_per_million',
                         'hosp_patients',
                         'hosp_patients_per_million',
                         'weekly_icu_admissions',
                         'weekly_icu_admissions_per_million',
                         'weekly_hosp_admissions',
                         'weekly_hosp_admissions_per_million',
                         'new_tests',
                         'total_tests',
                         'total_tests_per_thousand',
                         'new_tests_per_thousand',
                         'new_tests_smoothed',
                         'new_tests_smoothed_per_thousand',
                         'positive_rate',
                         'tests_per_case',
                         'tests_units',
                         'total_vaccinations_per_hundred',
                         'stringency_index',
                         'population_density',
                         'median_age',
                         'aged_65_older',
                         'aged_70_older',
                         'gdp_per_capita',
                         'extreme_poverty',
                         'cardiovasc_death_rate',
                         'diabetes_prevalence',
                         'female_smokers',
                         'male_smokers',
                         'handwashing_facilities',
                         'hospital_beds_per_thousand',
                         'life_expectancy',
                         'human_development_index',
                         'new_vaccinations',
                         'new_vaccinations_smoothed_per_million',
                         'people_fully_vaccinated_per_hundred',
                         'people_vaccinated_per_hundred',
                         'excess_mortality',
                         'total_boosters',
                         'total_boosters_per_hundred',
                         'excess_mortality_cumulative_absolute',
                         'excess_mortality_cumulative',
                         'excess_mortality_cumulative_per_million',
                         'new_people_vaccinated_smoothed',
                         'new_people_vaccinated_smoothed_per_hundred']
        # errors='ignore': a column of this list that no longer exists in the file must
        # not stop the loading
        rawData = rawData.drop(columns=unusedColumns, errors='ignore')
        if rawData.columns.size != 11:
            # oops, there are some new columns in the csv
            print('Detecting new cols in OWID CSV: ' + ', '.join(map(str, rawData.columns)))
            # drop everything behind the first 11 columns
            rawData = rawData.drop(columns=list(rawData.columns[11:]))
            print('Accepting cols in OWID CSV: ' + ', '.join(map(str, rawData.columns)))
        # rename the columns to be more readable, the names are assigned by position so
        # this relies on the order of the columns in the file
        rawData.columns = ['GeoID',
                           'Continent',
                           'GeoName',
                           'Date',
                           'DailyCases',
                           'DailyDeaths',
                           'VaccineDosesAdministered',
                           'PeopleReceivedFirstDose',
                           'PeopleReceivedAllDoses',
                           'DailyVaccineDosesAdministered7DayAverage',
                           'Population']
        # change the type of the 'date' field to a pandas date. No explicit format is 
        # given: the former '%Y/%m/%d' doesn't match dates written as Y-m-d 
        # NOTE(review): newer pandas versions match an explicit format strictly - verify
        rawData['Date'] = pd.to_datetime(rawData['Date'])
        # re-order the columns to be similar for all sub-classes                                   
        rawData = rawData[['Date', 
                           'GeoName', 
                           'GeoID', 
                           'Population', 
                           'Continent', 
                           'DailyCases',
                           'DailyDeaths',
                           'DailyVaccineDosesAdministered7DayAverage',
                           'PeopleReceivedFirstDose',
                           'PeopleReceivedAllDoses',
                           'VaccineDosesAdministered']]
        # to apply the country names from our internal list
        giw = GeoInformationWorld()
        # NOTE(review): the result was never used, the call is only kept in case it 
        # has a side effect such as loading the list - confirm and remove
        giw.get_geo_information_world()
        # the dataframes of the individual countries
        perCountry = []
        for geoID in rawData['GeoID'].unique():
            # rows without a GeoID (a missing value) can't be assigned to a country
            if pd.isna(geoID):
                continue
            # 'OWID World' workaround
            if geoID == 'OWID_WRL':
                continue
            # get the country dataframe with an index starting from 0
            country = rawData.loc[rawData['GeoID'] == geoID].copy()
            country.reset_index(inplace=True, drop=True)
            # we need the newest date being on top
            country = country.reindex(index=country.index[::-1])
            rowCount = len(country)
            # 'Kosovo' workaround
            if geoID == 'OWID_KOS':
                geoID = 'KOS'
            # get the geoName for this geoID from our internal list
            geoName = giw.geo_name_from_ISO3166_alpha_3(geoID)
            # get the alpha-2 geoID from the alpha-3 geoID
            geoID2 = giw.geoID_from_ISO3166_alpha_3(geoID)
            # replace the name of the file if it differs (label 0 is the first row of the file)
            if geoName != country['GeoName'][0]:
                country['GeoName'] = [geoName] * rowCount
            # overwrite the alpha-3 geoID with the alpha-2 geoID so all subclasses can use the same geoIDs
            country['GeoID'] = [geoID2] * rowCount
            # add the country to the result
            perCountry.append(country)
        # done, keep the concatenated dataframe
        self.__df = pd.concat(perCountry)
        # some benchmarking
        end = time.time()
        print('Pandas loading the OWID CSV: ' + str(end - start) + 's')
        # pass the dataframe to the base class
        super().__init__(self.__df)

    @staticmethod
    def download_CSV_file():
        """Automatically downloads the database file if it doesn't exist yet. The file 
        is stored as '/content/data/<today as Y-m-d>-OWID-db.csv', the directory is created 
        if it is missing. Needs to be called in a try-catch block as it may throw 
        FileNotFoundError or IOError errors.

        Raises:
            FileNotFoundError: In case the server didn't answer with a success status code

        Returns:
            str: The filename of the database whether it has been downloaded or not.
        """
        # the prefix of the CSV file is Y-m-d of today
        preFix = date.today().strftime('%Y-%m-%d') + "-OWID"
        # the target file
        targetFilename = '/content/data/' + preFix + '-db.csv'
        # nothing to download if today's file exists already
        if os.path.exists(targetFilename):
            print('using existing file: ' + targetFilename)
            return targetFilename
        # download the file from the OWID server
        url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
        r = requests.get(url, timeout=1.0)
        if r.status_code != requests.codes.ok:
            raise FileNotFoundError('Error getting CSV file. Error code: ' + str(r.status_code))
        # make sure the target directory exists, otherwise open() would fail
        os.makedirs(os.path.dirname(targetFilename), exist_ok=True)
        with open(targetFilename, 'wb') as f:
            f.write(r.content)
        return targetFilename

    def get_available_GeoID_list(self):
        """Returns a dataframe having just two columns for the GeoID and Country name. 
        Each distinct GeoID/GeoName pair of the data shows up once, in the order of its
        first appearance.

        Returns:
            Dataframe: A dataframe having two columns: The GeoID and the country name
        """ 
        # de-duplicate the pairs instead of the two columns individually, that keeps a
        # GeoID and its name together even if two GeoIDs share the same name
        pairs = self.__df[['GeoID', 'GeoName']].drop_duplicates()
        # number the result starting from 0
        return pairs.reset_index(drop=True)

    def get_data_source_info(self):
        """
        Returns information about the data source as a list of 3 strings in this order:
        InfoFullName: The full name of the data source
        InfoShortName: A shortname for the data source
        InfoLink: The link to get the data

        Returns:
            list: The three strings describing the OWID data source
        """
        fullName = "Our World In Data"
        shortName = "OWID"
        link = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
        return [fullName, shortName, link]

    def review_geoid_list(self, geoIDs):
        """
        Returns a corrected version of the given geoID list: UK is translated to GB and 
        EL to GR, all other geoIDs are taken over as they are.  

        Args:
            geoIDs (list): The list holding the geoIDs as requested such as ['DE', 'UK']

        Returns:
            list: A corrected list such as ['DE', 'GB'] that translates incorrect country codes to corrected codes 
        """
        def corrected(code):
            # map the two known mismatches, keep everything else
            if code == 'UK':
                return 'GB'
            if code == 'EL':
                return 'GR'
            return code

        return [corrected(code) for code in geoIDs]

    @staticmethod
    def get_pygal_european_geoid_list():
        """Returns the European GeoIDs of CovidCases.get_pygal_european_geoid_string_list as a 
        list. The comma separated string is converted to upper case and split at every comma 
        (including the whitespace following it). 
        Be aware (as noted for the string list):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            list: List of strings of GeoID's
        """
        # the comma separated string of the european GeoIDs
        commaSeparated = CovidCases.get_pygal_european_geoid_string_list()
        # upper case and split it into the single GeoIDs
        return re.split(r',\s*', commaSeparated.upper())

    @staticmethod
    def get_pygal_european_geoid_string_list():
        """
        Returns the GeoIDs of the main European countries used for a map as one string in 
        which the GeoIDs are separated by a comma and a blank. 
        Be aware (notes of the original author):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. PyGal doesn't contain e.g. Andorra or Kosovo (XK).

        Returns:
            str: A comma separate list of GeoID's
        """
        # the GeoIDs in rows, the rows are glued together using the same separator
        rows = ('AM, AL, AZ, AT, BA, BE, BG, BY, CH, CY, CZ',
                'DE, DK, EE, GR, ES, FI, FR, GE, GL',
                'HU, HR, IE, IS, IT, LV, LI, LT',
                'MD, ME, MK, MT, NL, NO, PL, PT',
                'RU, SE, SI, SK, RO, UA, GB, RS')
        return ', '.join(rows)

    @staticmethod
    def get_pygal_american_geoid_list():
        """Returns the American GeoIDs of CovidCases.get_pygal_american_geoid_string_list as a 
        list. The comma separated string is converted to upper case and split at every comma 
        (including the whitespace following it). 
        Be aware (as noted for the string list):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            list: List of strings of GeoID's
        """
        # the comma separated string of the american GeoIDs
        commaSeparated = CovidCases.get_pygal_american_geoid_string_list()
        # upper case and split it into the single GeoIDs
        return re.split(r',\s*', commaSeparated.upper())
        
    @staticmethod
    def get_pygal_american_geoid_string_list():
        """
        Returns the GeoIDs of the main American countries used for a map as one string in 
        which the GeoIDs are separated by a comma and a blank. 
        Be aware (notes of the original author):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            str: A comma separate list of GeoID's
        """
        # NOTE(review): the former comment stated that pygal doesn't contain Bahamas (BS), 
        # Barbados (BB), Bermuda (BM) and Falkland Island (FK), but BS, BB and BM are part 
        # of the list - to be confirmed
        rows = ('AR, BB, BM, BO, BR, BS, CA, CL, CO',
                'CR, CU, DO, EC, SV, GT, GY, HN, HT',
                'JM, MX, NI, PA, PE, PR, PY, SR, US, UY, VE')
        return ', '.join(rows)

    @staticmethod
    def get_pygal_asian_geoid_list():
        """Returns the Asian GeoIDs of CovidCases.get_pygal_asian_geoid_string_list as a 
        list. The comma separated string is converted to upper case and split at every comma 
        (including the whitespace following it). 
        Be aware (as noted for the string list):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            list: List of strings of GeoID's
        """
        # the comma separated string of the asian GeoIDs
        commaSeparated = CovidCases.get_pygal_asian_geoid_string_list()
        # upper case and split it into the single GeoIDs
        return re.split(r',\s*', commaSeparated.upper())
        
    @staticmethod
    def get_pygal_asian_geoid_string_list():
        """
        Returns the GeoIDs of the main Asian countries used for a map as one string in 
        which the GeoIDs are separated by a comma and a blank. 
        Be aware (notes of the original author):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            str: A comma separate list of GeoID's
        """
        # NOTE(review): the former comment stated that pygal doesn't contain Qatar (QA), 
        # but QA is part of the list - to be confirmed
        rows = ('AF, BH, BD, BT, BN, KH, CN, IR, IQ, IL, JP, JO',
                'KZ, KW, KG, LA, LB, MY, MV, MN, MM, NP, OM, PK, PS, PH',
                'QA, SA, SG, KR, LK, SY, TW, TJ, TH, TL, TR, AE, UZ, VN, YE, IN, ID')
        return ', '.join(rows)
    
    @staticmethod
    def get_pygal_african_geoid_list():
        """Returns the African GeoIDs of CovidCases.get_pygal_african_geoid_string_list as a 
        list. The comma separated string is converted to upper case and split at every comma 
        (including the whitespace following it). 
        Be aware (as noted for the string list):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            list: List of strings of GeoID's
        """
        # the comma separated string of the african GeoIDs
        commaSeparated = CovidCases.get_pygal_african_geoid_string_list()
        # upper case and split it into the single GeoIDs
        return re.split(r',\s*', commaSeparated.upper())
        
    @staticmethod
    def get_pygal_african_geoid_string_list():
        """
        Returns the GeoIDs of the main African countries used for a map as one string in 
        which the GeoIDs are separated by a comma and a blank. 
        Be aware (notes of the original author):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            str: A comma separate list of GeoID's
        """
        # NOTE(review): the former comment stated that pygal doesn't contain Comoros (KM), 
        # but KM is part of the list - to be confirmed
        # 2022-01-20 added NA
        rows = ('DZ, AO, BJ, BW, BF, BI, CM, CV, CF, TD, KM, CG, CI, CD',
                'DJ, EG, GQ, ER, SZ, ET, GA, GM, GH, GN, GW, KE, LS, LR',
                'LY, MG, MW, ML, MR, MU, MA, MZ, NE, NG, RW, ST, SN, SC',
                'SL, SO, ZA, SS, SD, TG, TN, UG, TZ, EH, ZM, ZW, NA')
        return ', '.join(rows)

    @staticmethod
    def get_pygal_oceania_geoid_list():
        """Returns the Oceanian GeoIDs of CovidCases.get_pygal_oceania_geoid_string_list as a 
        list. The comma separated string is converted to upper case and split at every comma 
        (including the whitespace following it). 
        Be aware (as noted for the string list):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            list: List of strings of GeoID's
        """
        # the comma separated string of the oceanian GeoIDs
        commaSeparated = CovidCases.get_pygal_oceania_geoid_string_list()
        # upper case and split it into the single GeoIDs
        return re.split(r',\s*', commaSeparated.upper())

    @staticmethod
    def get_pygal_oceania_geoid_string_list():
        """
        Returns the GeoIDs of the main Oceanian countries used for a map as one string in 
        which the GeoIDs are separated by a comma and a blank. 
        Be aware (notes of the original author):
        Not all countries of the WHO are available in PyGal and some names are different 
        (GB in PyGal = UK in WHO, GR in PyGal = EL in WHO). PyGal uses lower case and WHO
        upper case. 

        Returns:
            str: A comma separate list of GeoID's
        """
        # NOTE(review): the former comment named Comoros (KM) as an example of a country 
        # missing in pygal, that looks like a leftover of the african list - to be confirmed
        return 'AU, FJ, PF, GU, NC, NZ, MP, PG'


# CovidFoliumMap class

This abstract class acts as a base class for other classes that implement different folium maps based on different data 
sources. Here are some useful links:

- Geodata visualization   
  Folium: The documentation is available on https://python-visualization.github.io/folium/  
   Different basemaps are available on https://leaflet-extras.github.io/leaflet-providers/preview/
            

In [4]:
!pip install geopandas
!pip install folium
import pandas as pd
import numpy as np
import os
import geopandas as gpd
import folium
import requests
import json
import time
import datetime
from abc import ABC, abstractmethod
from pathlib import Path

class FoliumCovid19Map(ABC):
    """
    This abstract base class will expose an interface to deal with Choropleth maps to display Covid-19 data attributes. It does this  
    by providing access to a pandas geoJSON dataframe and a data dataframe. It also provides methods to generate a default map.
    """
    def __init__(self, dfGeo, dfData, dataDirectory):
        """
        The constructor takes two dataframes and the data directory. One dataframe containing geoJSON information and a second 
        containing CoVid-19 data. The objects are kept as they are and are returned by the getter methods.

        Args:
            dfGeo (dataframe): The geoPandas dataframe containing geometry information of the countries and regions of the world.  
            dfData (dataframe): The 'regular' Pandas dataframe containing Covid-19 data to be shown on the map
            dataDirectory (str): The data directory that is returned by get_data_directory
        """
        # keep the data frames
        self.__dfGeo = dfGeo
        # be aware: no trailing comma here, it would store a 1-tuple instead of the dataframe
        self.__dfData = dfData
        self.__dataDirectory = dataDirectory

    def get_geo_df(self):
        """Returns the geoPandas data frame
        
        Args:
            -

        Returns:
            DataFrame: The geoPandas data frame containing geoJSON geometries as given to the constructor
        """
        return self.__dfGeo
    
    def get_data_df(self):
        """Returns the Pandas data frame holding the Covid-19 data
        
        Args:
            -

        Returns:
            DataFrame: The Pandas data frame containing Covid-19 data as given to the constructor
        """
        return self.__dfData

    def get_data_directory(self):
        """Returns the data directory as a string
        
        Args:
            -

        Returns:
            DataDirectory: The data directory as given to the constructor
        """
        return self.__dataDirectory

    @abstractmethod
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map. To be implemented by the sub-classes.

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The name of the data column used to color the map. Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The text describing the colored attribute. Defaults to '7-day incidence per 100.000 population'.
        """
        pass 

    @abstractmethod
    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        pass 

    @abstractmethod
    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it 

        Returns:
            string: A string holding the name of the map 
        """
        pass 

    @abstractmethod
    def get_nice_basemaps(self):
        """
        Returns an array of strings referring to nice basemaps for the specific region. At least one basemaps should be given and 
        the preferred basemap should be basemap[0]

        Returns:
            string: A array of strings referring to nice basemaps 
        """
        pass 


You should consider upgrading via the '/Users/cmbt/.pyenv/versions/3.7.7/bin/python3 -m pip install --upgrade pip' command.
You should consider upgrading via the '/Users/cmbt/.pyenv/versions/3.7.7/bin/python3 -m pip install --upgrade pip' command.


# CovidFoliumMapXXX classes

These classes implement different folium maps based on the data of the WHO using the CovidCases and CovidCasesWHO classes and, in the case of the World and Asia maps, also the CovidCasesOWID class to map the Taiwan cases as well.  
Each class inherits from the CovidFoliumMap class.


In [5]:

class FoliumCovid19MapEurope(FoliumCovid19Map):
    """
    This class will expose an interface to deal with Choropleth maps to display Covid-19 data attributes for European countries. 
    """
    def __init__(self, dataDirectory = '../data'):
        """ Constructor. Ensures that the data directory exists, loads the geo dataframe and, if that worked, the 
        covid data and finally passes everything to the base class.

        Args:
            dataDirectory (str, optional): The data directory to be used for cached data. Defaults to '../data'.
        """
        # init members
        self.__dfGeo = None
        self.__dfData = None
        self.__generationDate = None
        self.__alias = 'MapEurope'
        # ensure that the data directory exists, meaning to create it if it is not available
        self.__dataDirectory = ensure_path_exists(dataDirectory)
        # check if it really exists
        if self.__dataDirectory != '':
            # get the geo JSON data frame
            self.__dfGeo = self.__get_geo_data()
            # get the covid data only if the geo data is available. Check the member itself: the bound 
            # method 'self.get_geo_df' is never None so comparing it with None is always true
            if self.__dfGeo is not None:
                self.__dfData = self.__get_covid_data()
        # pass the everything to the base class
        super().__init__(self.__dfGeo, self.__dfData, self.__dataDirectory)

    def __get_geo_data(self):
        """ Downloads the geoJSON file of the world countries from GitHub (datasets/geo-countries) if it is not yet 
        in the data directory and opens it to return a geoPandas dataframe. Errors during the download are printed.

        Returns:
            geo dataframe: the geo dataframe of the countries or None if the file is not available
        """
        # init return
        geoDf = None
        # the filename of the geoJSON that is used
        targetFilename = self.__dataDirectory + '/' + 'WorldCountries.geojson'
        # check if it exist already
        if not os.path.exists(targetFilename):
            # download the file
            print('Downloading data, that might take some time...')
            endpoint = 'https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson'
            # the manual download link is
            # 'https://github.com/datasets/geo-countries/blob/master/data/countries.geojson'
            try:
                # try to download the file 
                download_JSON_file(endpoint, targetFilename)
                print('Download finished.')
            except Exception as e:
                if hasattr(e, 'message'):
                    print(e.message)
                else:
                    print(e)    
        # now the file should exist
        if os.path.exists(targetFilename):
            # load the file
            geoDf = gpd.read_file(targetFilename)
            # adjust column names. This has to happen only if the file was loaded, otherwise 
            # geoDf is still None and the assignment would raise an AttributeError
            geoDf.columns = ['Name', 'ISO-3166-alpha_3', 'GeoID', 'geometry']
        # finally return the geo df
        return geoDf

    def __get_covid_data(self):
        """ Gets the covid-19 data of the European countries using the CovidCasesWHO class, adds the 7-day incidence 
        and returns the rows of the generation date. The generation date is yesterday or, on Sundays and Mondays, 
        the date given by this_or_last_weekday for weekday 4 (Friday). Errors during the download are printed.
        
        Returns:
            covid dataframe: the covid data of the generation date or None if it can't load the data
        """
        # init the result
        df = None
        try:
            # get the latests database file as a CSV
            dataFile = CovidCasesWHO.download_CSV_file()
            # get the data for the countryList
            whoData = CovidCasesWHO(dataFile)
        except Exception as e:
            if hasattr(e, 'message'):
                print(e.message)
            else:
                print(e)  
            return df
        # the list of comma separated geoIDs
        countryList = whoData.get_pygal_european_geoid_string_list()
        # get the data for the country list
        df = whoData.get_data_by_geoid_string_list(countryList)
        # add the incidence
        df = whoData.add_incidence_7day_per_100Kpopulation(df)
        # get the data for last friday, on days reporting will not be good
        today = date.today()
        # take care of weekends as the data is often not available on weekends. On Sunday (6) and 
        # Monday (0) the day before is a weekend day so fall back to Friday
        if (today.weekday() == 0) or (today.weekday() == 6):
            last_friday = this_or_last_weekday(today, 4)
            self.__generationDate = date(last_friday.year, last_friday.month, last_friday.day)
        else:
            self.__generationDate = today - timedelta(1)
        # get the data for that date
        dfDate = df.loc[df['Date'] == pd.to_datetime(self.__generationDate)]     
        # ...and return df
        return dfDate
    
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map showing a choropleth of the colored attribute with a tooltip and a layer control

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The name of the column used to color the countries. Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The legend text, the current date will be appended. Defaults to '7-day incidence per 100.000 population'.

        Returns:
            map: The folium map or None if the geo data or the covid data is not available
        """
        # check if we have everything that we need
        if (self.__dfGeo is None) or (self.__dfData is None):
            return None
        # the data columns that are merged to the geo data and shown in the tooltip
        tooltipFields = ['GeoName', 
                         'Cases', 
                         'Deaths', 
                         'PercentDeaths',
                         'DailyCases', 
                         'DailyDeaths', 
                         'Incidence7DayPer100Kpopulation',
                         'CasesPerMillionPopulation',
                         'DeathsPerMillionPopulation']
        # merge geo and data dfs. ensure merging to the geoDF to keep the result a geoPandas df
        combined = self.__dfGeo.merge(self.__dfData[[self.get_merge_UID()] + tooltipFields],
                                      on=self.get_merge_UID(), 
                                      how='left')
        # create the map
        foliumMap = folium.Map(attr='WHO data. Map generated by CMBT, 2022', location=[51.3, 10.5], tiles=basemap, zoom_start=4)
        # the alias incl. the date
        coloredAttributeAlias = coloredAttributeAlias + ' as of ' + date.today().strftime('%Y-%m-%d')
        # the bins for the colored values (an alternative would be fixed bins such as 0, 150, 300, ... up 
        # to the maximum of the column)
        bins = list(combined[coloredAttribute].quantile([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]))
        # build the choropleth
        cp = folium.Choropleth (geo_data=combined,
                                data=combined,
                                columns=[self.get_merge_UID(), coloredAttribute],
                                key_on='feature.properties.' + self.get_merge_UID(),
                                fill_color='YlOrRd',
                                fill_opacity=0.4,
                                line_opacity=0.4,
                                nan_fill_color='#f5f5f3',
                                legend_name=coloredAttributeAlias,
                                bins=[float(x) for x in bins],
                                highlight=True,
                                smooth_factor = 0.1)
        # give it a name
        cp.layer_name = "Covid-19 data"  
        # add it to the map
        cp.add_to(foliumMap)
        # create a tooltip for hovering
        tt = folium.GeoJsonTooltip(fields=tooltipFields)
        # add it to the json
        tt.add_to(cp.geojson)
        # numbers and dates in the system local
        tt.localize = True
        # add a layer control to the map
        folium.LayerControl().add_to(foliumMap)
        # a legend
        #legend_html = '<div style="position: fixed; bottom: 75px; left: 50%; margin-left: -350px; width: 700px; height: 20px; z-index:9999; font-size:20px;">&nbsp; ' + 'Generated on ' + date.today().strftime('%Y-%m-%d') + '<br></div>'
        #map.get_root().html.add_child(folium.Element(legend_html))
        # return the map
        return foliumMap

    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        return 'GeoID'

    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it

        Returns:
            string: A string holding the name of the map 
        """
        return self.__alias    

    def get_nice_basemaps(self):
        """
        Returns an array of strings referring to nice basemaps for the specific region. At least one basemaps should be given and 
        the preferred basemap should be basemap[0]

        Returns:
            string: A array of strings referring to nice basemaps 
        """
        mapArray = ['https://server.arcgisonline.com/arcgis/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{z}/{y}/{x}',
                    'cartodbpositron',
                    'Stamen Terrain']
        return mapArray


class FoliumCovid19MapAsia(FoliumCovid19Map):
    """
    This class will expose an interface to deal with Choropleth maps to display Covid-19 data attributes for Asian countries. 
    """
    def __init__(self, dataDirectory = '../data'):
        """ Constructor. Ensures that the data directory exists, loads the geo dataframe and, if that worked, the 
        covid data and finally passes everything to the base class.

        Args:
            dataDirectory (str, optional): The data directory to be used for cached data. Defaults to '../data'.
        """
        # init members
        self.__dfGeo = None
        self.__dfData = None
        self.__generationDate = None
        self.__alias = 'MapAsia'
        # ensure that the data directory exists, meaning to create it if it is not available
        self.__dataDirectory = ensure_path_exists(dataDirectory)
        # check if it really exists
        if self.__dataDirectory != '':
            # get the geo JSON data frame
            self.__dfGeo = self.__get_geo_data()
            # get the covid data only if the geo data is available. Check the member itself: the bound 
            # method 'self.get_geo_df' is never None so comparing it with None is always true
            if self.__dfGeo is not None:
                self.__dfData = self.__get_covid_data()
        # pass the everything to the base class
        super().__init__(self.__dfGeo, self.__dfData, self.__dataDirectory)

    def __get_geo_data(self):
        """ Downloads the geoJSON file of the world countries from GitHub (datasets/geo-countries) if it is not yet 
        in the data directory and opens it to return a geoPandas dataframe. Errors during the download are printed.

        Returns:
            geo dataframe: the geo dataframe of the countries or None if the file is not available
        """
        # init return
        geoDf = None
        # the filename of the geoJSON that is used
        targetFilename = self.__dataDirectory + '/' + 'WorldCountries.geojson'
        # check if it exist already
        if not os.path.exists(targetFilename):
            # download the file
            print('Downloading data, that might take some time...')
            endpoint = 'https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson'
            # the manual download link is
            # 'https://github.com/datasets/geo-countries/blob/master/data/countries.geojson'
            try:
                # try to download the file 
                download_JSON_file(endpoint, targetFilename)
                print('Download finished.')
            except Exception as e:
                if hasattr(e, 'message'):
                    print(e.message)
                else:
                    print(e)    
        # now the file should exist
        if os.path.exists(targetFilename):
            # load the file
            geoDf = gpd.read_file(targetFilename)
            # adjust column names. This has to happen only if the file was loaded, otherwise 
            # geoDf is still None and the assignment would raise an AttributeError
            geoDf.columns = ['Name', 'ISO-3166-alpha_3', 'GeoID', 'geometry']
        # finally return the geo df
        return geoDf

    def __get_covid_data(self):
        """ Gets the covid-19 data of the Asian countries using the CovidCasesWHO class, appends the data of Taiwan 
        taken from the CovidCasesOWID class, adds the 7-day incidence and returns the rows of the generation date. 
        The generation date is yesterday or, on Sundays and Mondays, the date given by this_or_last_weekday for 
        weekday 4 (Friday). Errors during the downloads are printed. If only the OWID data can't be loaded the 
        result contains the WHO data without Taiwan.
        
        Returns:
            covid dataframe: the covid data of the generation date or None if it can't load the WHO data
        """
        # init the result
        df = None
        try:
            # get the latests WHO database file as a CSV
            dataFile = CovidCasesWHO.download_CSV_file()
            # get the data for the WHO countryList
            whoData = CovidCasesWHO(dataFile)
        except Exception as e:
            if hasattr(e, 'message'):
                print(e.message)
            else:
                print(e)  
            return df
        # the list of comma separated geoIDs
        countryList = whoData.get_pygal_asian_geoid_string_list()
        # get the data for the country list
        df = whoData.get_data_by_geoid_string_list(countryList)
        # add the incidence
        df = whoData.add_incidence_7day_per_100Kpopulation(df)
        # the OWID data is optional, it just adds Taiwan
        owidData = None
        try:
            # get the OWID database as well
            dataFile = CovidCasesOWID.download_CSV_file()
            # get the OWID data
            owidData = CovidCasesOWID(dataFile)
        except Exception as e:
            # don't return here: the dataframe still holds the rows of all dates and has to be 
            # reduced to the generation date below, so just continue without the Taiwan data
            if hasattr(e, 'message'):
                print(e.message)
            else:
                print(e)  
        if owidData is not None:
            # the taiwan data
            dfTW = owidData.get_data_by_geoid_string_list('TW')
            # add the incidence
            dfTW = owidData.add_incidence_7day_per_100Kpopulation(dfTW)  
            # append it
            df = pd.concat([df, dfTW])  
        # get the data for last friday, on days reporting will not be good
        today = date.today()
        # take care of weekends as the data is often not available on weekends. On Sunday (6) and 
        # Monday (0) the day before is a weekend day so fall back to Friday
        if (today.weekday() == 0) or (today.weekday() == 6):
            last_friday = this_or_last_weekday(today, 4)
            self.__generationDate = date(last_friday.year, last_friday.month, last_friday.day)
        else:
            self.__generationDate = today - timedelta(1)
        # get the data for that date
        dfDate = df.loc[df['Date'] == pd.to_datetime(self.__generationDate)]      
        # ...and return df
        return dfDate
    
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map showing a choropleth of the colored attribute with a tooltip and a layer control

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The name of the column used to color the countries. Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The legend text, the current date will be appended. Defaults to '7-day incidence per 100.000 population'.

        Returns:
            map: The folium map or None if the geo data or the covid data is not available
        """
        # check if we have everything that we need
        if (self.__dfGeo is None) or (self.__dfData is None):
            return None
        # the data columns that are merged to the geo data and shown in the tooltip
        tooltipFields = ['GeoName', 
                         'Cases', 
                         'Deaths', 
                         'PercentDeaths',
                         'DailyCases', 
                         'DailyDeaths', 
                         'Incidence7DayPer100Kpopulation',
                         'CasesPerMillionPopulation',
                         'DeathsPerMillionPopulation']
        # merge geo and data dfs. ensure merging to the geoDF to keep the result a geoPandas df
        combined = self.__dfGeo.merge(self.__dfData[[self.get_merge_UID()] + tooltipFields],
                                      on=self.get_merge_UID(), 
                                      how='left')
        # create the map (the attribution formerly contained the typo 'genrated')
        foliumMap = folium.Map(attr='WHO data. Map generated by CMBT, 2022', location=[23, 92], tiles=basemap, zoom_start=4)
        # the alias incl. the date
        coloredAttributeAlias = coloredAttributeAlias + ' as of ' + date.today().strftime('%Y-%m-%d')
        # the bins for the colored values (an alternative would be fixed bins such as 0, 150, 300, ... up 
        # to the maximum of the column)
        bins = list(combined[coloredAttribute].quantile([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]))
        # build the choropleth
        cp = folium.Choropleth (geo_data=combined,
                                data=combined,
                                columns=[self.get_merge_UID(), coloredAttribute],
                                key_on='feature.properties.' + self.get_merge_UID(),
                                fill_color='YlOrRd',
                                fill_opacity=0.4,
                                line_opacity=0.4,
                                nan_fill_color='#f5f5f3',
                                legend_name=coloredAttributeAlias,
                                bins=[float(x) for x in bins],
                                highlight=True,
                                smooth_factor = 0.1)
        # give it a name
        cp.layer_name = "Covid-19 data"  
        # add it to the map
        cp.add_to(foliumMap)
        # create a tooltip for hovering
        tt = folium.GeoJsonTooltip(fields=tooltipFields)
        # add it to the json
        tt.add_to(cp.geojson)
        # numbers and dates in the system local
        tt.localize = True
        # add a layer control to the map
        folium.LayerControl().add_to(foliumMap)
        # a legend
        #legend_html = '<div style="position: fixed; bottom: 75px; left: 50%; margin-left: -350px; width: 700px; height: 20px; z-index:9999; font-size:20px;">&nbsp; ' + 'Generated on ' + date.today().strftime('%Y-%m-%d') + '<br></div>'
        #map.get_root().html.add_child(folium.Element(legend_html))
        # return the map
        return foliumMap

    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        return 'GeoID'

    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it

        Returns:
            string: A string holding the name of the map 
        """
        return self.__alias  

    def get_nice_basemaps(self):
        """
        Returns an array of strings referring to nice basemaps for the specific region. At least one basemaps should be given and 
        the preferred basemap should be basemap[0]

        Returns:
            string: A array of strings referring to nice basemaps 
        """
        mapArray = ['https://server.arcgisonline.com/arcgis/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{z}/{y}/{x}',
                    'cartodbpositron',
                    'Stamen Terrain']
        return mapArray

class FoliumCovid19MapAmerica(FoliumCovid19Map):
    """
    This class will expose an interface to deal with Choropleth maps to display Covid-19 data attributes of the 
    WHO for American countries. 
    """
    def __init__(self, dataDirectory = '../data'):
        """ Constructor

        Args:
            dataDirectory (str, optional): The data directory to be used for cached data. Defaults to '../data'.
        """
        # init members
        self.__dfGeo = None
        self.__dfData = None
        self.__generationDate = None
        self.__alias = 'MapAmerica'
        # ensure that the data directory exists, meaning to create it if it is not available
        self.__dataDirectory = ensure_path_exists(dataDirectory)
        # an empty string signals that the directory could not be created
        if self.__dataDirectory != '':
            # get the geo JSON data frame
            self.__dfGeo = self.__get_geo_data()
            # get the covid data only if the geo data is really available
            if self.__dfGeo is not None:
                self.__dfData = self.__get_covid_data()
        # pass everything to the base class
        super().__init__(self.__dfGeo, self.__dfData, self.__dataDirectory)

    def __get_geo_data(self):
        """ Downloads the geoJSON file of the world countries (GitHub, datasets/geo-countries) if it is not yet cached 
        in the data directory and opens it to return a geoPandas dataframe. Errors during the download are printed 
        and not raised, errors while reading the file are passed on to the caller.

        Returns:
            geo dataframe: the geo dataframe of the world countries or None if the file is not available
        """
        # init return
        geoDf = None
        # the filename of the geoJSON that is used
        targetFilename = os.path.join(self.__dataDirectory, 'WorldCountries.geojson')
        # check if it exist already
        if not os.path.exists(targetFilename):
            # download the file
            print('Downloading data, that might take some time...')
            endpoint = 'https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson'
            # the manual download link is
            # 'https://github.com/datasets/geo-countries/blob/master/data/countries.geojson'
            try:
                # try to download the file 
                download_JSON_file(endpoint, targetFilename)
                print('Download finished.')
            except Exception as e:
                # best effort: report the problem, the check below decides how to go on
                print(getattr(e, 'message', e))
        # now the file should exist
        if os.path.exists(targetFilename):
            # load the file
            geoDf = gpd.read_file(targetFilename)
            # adjust column names, only possible if the file has been loaded
            # NOTE(review): assumes the file provides exactly these four columns in this order - confirm
            geoDf.columns = ['Name', 'ISO-3166-alpha_3', 'GeoID', 'geometry']
        # finally return the geo df
        return geoDf

    def __get_covid_data(self):
        """ Gets the covid-19 data via the CovidCasesWHO class, restricts them to the American countries and to a 
        single reporting date. The reporting date is kept in the generation date member. 
        
        Returns:
            covid dataframe: the covid data of the American countries for the reporting date or None if the WHO 
            data can't be loaded
        """
        try:
            # get the latests database file as a CSV
            dataFile = CovidCasesWHO.download_CSV_file()
            # open the WHO data
            whoData = CovidCasesWHO(dataFile)
        except Exception as e:
            print(getattr(e, 'message', e))
            return None
        # the list of comma separated geoIDs
        countryList = whoData.get_pygal_american_geoid_string_list()
        # get the data for the country list
        df = whoData.get_data_by_geoid_string_list(countryList)
        # add the incidence
        df = whoData.add_incidence_7day_per_100Kpopulation(df)
        today = date.today()
        # weekday() is 0 on Mondays and 6 on Sundays. On these days 'yesterday' is a day of the weekend 
        # and as the data is often not available on weekends the last Friday is taken instead
        if today.weekday() in (0, 6):
            last_friday = this_or_last_weekday(today, 4)
            self.__generationDate = date(last_friday.year, last_friday.month, last_friday.day)
        else:
            self.__generationDate = today - timedelta(1)
        # ...and return the data for that date
        return df.loc[df['Date'] == pd.to_datetime(self.__generationDate)]
    
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The name of the column that defines the colors of the countries. 
                Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The text of the legend, the current date will be appended. 
                Defaults to '7-day incidence per 100.000 population'.

        Returns:
            folium map: the map or None if the geo or the covid data is not available
        """
        # check if we have everything that we need
        if (self.__dfGeo is None) or (self.__dfData is None):
            return None
        # the attributes that are merged to the geo data and shown in the tooltip
        tooltipFields = ['GeoName', 
                         'Cases', 
                         'Deaths', 
                         'PercentDeaths',
                         'DailyCases', 
                         'DailyDeaths', 
                         'Incidence7DayPer100Kpopulation',
                         'CasesPerMillionPopulation',
                         'DeathsPerMillionPopulation']
        mergeUID = self.get_merge_UID()
        # merge geo and data dfs. ensure merging to the geoDF to keep the result a geoPandas df
        combined = self.__dfGeo.merge(self.__dfData[[mergeUID] + tooltipFields],
                                      on=mergeUID, 
                                      how='left')
        # create the map
        foliumMap = folium.Map(attr='WHO data. Map generated by CMBT, 2022', location=[16, -86], tiles=basemap, zoom_start=3)
        # the alias incl. the date
        # NOTE(review): the legend shows the current date although the data is the one of the generation 
        # date (yesterday or last Friday) - confirm that this is intended
        coloredAttributeAlias = coloredAttributeAlias + ' as of ' + date.today().strftime('%Y-%m-%d')
        # the bins for the colored values
        quantiles = combined[coloredAttribute].quantile([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
        bins = [float(x) for x in quantiles]
        # build the choropleth
        cp = folium.Choropleth(geo_data=combined,
                               data=combined,
                               columns=[mergeUID, coloredAttribute],
                               key_on='feature.properties.' + mergeUID,
                               fill_color='YlOrRd',
                               fill_opacity=0.4,
                               line_opacity=0.4,
                               nan_fill_color='#f5f5f3',
                               legend_name=coloredAttributeAlias,
                               bins=bins,
                               highlight=True,
                               smooth_factor=0.1)
        # give it a name
        cp.layer_name = "Covid-19 data"  
        # add it to the map
        cp.add_to(foliumMap)
        # create a tooltip for hovering
        tt = folium.GeoJsonTooltip(fields=tooltipFields)
        # add it to the json
        tt.add_to(cp.geojson)
        # numbers and dates in the system local
        tt.localize = True
        # add a layer control to the map
        folium.LayerControl().add_to(foliumMap)
        # return the map
        return foliumMap

    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        return 'GeoID'

    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it

        Returns:
            string: A string holding the alias of the map 
        """
        return self.__alias  

    def get_nice_basemaps(self):
        """
        Returns a list of strings referring to nice basemaps for the specific region. At least one basemap should be given and 
        the preferred basemap should be the first one

        Returns:
            list: A list of strings referring to nice basemaps 
        """
        mapArray = ['https://server.arcgisonline.com/arcgis/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{z}/{y}/{x}',
                    'cartodbpositron',
                    'Stamen Terrain']
        return mapArray

class FoliumCovid19MapOceania(FoliumCovid19Map):
    """
    This class will expose an interface to deal with Choropleth maps to display Covid-19 data attributes of the 
    WHO for the countries of Oceania. 
    """
    def __init__(self, dataDirectory = '../data'):
        """ Constructor

        Args:
            dataDirectory (str, optional): The data directory to be used for cached data. Defaults to '../data'.
        """
        # init members
        self.__dfGeo = None
        self.__dfData = None
        self.__generationDate = None
        self.__alias = 'MapOceania'
        # ensure that the data directory exists, meaning to create it if it is not available
        self.__dataDirectory = ensure_path_exists(dataDirectory)
        # an empty string signals that the directory could not be created
        if self.__dataDirectory != '':
            # get the geo JSON data frame
            self.__dfGeo = self.__get_geo_data()
            # get the covid data only if the geo data is really available
            if self.__dfGeo is not None:
                self.__dfData = self.__get_covid_data()
        # pass everything to the base class
        super().__init__(self.__dfGeo, self.__dfData, self.__dataDirectory)

    def __get_geo_data(self):
        """ Downloads the geoJSON file of the world countries (GitHub, datasets/geo-countries) if it is not yet cached 
        in the data directory and opens it to return a geoPandas dataframe. Errors during the download are printed 
        and not raised, errors while reading the file are passed on to the caller.

        Returns:
            geo dataframe: the geo dataframe of the world countries or None if the file is not available
        """
        # init return
        geoDf = None
        # the filename of the geoJSON that is used
        targetFilename = os.path.join(self.__dataDirectory, 'WorldCountries.geojson')
        # check if it exist already
        if not os.path.exists(targetFilename):
            # download the file
            print('Downloading data, that might take some time...')
            endpoint = 'https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson'
            # the manual download link is
            # 'https://github.com/datasets/geo-countries/blob/master/data/countries.geojson'
            try:
                # try to download the file 
                download_JSON_file(endpoint, targetFilename)
                print('Download finished.')
            except Exception as e:
                # best effort: report the problem, the check below decides how to go on
                print(getattr(e, 'message', e))
        # now the file should exist
        if os.path.exists(targetFilename):
            # load the file
            geoDf = gpd.read_file(targetFilename)
            # adjust column names, only possible if the file has been loaded
            # NOTE(review): assumes the file provides exactly these four columns in this order - confirm
            geoDf.columns = ['Name', 'ISO-3166-alpha_3', 'GeoID', 'geometry']
        # finally return the geo df
        return geoDf

    def __get_covid_data(self):
        """ Gets the covid-19 data via the CovidCasesWHO class, restricts them to the countries of Oceania and to a 
        single reporting date. The reporting date is kept in the generation date member. 
        
        Returns:
            covid dataframe: the covid data of the countries of Oceania for the reporting date or None if the WHO 
            data can't be loaded
        """
        try:
            # get the latests database file as a CSV
            dataFile = CovidCasesWHO.download_CSV_file()
            # open the WHO data
            whoData = CovidCasesWHO(dataFile)
        except Exception as e:
            print(getattr(e, 'message', e))
            return None
        # the list of comma separated geoIDs
        countryList = whoData.get_pygal_oceania_geoid_string_list()
        # get the data for the country list
        df = whoData.get_data_by_geoid_string_list(countryList)
        # add the incidence
        df = whoData.add_incidence_7day_per_100Kpopulation(df)
        today = date.today()
        # weekday() is 0 on Mondays and 6 on Sundays. On these days 'yesterday' is a day of the weekend 
        # and as the data is often not available on weekends the last Friday is taken instead
        if today.weekday() in (0, 6):
            last_friday = this_or_last_weekday(today, 4)
            self.__generationDate = date(last_friday.year, last_friday.month, last_friday.day)
        else:
            self.__generationDate = today - timedelta(1)
        # ...and return the data for that date
        return df.loc[df['Date'] == pd.to_datetime(self.__generationDate)]
    
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The name of the column that defines the colors of the countries. 
                Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The text of the legend, the current date will be appended. 
                Defaults to '7-day incidence per 100.000 population'.

        Returns:
            folium map: the map or None if the geo or the covid data is not available
        """
        # check if we have everything that we need
        if (self.__dfGeo is None) or (self.__dfData is None):
            return None
        # the attributes that are merged to the geo data and shown in the tooltip
        tooltipFields = ['GeoName', 
                         'Cases', 
                         'Deaths', 
                         'PercentDeaths',
                         'DailyCases', 
                         'DailyDeaths', 
                         'Incidence7DayPer100Kpopulation',
                         'CasesPerMillionPopulation',
                         'DeathsPerMillionPopulation']
        mergeUID = self.get_merge_UID()
        # merge geo and data dfs. ensure merging to the geoDF to keep the result a geoPandas df
        combined = self.__dfGeo.merge(self.__dfData[[mergeUID] + tooltipFields],
                                      on=mergeUID, 
                                      how='left')
        # create the map
        foliumMap = folium.Map(attr='WHO data. Map generated by CMBT, 2022', location=[-26, 147], tiles=basemap, zoom_start=4)
        # the alias incl. the date
        # NOTE(review): the legend shows the current date although the data is the one of the generation 
        # date (yesterday or last Friday) - confirm that this is intended
        coloredAttributeAlias = coloredAttributeAlias + ' as of ' + date.today().strftime('%Y-%m-%d')
        # the bins for the colored values
        quantiles = combined[coloredAttribute].quantile([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
        bins = [float(x) for x in quantiles]
        # build the choropleth
        cp = folium.Choropleth(geo_data=combined,
                               data=combined,
                               columns=[mergeUID, coloredAttribute],
                               key_on='feature.properties.' + mergeUID,
                               fill_color='YlOrRd',
                               fill_opacity=0.4,
                               line_opacity=0.4,
                               nan_fill_color='#f5f5f3',
                               legend_name=coloredAttributeAlias,
                               bins=bins,
                               highlight=True,
                               smooth_factor=0.1)
        # give it a name
        cp.layer_name = "Covid-19 data"  
        # add it to the map
        cp.add_to(foliumMap)
        # create a tooltip for hovering
        tt = folium.GeoJsonTooltip(fields=tooltipFields)
        # add it to the json
        tt.add_to(cp.geojson)
        # numbers and dates in the system local
        tt.localize = True
        # add a layer control to the map
        folium.LayerControl().add_to(foliumMap)
        # return the map
        return foliumMap

    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        return 'GeoID'

    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it

        Returns:
            string: A string holding the alias of the map 
        """
        return self.__alias  

    def get_nice_basemaps(self):
        """
        Returns a list of strings referring to nice basemaps for the specific region. At least one basemap should be given and 
        the preferred basemap should be the first one

        Returns:
            list: A list of strings referring to nice basemaps 
        """
        mapArray = ['https://server.arcgisonline.com/arcgis/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{z}/{y}/{x}',
                    'cartodbpositron',
                    'Stamen Terrain']
        return mapArray

class FoliumCovid19MapAfrica(FoliumCovid19Map):
    """
    This class will expose an interface to deal with Choropleth maps to display Covid-19 data attributes of the 
    WHO for African countries. 
    """
    def __init__(self, dataDirectory = '../data'):
        """ Constructor

        Args:
            dataDirectory (str, optional): The data directory to be used for cached data. Defaults to '../data'.
        """
        # init members
        self.__dfGeo = None
        self.__dfData = None
        self.__generationDate = None
        self.__alias = 'MapAfrica'
        # ensure that the data directory exists, meaning to create it if it is not available
        self.__dataDirectory = ensure_path_exists(dataDirectory)
        # an empty string signals that the directory could not be created
        if self.__dataDirectory != '':
            # get the geo JSON data frame
            self.__dfGeo = self.__get_geo_data()
            # get the covid data only if the geo data is really available
            if self.__dfGeo is not None:
                self.__dfData = self.__get_covid_data()
        # pass everything to the base class
        super().__init__(self.__dfGeo, self.__dfData, self.__dataDirectory)

    def __get_geo_data(self):
        """ Downloads the geoJSON file of the world countries (GitHub, datasets/geo-countries) if it is not yet cached 
        in the data directory and opens it to return a geoPandas dataframe. Errors during the download are printed 
        and not raised, errors while reading the file are passed on to the caller.

        Returns:
            geo dataframe: the geo dataframe of the world countries or None if the file is not available
        """
        # init return
        geoDf = None
        # the filename of the geoJSON that is used
        targetFilename = os.path.join(self.__dataDirectory, 'WorldCountries.geojson')
        # check if it exist already
        if not os.path.exists(targetFilename):
            # download the file
            print('Downloading data, that might take some time...')
            endpoint = 'https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson'
            # the manual download link is
            # 'https://github.com/datasets/geo-countries/blob/master/data/countries.geojson'
            try:
                # try to download the file 
                download_JSON_file(endpoint, targetFilename)
                print('Download finished.')
            except Exception as e:
                # best effort: report the problem, the check below decides how to go on
                print(getattr(e, 'message', e))
        # now the file should exist
        if os.path.exists(targetFilename):
            # load the file
            geoDf = gpd.read_file(targetFilename)
            # adjust column names, only possible if the file has been loaded
            # NOTE(review): assumes the file provides exactly these four columns in this order - confirm
            geoDf.columns = ['Name', 'ISO-3166-alpha_3', 'GeoID', 'geometry']
        # finally return the geo df
        return geoDf

    def __get_covid_data(self):
        """ Gets the covid-19 data via the CovidCasesWHO class, restricts them to the African countries and to a 
        single reporting date. The reporting date is kept in the generation date member. 
        
        Returns:
            covid dataframe: the covid data of the African countries for the reporting date or None if the WHO 
            data can't be loaded
        """
        try:
            # get the latests database file as a CSV
            dataFile = CovidCasesWHO.download_CSV_file()
            # open the WHO data
            whoData = CovidCasesWHO(dataFile)
        except Exception as e:
            print(getattr(e, 'message', e))
            return None
        # the list of comma separated geoIDs
        countryList = whoData.get_pygal_african_geoid_string_list()
        # get the data for the country list
        df = whoData.get_data_by_geoid_string_list(countryList)
        # add the incidence
        df = whoData.add_incidence_7day_per_100Kpopulation(df)
        today = date.today()
        # weekday() is 0 on Mondays and 6 on Sundays. On these days 'yesterday' is a day of the weekend 
        # and as the data is often not available on weekends the last Friday is taken instead
        if today.weekday() in (0, 6):
            last_friday = this_or_last_weekday(today, 4)
            self.__generationDate = date(last_friday.year, last_friday.month, last_friday.day)
        else:
            self.__generationDate = today - timedelta(1)
        # ...and return the data for that date
        return df.loc[df['Date'] == pd.to_datetime(self.__generationDate)]
    
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The name of the column that defines the colors of the countries. 
                Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The text of the legend, the current date will be appended. 
                Defaults to '7-day incidence per 100.000 population'.

        Returns:
            folium map: the map or None if the geo or the covid data is not available
        """
        # check if we have everything that we need
        if (self.__dfGeo is None) or (self.__dfData is None):
            return None
        # the attributes that are merged to the geo data and shown in the tooltip
        tooltipFields = ['GeoName', 
                         'Cases', 
                         'Deaths', 
                         'PercentDeaths',
                         'DailyCases', 
                         'DailyDeaths', 
                         'Incidence7DayPer100Kpopulation',
                         'CasesPerMillionPopulation',
                         'DeathsPerMillionPopulation']
        mergeUID = self.get_merge_UID()
        # merge geo and data dfs. ensure merging to the geoDF to keep the result a geoPandas df
        combined = self.__dfGeo.merge(self.__dfData[[mergeUID] + tooltipFields],
                                      on=mergeUID, 
                                      how='left')
        # create the map
        foliumMap = folium.Map(attr='WHO data. Map generated by CMBT, 2022', location=[5, 19], tiles=basemap, zoom_start=4)
        # the alias incl. the date
        # NOTE(review): the legend shows the current date although the data is the one of the generation 
        # date (yesterday or last Friday) - confirm that this is intended
        coloredAttributeAlias = coloredAttributeAlias + ' as of ' + date.today().strftime('%Y-%m-%d')
        # the bins for the colored values
        quantiles = combined[coloredAttribute].quantile([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
        bins = [float(x) for x in quantiles]
        # build the choropleth
        cp = folium.Choropleth(geo_data=combined,
                               data=combined,
                               columns=[mergeUID, coloredAttribute],
                               key_on='feature.properties.' + mergeUID,
                               fill_color='YlOrRd',
                               fill_opacity=0.4,
                               line_opacity=0.4,
                               nan_fill_color='#f5f5f3',
                               legend_name=coloredAttributeAlias,
                               bins=bins,
                               highlight=True,
                               smooth_factor=0.1)
        # give it a name
        cp.layer_name = "Covid-19 data"  
        # add it to the map
        cp.add_to(foliumMap)
        # create a tooltip for hovering
        tt = folium.GeoJsonTooltip(fields=tooltipFields)
        # add it to the json
        tt.add_to(cp.geojson)
        # numbers and dates in the system local
        tt.localize = True
        # add a layer control to the map
        folium.LayerControl().add_to(foliumMap)
        # return the map
        return foliumMap

    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        return 'GeoID'

    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it

        Returns:
            string: A string holding the alias of the map 
        """
        return self.__alias  

    def get_nice_basemaps(self):
        """
        Returns a list of strings referring to nice basemaps for the specific region. At least one basemap should be given and 
        the preferred basemap should be the first one

        Returns:
            list: A list of strings referring to nice basemaps 
        """
        mapArray = ['https://server.arcgisonline.com/arcgis/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{z}/{y}/{x}',
                    'cartodbpositron',
                    'Stamen Terrain']
        return mapArray

class FoliumCovid19MapWorld(FoliumCovid19Map):
    """
    This class exposes an interface to deal with Choropleth maps to display Covid-19 data attributes for the 
    countries of the world. 
    """
    def __init__(self, dataDirectory = '../data'):
        """ Constructor

        Args:
            dataDirectory (str, optional): The data directory to be used for cached data. Defaults to '../data'.
        """
        # init members
        self.__dfGeo = None
        self.__dfData = None
        self.__alias = 'MapWorld'
        # ensure that the data directory exists, meaning to create it if it is not available.
        # ensure_path_exists returns an empty string if the directory could not be created
        self.__dataDirectory = ensure_path_exists(dataDirectory)
        # check if it really exists
        if self.__dataDirectory != '':
            # get the geo JSON data frame
            self.__dfGeo = self.__get_geo_data()
            # only get the covid data if the geo dataframe could be loaded
            if self.__dfGeo is not None:
                self.__dfData = self.__get_covid_data()
        # pass everything to the base class
        super().__init__(self.__dfGeo, self.__dfData, self.__dataDirectory)

    def __get_geo_data(self):
        """ Downloads the geoJSON file of the world countries (datasets/geo-countries on GitHub) if it is not yet 
        cached in the data directory and opens it to return a geoPandas dataframe. Download errors are printed, 
        not raised

        Returns:
            geo dataframe: the geo dataframe of the world countries or None if it can't load the file
        """
        # init return
        geoDf = None
        # the filename of the geoJSON that is used
        targetFilename = os.path.join(self.__dataDirectory, 'WorldCountries.geojson')
        # check if it exist already
        if not os.path.exists(targetFilename):
            # download the file
            print('Downloading data, that might take some time...')
            endpoint = 'https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson'
            # the manual download link is
            # 'https://github.com/datasets/geo-countries/blob/master/data/countries.geojson'
            try:
                # try to download the file 
                download_JSON_file(endpoint, targetFilename)
                print('Download finished.')
            except Exception as e:
                # print the message of the exception if it has one, the exception itself otherwise
                print(getattr(e, 'message', e))
        # now the file should exist
        if os.path.exists(targetFilename):
            # load the file
            geoDf = gpd.read_file(targetFilename)
            # adjust column names, only possible if the file was loaded.
            # NOTE(review): the positional rename assumes the file has exactly these four columns in this order
            geoDf.columns = ['Name', 'ISO-3166-alpha_3', 'GeoID', 'geometry']
        # finally return the geo df
        return geoDf

    def __get_covid_data(self):
        """ Gets the covid-19 data of the WHO (via CovidCasesWHO) for all countries, adds the data of Taiwan from 
        OWID (via CovidCasesOWID) if that is available and returns the rows of one single reporting date. The 
        date is stored in self.__generationDate.
        
        Returns:
            covid dataframe: the covid data of the countries for the reporting date or None if the WHO data can't be loaded
        """
        try:
            # get the latests WHO database file as a CSV
            dataFile = CovidCasesWHO.download_CSV_file()
            # get the data for the WHO countryList
            whoData = CovidCasesWHO(dataFile)
        except Exception as e:
            print(getattr(e, 'message', e))
            return None
        # the list of comma separated geoIDs
        countryList = ','.join([whoData.get_pygal_asian_geoid_string_list(),
                                whoData.get_pygal_european_geoid_string_list(),
                                whoData.get_pygal_american_geoid_string_list(),
                                whoData.get_pygal_african_geoid_string_list(),
                                whoData.get_pygal_oceania_geoid_string_list()])
        # get the data for the country list
        df = whoData.get_data_by_geoid_string_list(countryList)
        # add the incidence
        df = whoData.add_incidence_7day_per_100Kpopulation(df)
        # get the OWID database as well, it is only needed for Taiwan
        owidData = None
        try:
            dataFile = CovidCasesOWID.download_CSV_file()
            owidData = CovidCasesOWID(dataFile)
        except Exception as e:
            # go on without the Taiwan data, the WHO data still has to be reduced to one date below
            print(getattr(e, 'message', e))
        if owidData is not None:
            # the taiwan data
            dfTW = owidData.get_data_by_geoid_string_list('TW')
            # add the incidence
            dfTW = owidData.add_incidence_7day_per_100Kpopulation(dfTW)  
            # append it
            df = pd.concat([df, dfTW])  
        # the reporting date is usually yesterday
        today = date.today()
        # date.weekday() is 0 on Mondays and 6 on Sundays. On these days 'yesterday' would be a day of the 
        # weekend where the data is often not available, so fall back to a Friday.
        # presumably this_or_last_weekday(d, 4) gives the most recent Friday - it is defined elsewhere
        if today.weekday() in (0, 6):
            lastFriday = this_or_last_weekday(today, 4)
            self.__generationDate = date(lastFriday.year, lastFriday.month, lastFriday.day)
        else:
            self.__generationDate = today - timedelta(1)
        # ...and return the data for that date only
        return df.loc[df['Date'] == pd.to_datetime(self.__generationDate)]
    
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The data column that defines the color of a country. Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The text of the legend, the current date will be appended. Defaults to '7-day incidence per 100.000 population'.

        Returns:
            folium map: the map or None if the geo or the covid data is not available
        """
        # check if we have everything that we need
        if (self.__dfGeo is None) or (self.__dfData is None):
            return None
        # the attributes shown in the tooltip, the same columns are merged into the geo df
        tooltipFields = ['GeoName', 
                         'Cases', 
                         'Deaths', 
                         'PercentDeaths',
                         'DailyCases', 
                         'DailyDeaths', 
                         'Incidence7DayPer100Kpopulation',
                         'CasesPerMillionPopulation',
                         'DeathsPerMillionPopulation']
        mergeUID = self.get_merge_UID()
        # merge geo and data dfs. ensure merging to the geoDF to keep the result a geoPandas df
        combined = self.__dfGeo.merge(self.__dfData[[mergeUID] + tooltipFields],
                                      on=mergeUID, 
                                      how='left')
        # create the map
        worldMap = folium.Map(attr='WHO data. Map generated by CMBT, 2022', location=[15, 0], tiles=basemap, zoom_start=2)
        # the alias incl. the date
        coloredAttributeAlias = coloredAttributeAlias + ' as of ' + date.today().strftime('%Y-%m-%d')
        # the bins for the colored values are quantiles of the attribute
        bins = list(combined[coloredAttribute].quantile([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]))
        # build the choropleth
        cp = folium.Choropleth (geo_data=combined,
                                data=combined,
                                columns=[mergeUID, coloredAttribute],
                                key_on='feature.properties.' + mergeUID,
                                fill_color='YlOrRd',
                                fill_opacity=0.4,
                                line_opacity=0.4,
                                nan_fill_color='#f5f5f3',
                                legend_name=coloredAttributeAlias,
                                bins=[float(x) for x in bins],
                                highlight=True,
                                smooth_factor = 0.1)
        # give it a name
        cp.layer_name = "Covid-19 data"  
        # add it to the map
        cp.add_to(worldMap)
        # create a tooltip for hovering
        tt = folium.GeoJsonTooltip(fields=tooltipFields)
        # add it to the json
        tt.add_to(cp.geojson)
        # numbers and dates in the system local
        tt.localize = True
        # add a layer control to the map
        folium.LayerControl().add_to(worldMap)
        # return the map
        return worldMap

    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        return 'GeoID'

    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it

        Returns:
            string: A string holding the alias of the map 
        """
        return self.__alias  

    def get_nice_basemaps(self):
        """
        Returns an array of strings referring to nice basemaps for the specific region. At least one basemaps should be given and 
        the preferred basemap should be basemap[0]

        Returns:
            list: An array of strings referring to nice basemaps 
        """
        mapArray = ['https://server.arcgisonline.com/arcgis/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{z}/{y}/{x}',
                    'cartodbpositron',
                    'Stamen Terrain']
        return mapArray


# CovidFoliumMapRKIXXX classes

These classes implement different folium maps based on the data of the Robert Koch Institute. There are (so far) two maps available: one showing the 7-day incidence data for German states, the other for German cities and counties.  
Both classes inherit from the FoliumCovid19Map class.


In [6]:

class FoliumCovid19MapDEcounties(FoliumCovid19Map):
    """
    This class will expose an interface to deal with Choropleth maps to display Covid-19 data attributes for counties and cities in Germany. 
    """
    def __init__(self, dataDirectory = '../data'):
        """ Constructor

        Args:
            dataDirectory (str, optional): The data directory to be used for cached data. Defaults to '../data'.
        """
        # init members
        self.__dfGeo = None
        self.__dfData = None
        self.__alias = 'MapDEcounty'
        # ensure that the data directory exists, meaning to create it if it is not available.
        # ensure_path_exists returns an empty string if the directory could not be created
        self.__dataDirectory = ensure_path_exists(dataDirectory)
        # check if it really exists
        if self.__dataDirectory != '':
            # get the geo JSON data frame
            self.__dfGeo = self.__get_geo_data()
            # the covid data is requested per county of the geo dataframe, so that has to be available
            if self.__dfGeo is not None:
                self.__dfData = self.__get_covid_data(self.__dfGeo)
        # pass everything to the base class
        super().__init__(self.__dfGeo, self.__dfData, self.__dataDirectory)

    def __get_geo_data(self):
        """ Downloads the JSON file from the RKI server if it is not yet cached in the data directory and opens it 
        to return a geoPandas dataframe. Download errors are printed, not raised

        Returns:
            geo dataframe: the geo dataframe of the German counties and cities or None if it can't load the file
        """
        # init return
        geoDf = None
        # the filename of the geoJSON that is used
        targetFilename = os.path.join(self.__dataDirectory, 'RKI_Corona_Landkreise.geojson')
        # check if it exist already
        if not os.path.exists(targetFilename):
            # download the file
            print('Downloading data, that might take some time...')
            endpoint = 'https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_Landkreisdaten/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json'
            # the manual download link is
            # 'https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/917fc37a709542548cc3be077a786c17_0/explore?location=51.282342%2C10.714458%2C6.71'
            try:
                # try to download the file 
                download_JSON_file(endpoint, targetFilename)
                print('Download finished.')
            except Exception as e:
                # print the message of the exception if it has one, the exception itself otherwise
                print(getattr(e, 'message', e))
        # now the file should exist
        if os.path.exists(targetFilename):
            # load the file
            geoDf = gpd.read_file(targetFilename)
        # finally return the geo df
        return geoDf

    def __get_covid_data(self, geoDf):
        """ Gets the covid-19 data of all counties. The data of the current day is read from a cached CSV file in 
        the data directory if that exists, otherwise it is downloaded county by county and written to that file. 

        Args:
            geoDf (geo dataframe): the geo dataframe, its 'RS' column holds the IDs of the counties to download
        
        Returns:
            covid dataframe: the covid data for the German counties and cities or None if no data could be downloaded
        """
        # init the result
        df = None
        # the prefix of the CSV file is Y-m-d
        preFix = date.today().strftime('%Y-%m-%d') + "-RKIcounty"
        # the target filename of the csv to be downloaded
        targetFilename = os.path.join(self.__dataDirectory, preFix + '-db.csv')
        # check if it exist already
        if os.path.exists(targetFilename):
            print('using existing file: ' + targetFilename)
            # read the file
            df = pd.read_csv(targetFilename)
        else:
            print('Downloading data, that might take some time...')
            # the dataframes of the single counties
            dfs = []
            for countyID in geoDf['RS']:
                try:
                    # get the data for the county and add it to the list
                    dfs.append(self.__get_county_data_from_web(countyID))
                except Exception:
                    # a failing county is skipped, str() keeps the message working for non-string IDs
                    print('Error getting the data for ' + str(countyID) + '!')
            # pd.concat raises on an empty list, so give up if not a single county could be downloaded
            if not dfs:
                print('Download failed, no data available.')
                return None
            # finally concatenate all dfs together
            df = pd.concat(dfs)  
            # save it to file
            df.to_csv(targetFilename)
            print('Download finished.')
        # ensure RS length is 5, a leading zero gets lost if the IDs were read as numbers
        if df is not None:
            df['RS'] = df['RS'].astype(str).str.zfill(5)
        # ...and return df
        return df
    
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The data column that defines the color of a county. Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The text of the legend, the current date will be appended. Defaults to '7-day incidence per 100.000 population'.

        Returns:
            folium map: the map or None if the geo or the covid data is not available
        """
        # check if we have everything that we need
        if (self.__dfGeo is None) or (self.__dfData is None):
            return None
        # the attributes shown in the tooltip, the same columns are merged into the geo df
        tooltipFields = ['GeoID', 
                         'Cases', 
                         'Deaths', 
                         'WeeklyCases', 
                         'WeeklyDeaths', 
                         'DailyCases', 
                         'DailyDeaths', 
                         'DailyRecovered', 
                         'Incidence7DayPer100Kpopulation']
        mergeUID = self.get_merge_UID()
        # merge geo and data dfs. ensure merging to the geoDF to keep the result a geoPandas df
        combined = self.__dfGeo.merge(self.__dfData[[mergeUID] + tooltipFields], 
                                      on=mergeUID, 
                                      how='left')
        # create the map
        countyMap = folium.Map(attr='Robert Koch-Institut (RKI), dl-de/by-2-0, CMBT 2022', location=[51.3, 10.5], tiles=basemap, zoom_start=6)
        # the alias incl. the date
        coloredAttributeAlias = coloredAttributeAlias + ' as of ' + date.today().strftime('%Y-%m-%d')
        # fixed lower bin edges
        binEdges = [0, 150, 300, 450, 600, 750, 900, 1050, 1200]
        # the last edge is the maximum in the coloredAttribute column
        maxValue = self.__dfData[coloredAttribute].max()
        # bin edges have to increase monotonically (numpy.histogram, used by folium, rejects them otherwise). 
        # So if the maximum is not above the last fixed edge (or is NaN) close the last bin one step further up
        if not maxValue > binEdges[-1]:
            maxValue = binEdges[-1] + 150
        bins = binEdges + [maxValue]
        # build the choropleth
        cp = folium.Choropleth (geo_data=combined,
                                data=combined,
                                columns=[mergeUID, coloredAttribute],
                                key_on='feature.properties.' + mergeUID,
                                fill_color='YlOrRd',
                                fill_opacity=0.4,
                                line_opacity=0.4,
                                nan_fill_color='#f5f5f3',
                                legend_name=coloredAttributeAlias,
                                bins=[float(x) for x in bins],
                                highlight=True,
                                smooth_factor = 0.1)
        # give it a name
        cp.layer_name = "Covid-19 data"  
        # add it to the map
        cp.add_to(countyMap)
        # create a tooltip for hovering
        tt = folium.GeoJsonTooltip(fields=tooltipFields)
        # add it to the json
        tt.add_to(cp.geojson)
        # numbers and dates in the system local
        tt.localize = True
        # add a layer control to the map
        folium.LayerControl().add_to(countyMap)
        # return the map
        return countyMap

    def __get_county_data_from_web(self, county_ID):
        """ Downloads the covid-19 data for the given county-ID

        Args:
            county_ID string: the county-ID for which we want the data

        Raises:
            requests.HTTPError: In case the server answers with an error status
            ValueError: In case the data is empty

        Returns:
            dataframe: A dataframe of the county data
        """
        # the endpoint of the request
        endpoint = 'https://api.corona-zahlen.org/districts/' + county_ID
        # contact the server
        res = requests.get(endpoint)
        # raise an exception if the server reported an error
        res.raise_for_status()
        # get the json
        payload = res.json()
        # check if the data is not empty
        if not payload['data']:
            raise ValueError("Empty response! County ID might be invalid.")
        df = pd.json_normalize(payload['data'])
        # adjust column names.
        # NOTE(review): the positional rename assumes the API delivers exactly these 16 fields in this order
        df.columns = ['RS', 
                    'GeoName', 
                    'GeoID', 
                    'State', 
                    'Population', 
                    'Cases',
                    'Deaths',
                    'WeeklyCases',
                    'WeeklyDeaths',
                    'StateID',
                    'Recovered',
                    'Incidence7DayPer100Kpopulation', 
                    'CasesPer100kPopulation', 
                    'DailyCases', 
                    'DailyDeaths', 
                    'DailyRecovered']
        return df

    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        return 'RS'

    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it

        Returns:
            string: A string holding the alias of the map 
        """
        return self.__alias     

    def get_nice_basemaps(self):
        """
        Returns an array of strings referring to nice basemaps for the specific region. At least one basemaps should be given and 
        the preferred basemap should be basemap[0]

        Returns:
            list: An array of strings referring to nice basemaps 
        """
        mapArray = ['cartodbpositron',
                    'https://server.arcgisonline.com/arcgis/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{z}/{y}/{x}',
                    'Stamen Terrain']
        return mapArray

class FoliumCovid19MapDEstates(FoliumCovid19Map):
    """
    This class will expose an interface to deal with Choropleth maps to display Covid-19 data attributes for German states. 
    """
    def __init__(self, dataDirectory = '../data'):
        """ Constructor

        Args:
            dataDirectory (str, optional): The data directory to be used for cached data. Defaults to '../data'.
        """
        # init members
        self.__dfGeo = None
        self.__dfData = None
        self.__alias = 'MapDEstate'
        # the German states as pairs of [name, abbreviation]. The abbreviation is used to request the data
        self.__statelist = [['Schleswig-Holstein', 'SH'],
                            ['Hamburg', 'HH'],
                            ['Niedersachsen', 'NI'],
                            ['Bremen', 'HB'],
                            ['Nordrhein-Westfalen', 'NW'],
                            ['Hessen', 'HE'],
                            ['Rheinland-Pfalz', 'RP'],
                            ['Baden-Württemberg', 'BW'],
                            ['Bayern', 'BY'],
                            ['Saarland', 'SL'],
                            ['Berlin', 'BE'],
                            ['Brandenburg', 'BB'],
                            ['Mecklenburg-Vorpommern', 'MV'],
                            ['Sachsen', 'SN'],
                            ['Sachsen-Anhalt', 'ST'],
                            ['Thüringen', 'TH']]
        # ensure that the data directory exists, meaning to create it if it is not available.
        # ensure_path_exists returns an empty string if the directory could not be created
        self.__dataDirectory = ensure_path_exists(dataDirectory)
        # check if it really exists
        if self.__dataDirectory != '':
            # get the geo JSON data frame
            self.__dfGeo = self.__get_geo_data()
            # only get the covid data if the geo dataframe could be loaded
            if self.__dfGeo is not None:
                self.__dfData = self.__get_covid_data()
        # pass everything to the base class
        super().__init__(self.__dfGeo, self.__dfData, self.__dataDirectory)

    def __get_geo_data(self):
        """ Downloads the JSON file from the RKI server if it is not yet cached in the data directory and opens it 
        to return a geoPandas dataframe. Download errors are printed, not raised

        Returns:
            geo dataframe: the geo dataframe of the German states or None if it can't load the file
        """
        # init return
        geoDf = None
        # the filename of the geoJSON that is used
        targetFilename = os.path.join(self.__dataDirectory, 'RKI_Corona_Bundeslaender.geojson')
        # check if it exist already
        if not os.path.exists(targetFilename):
            # download the file
            print('Downloading data, that might take some time...')
            endpoint = 'https://opendata.arcgis.com/api/v3/datasets/ef4b445a53c1406892257fe63129a8ea_0/downloads/data?format=geojson&spatialRefId=4326'
            # the manual download link is
            # 'https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/ef4b445a53c1406892257fe63129a8ea_0/explore'
            try:
                # try to download the file 
                download_JSON_file(endpoint, targetFilename)
                print('Download finished.')
            except Exception as e:
                # print the message of the exception if it has one, the exception itself otherwise
                print(getattr(e, 'message', e))
        # now the file should exist
        if os.path.exists(targetFilename):
            # load the file
            geoDf = gpd.read_file(targetFilename)
        # finally return the geo df
        return geoDf

    def __get_covid_data(self):
        """ Gets the covid-19 data of all states. The data of the current day is read from a cached CSV file in 
        the data directory if that exists, otherwise it is downloaded state by state and written to that file. 
        
        Returns:
            covid dataframe: the covid data for the German states or None if no data could be downloaded
        """
        # init the result
        df = None
        # the prefix of the CSV file is Y-m-d
        preFix = date.today().strftime('%Y-%m-%d') + "-RKIstates"
        # the target filename of the csv to be downloaded
        targetFilename = os.path.join(self.__dataDirectory, preFix + '-db.csv')
        # check if it exist already
        if os.path.exists(targetFilename):
            print('using existing file: ' + targetFilename)
            # read the file
            df = pd.read_csv(targetFilename)
        else:
            print('Downloading data, that might take some time...')
            # the dataframes of the single states
            dfs = []
            for _stateName, stateCode in self.__statelist:
                try:
                    # get the data for the state and add it to the list
                    dfs.append(self.__get_state_data_from_web(stateCode))
                except Exception:
                    # a failing state is skipped, the message names the abbreviation that was requested
                    print('Error getting the data for ' + stateCode + '!')
            # pd.concat raises on an empty list, so give up if not a single state could be downloaded
            if not dfs:
                print('Download failed, no data available.')
                return None
            # finally concatenate all dfs together
            df = pd.concat(dfs)  
            # save it to file
            df.to_csv(targetFilename)
            print('Download finished.')
        # ensure AGS length is 2, a leading zero is missing if the IDs are numbers
        if df is not None:
            df['AGS_TXT'] = df['AGS_TXT'].astype(str).str.zfill(2)
        # ...and return df
        return df
    
    def create_default_map(self, 
                           basemap, coloredAttribute = 'Incidence7DayPer100Kpopulation', 
                           coloredAttributeAlias = '7-day incidence per 100.000 population'):
        """ Returns a default folium map

        Args:
            basemap (str): The name of the basemap to be used. Can be one of the nice_basemaps or something different
            coloredAttribute (str, optional): The data column that defines the color of a state. Defaults to 'Incidence7DayPer100Kpopulation'.
            coloredAttributeAlias (str, optional): The text of the legend, the current date will be appended. Defaults to '7-day incidence per 100.000 population'.

        Returns:
            folium map: the map or None if the geo or the covid data is not available
        """
        # check if we have everything that we need
        if (self.__dfGeo is None) or (self.__dfData is None):
            return None
        # the attributes shown in the tooltip, the same columns are merged into the geo df
        tooltipFields = ['GeoName', 
                         'Cases', 
                         'Deaths', 
                         'WeeklyCases', 
                         'WeeklyDeaths', 
                         'DailyCases', 
                         'DailyDeaths', 
                         'DailyRecovered', 
                         'Incidence7DayPer100Kpopulation',
                         'HospitalizationCases7']
        mergeUID = self.get_merge_UID()
        # merge geo and data dfs. ensure merging to the geoDF to keep the result a geoPandas df
        combined = self.__dfGeo.merge(self.__dfData[[mergeUID] + tooltipFields], 
                                      on=mergeUID, 
                                      how='left')
        # create the map
        stateMap = folium.Map(attr='Robert Koch-Institut (RKI), dl-de/by-2-0, CMBT 2022', location=[51.3, 10.5], tiles=basemap, zoom_start=6)
        # the alias incl. the date
        coloredAttributeAlias = coloredAttributeAlias + ' as of ' + date.today().strftime('%Y-%m-%d')
        # fixed lower bin edges
        binEdges = [0, 150, 300, 450, 600, 750, 900, 1050, 1200]
        # the last edge is the maximum in the coloredAttribute column
        maxValue = self.__dfData[coloredAttribute].max()
        # bin edges have to increase monotonically (numpy.histogram, used by folium, rejects them otherwise). 
        # So if the maximum is not above the last fixed edge (or is NaN) close the last bin one step further up
        if not maxValue > binEdges[-1]:
            maxValue = binEdges[-1] + 150
        bins = binEdges + [maxValue]
        # build the choropleth
        cp = folium.Choropleth (geo_data=combined,
                                data=combined,
                                columns=[mergeUID, coloredAttribute],
                                key_on='feature.properties.' + mergeUID,
                                fill_color='YlOrRd',
                                fill_opacity=0.4,
                                line_opacity=0.4,
                                nan_fill_color='#f5f5f3',
                                legend_name=coloredAttributeAlias,
                                bins=[float(x) for x in bins],
                                highlight=True,
                                smooth_factor = 0.1)
        # give it a name
        cp.layer_name = "Covid-19 data"  
        # add it to the map
        cp.add_to(stateMap)
        # create a tooltip for hovering
        tt = folium.GeoJsonTooltip(fields=tooltipFields)
        # add it to the json
        tt.add_to(cp.geojson)
        # numbers and dates in the system local
        tt.localize = True
        # add a layer control to the map
        folium.LayerControl().add_to(stateMap)
        # return the map
        return stateMap

    def __get_state_data_from_web(self, state_ID):
        """ Downloads the covid-19 data for the given state-ID

        Args:
            state_ID string: the abbreviation of the state for which we want the data, e.g. 'SH'

        Raises:
            requests.HTTPError: In case the server answers with an error status
            ValueError: In case the data is empty

        Returns:
            dataframe: A dataframe of the state data
        """
        # the endpoint of the request
        endpoint = 'https://api.corona-zahlen.org/states/' + state_ID
        # contact the server
        res = requests.get(endpoint)
        # raise an exception if the server reported an error
        res.raise_for_status()
        # get the json
        payload = res.json()
        # check if the data is not empty
        if not payload['data']:
            raise ValueError("Empty response! County ID might be invalid.")
        df = pd.json_normalize(payload['data'])
        # adjust column names.
        # NOTE(review): the positional rename assumes the API delivers exactly these 18 fields in this order
        df.columns = ['AGS_TXT', 
                    'GeoName', 
                    'Population', 
                    'Cases',
                    'Deaths',
                    'WeeklyCases',
                    'WeeklyDeaths',
                    'Recovered',
                    'GeoID',
                    'Incidence7DayPer100Kpopulation', 
                    'CasesPer100kPopulation', 
                    'DailyCases', 
                    'DailyDeaths', 
                    'DailyRecovered',
                    'HospitalizationCases7',
                    'HospitalizationIncidence7',
                    'HospitalizationDate',
                    'HospitalizationUpdate']
        return df

    def get_merge_UID(self):
        """
        Returns the string holding the name of the unique ID of the data and the geo dataframe that can be used to merge the two

        Returns:
            string: A string holding the name of the unique ID of the data dataframe 
        """
        return 'AGS_TXT'

    def get_map_alias(self):
        """
        Returns the string holding the name of the map that can be used to save it

        Returns:
            string: A string holding the alias of the map 
        """
        return self.__alias     

    def get_nice_basemaps(self):
        """
        Returns an array of strings referring to nice basemaps for the specific region. At least one basemaps should be given and 
        the preferred basemap should be basemap[0]

        Returns:
            list: An array of strings referring to nice basemaps 
        """
        mapArray = ['cartodbpositron',
                    'https://server.arcgisonline.com/arcgis/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{z}/{y}/{x}',
                    'Stamen Terrain']
        return mapArray


# The main function

In [7]:
import os

def _running_on_colab():
    """ Checks whether the code is executed inside a Google Colab kernel

    Returns:
        bool: True if an IPython shell exists and its description contains 'google.colab', False otherwise
    """
    try:
        # get_ipython only exists inside IPython/Jupyter kernels
        shell = get_ipython()
    except NameError:
        # plain Python interpreter (e.g. executed as a script), so definitely not colab
        return False
    return 'google.colab' in str(shell)


def main():
    """ Builds the default map of every region and saves each one as an HTML file in the data directory. 
    For the world map two additional maps (case fatality rate and cases per million population) are saved.

    On colab '/content/data/' is used as the data directory, otherwise the 'data' directory next to the 
    parent of the current working directory. A region without a geo dataframe is reported and skipped, 
    the remaining regions are still processed.
    """
    # the directory for temp. data as well as for the output
    if _running_on_colab():
        outputDir = '/content/data/'
        print('Running on colab. Using ' + outputDir + ' as the data directory')
    else:
        # the parent of the current working directory
        absDirectory = os.path.dirname(os.path.abspath(''))
        # the target directory
        outputDir = os.path.join(absDirectory, './data/')
        print('Not running on colab. Using ' + outputDir + ' as the data directory')

    # the instances to process; comment out single lines to skip a region
    mapObjects = [
        FoliumCovid19MapWorld(outputDir),
        FoliumCovid19MapAfrica(outputDir),
        FoliumCovid19MapOceania(outputDir),
        FoliumCovid19MapAmerica(outputDir),
        FoliumCovid19MapAsia(outputDir),
        FoliumCovid19MapEurope(outputDir),
        FoliumCovid19MapDEstates(outputDir),
        FoliumCovid19MapDEcounties(outputDir),
    ]

    # additional maps of the world: (data column, legend title, filename suffix)
    worldExtras = (
        ('PercentDeaths', 'Case Fatality Rate (CFR)', 'CFR'),
        ('CasesPerMillionPopulation', 'Cases per million population', 'CasesPerMillionPopulation'),
    )

    # process the maps
    for mapObject in mapObjects:
        if mapObject.get_geo_df() is None:
            # one missing geo dataframe should not prevent the other maps from being built
            print('No geo data available for ' + mapObject.get_map_alias() + ', skipping this map')
            continue
        alias = mapObject.get_map_alias()
        # select the preferred basemap
        basemap = mapObject.get_nice_basemaps()[0]
        # build and save the default map
        defaultMap = mapObject.create_default_map(basemap)
        defaultMap.save(os.path.join(outputDir, alias + '.html'))
        # a substring test also matches an alias that starts with 'World' (find() returns 0 in that case)
        if 'World' in alias:
            for column, title, suffix in worldExtras:
                # build another map of the world and save that as well
                extraMap = mapObject.create_default_map(basemap, column, title)
                extraMap.save(os.path.join(outputDir, alias + suffix + '.html'))

# only run main() when this code is executed as the main program and not when it is imported
if __name__ == "__main__":
    main()


Not running on colab. Using /Users/cmbt/Documents/GitHub/Covid-19-analysis/./data/ as the data directory
Downloading data, that might take some time...
Download finished.
using existing file: /Users/cmbt/Documents/GitHub/Covid-19-analysis/./data//2022-01-21-RKIstates-db.csv
