In [69]:
# Import Built in modules
import json # JSON files utilities
from urllib.request import urlretrieve # retrieve files from urls
import os # operative system utilities
import re # regular expressions


# Third-party modules
import locale
locale.setlocale(locale.LC_TIME,'es_ES')

import matplotlib
from matplotlib import pyplot as plt
import matplotlib.dates as mdates

import pandas as pd
import geopandas as gpd
import numpy as np

import ipywidgets as ipw
from ipywidgets import interact,interactive

In [70]:
# Function to download the csv data file from the webpage of the health ministry in
# the correct encoding and removing unwanted symbols in the data rows like '*'
def downloadDATA(data_url, data_file_name = 'data.csv'):
    """
    Download a csv data file and write a cleaned, UTF-8 encoded copy of it.

    The file at "data_url" is fetched into the current working directory,
    decoded as 'iso-8859-1', stripped of every '*' character (used in the
    source data as footnote markers) and written to "data_file_name" encoded
    as 'utf-8'. The raw download is deleted afterwards.

    args:

    - "data_url" : URL to the csv file.
    - "data_file_name": name of the cleaned csv file to write.

    returns the file object that was used to write "data_file_name". It is
    already closed when it is returned; it is only kept so that existing
    callers that expect a return value keep working.
    """
    # The raw download is named after the last component of the URL.
    data_file_unprocessed_name = data_url.split('/')[-1]
    # If that name is empty, or is the very file we are about to write,
    # reading and writing would hit the same path (truncating the download
    # before it is read), so fall back to a distinct temporary name.
    if (not data_file_unprocessed_name
            or os.path.abspath(data_file_unprocessed_name) == os.path.abspath(data_file_name)):
        data_file_unprocessed_name = data_file_name + '.unprocessed'
    urlretrieve(data_url, data_file_unprocessed_name)

    unwanted = r'[*]' # regex with the set of unwanted characters
    try:
        # Explicit encodings on both sides: the platform default is not
        # guaranteed to be UTF-8.
        with open(data_file_unprocessed_name, 'r', encoding='iso-8859-1') as f_in, \
                open(data_file_name, 'w', encoding='utf-8') as f_out:
            for line in f_in:
                f_out.write(re.sub(unwanted, '', line))
    finally:
        # Remove the raw download only once both files are closed; deleting
        # a file that is still open fails on some platforms.
        os.remove(data_file_unprocessed_name)
    return f_out

In [71]:
# Function to make a data frame for the national data from the csv file

def makeNationalDataFrame(data_file_name = 'data.csv'):
    """
    Build the national data frame from the cleaned csv file.

    The first two columns are read as text (the second one holds dates in
    'dd/mm/YYYY' format) and the next five columns as floats. Missing values
    in those columns are replaced by '' and 0. respectively, column names are
    stripped of surrounding whitespace, the date column is converted to
    datetime, rows whose date cannot be parsed are dropped and the result is
    sorted by date with a fresh 0..n-1 index.

    args:

    - "data_file_name": path of the csv file to read.

    returns a pandas DataFrame.
    """
    # Column names exactly as they appear in the file (before stripping)
    data_head = list( pd.read_csv(data_file_name,nrows=0).columns )
    text_columns = data_head[:2]
    numeric_columns = data_head[2:7]

    # Data types and NA replacements for the columns handled explicitly
    data_types = {name: str for name in text_columns}
    data_types.update({name: np.float64 for name in numeric_columns})
    fill_na_dict = {name: '' for name in text_columns}
    fill_na_dict.update({name: 0. for name in numeric_columns})

    # Load the data with the correct data types and replace the NA values
    data = pd.read_csv(data_file_name,dtype=data_types).fillna(fill_na_dict)
    # Strip surrounding whitespace in the column names
    data = data.rename(columns=lambda name : name.strip())

    # Convert the dates to datetime64; unparseable values (e.g. the '' put in
    # place of missing dates above) become NaT
    date_column = data.columns[1]
    data[date_column] = pd.to_datetime(data[date_column],format='%d/%m/%Y',errors='coerce')

    # Remove only the rows with no date. Restricting dropna to the date column
    # keeps valid rows that have gaps in columns beyond the ones filled above.
    data = data.dropna(subset=[date_column])
    # Sort the rows by date and renumber the index
    return data.sort_values(by=[date_column],ignore_index=True)
    

In [72]:
# Function to create a dictionary with a dataframe for every CCAA.
# The key is the CCAA name

def makeCommunitiesDataFrameDict(national_data_frame,CCAA_dict):
    """
    Split the national data frame into one data frame per CCAA.

    args:

    - "national_data_frame": data frame whose first column holds the CCAA code.
    - "CCAA_dict": mapping {CCAA name: CCAA code}.

    returns a dictionary {CCAA name: data frame with the rows of that CCAA},
    each data frame indexed from 0. A code with no rows yields an empty frame.
    """
    # Values of the first column, which identify the CCAA of every row
    code_values = national_data_frame[national_data_frame.columns[0]]
    return {
        community: national_data_frame[code_values == iso_code].reset_index(drop=True)
        for community, iso_code in CCAA_dict.items()
    }

In [73]:
# Function to make a stacked area plot of the evolution of COVID-19 in an autonomous community
def plot_CCAA_area_plot(CCAA_name,communities_data_frames_dict,ax=None):
    """
    Draw an area plot of the evolution of COVID-19 in one autonomous community.

    The active cases, deaths and recovered columns are plotted against the
    date column in blue, red and green. The column names are read from the
    module-level variables date_column_name, activeCases_column_name,
    deaths_column_name and recovered_column_name, which must be defined
    before this function is called.

    args:

    - "CCAA_name": key of the community in "communities_data_frames_dict".
    - "communities_data_frames_dict": mapping {CCAA name: data frame}.
    - "ax": matplotlib axes to draw on; a new figure is created when omitted.

    returns None. If "CCAA_name" is not a key of the dictionary, a message is
    printed and nothing is drawn.
    """
    # Only a missing key means "invalid community"; any other error is a real
    # problem and must not be hidden behind the message below.
    try:
        df = communities_data_frames_dict[CCAA_name]
    except KeyError:
        print('¡Introduzca una comunidad autónoma válida!')
        return None

    # Create a figure if no axes are provided
    if ax is None:
        _, ax = plt.subplots(constrained_layout=True)

    # Make the plot
    df.plot.area(
        x = date_column_name,
        y = [
            activeCases_column_name,
            deaths_column_name,
            recovered_column_name
        ],
        color = [
            'blue',
            'red',
            'green'
        ],
        ax = ax
    )

    # ticks rotation
    for tick in ax.get_xticklabels():
        tick.set_rotation(45)
    # set ticks every week
    ax.xaxis.set_major_locator(mdates.WeekdayLocator())
    # major ticks show abbreviated month and day; minor ticks get no label
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))
    ax.xaxis.set_minor_formatter(mdates.DateFormatter(''))

    # axes labels
    ax.set_xlabel('')
    ax.set_ylabel('Casos')

    # plot title
    ax.set_title(CCAA_name)

    return None

In [74]:
# Function to make a map chart of the current situation in Spain

def plotMap(column_name,merged_data,ax = None):
    """
    Draw a map coloured by the values of one data column.

    args:

    - "column_name": name of the column of "merged_data" used for the colours;
      it is also used as the plot title.
    - "merged_data": data frame with the geometry and the data; its own
      plot() method is used for drawing.
    - "ax": matplotlib axes to draw on; a new figure is created when omitted.

    returns None. A horizontal colour bar covering the value range of the
    column is added to the figure that owns the axes.
    """
    # Values range for the color map. Series.min()/max() skip missing values,
    # whereas the builtin min()/max() give order-dependent results with NaN.
    values = merged_data[column_name]
    vmin = values.min()
    vmax = values.max()

    # Color map for the plots
    cmap = 'YlGn'

    # Create a figure if no axes are provided; otherwise draw the colour bar
    # on the figure the given axes belong to.
    if ax is None:
        fig, ax = plt.subplots(constrained_layout=True)
    else:
        fig = ax.figure

    # title for the plot
    ax.set_title(column_name)
    # face color of the plot
    ax.set_facecolor('#00FFFF')
    # deactivate grid
    ax.grid(False)
    # remove the ticks of these axes (not of whichever axes are current)
    ax.set_xticks([])
    ax.set_yticks([])

    # Make the plot
    merged_data.plot(
        column=column_name,
        cmap=cmap,
        edgecolors='lightblue',
        vmin = vmin,
        vmax = vmax,
        ax=ax
    )

    # Colour bar built from the same colour map and value range as the plot
    sm = plt.cm.ScalarMappable(cmap=cmap,norm=plt.Normalize(vmin=vmin,vmax=vmax))
    fig.colorbar(sm,ax=ax,orientation='horizontal')


In [75]:
# The autonomous communities (CCAA) are identified by codes that follow the
# 'ISO 3166-2:ES' convention: https://www.iso.org/obp/ui/es/#iso:code:3166:ES.
#
# The table below lists one "ES-<code>,<name>" pair per line and is turned
# into a dictionary from CCAA names to codes (without the 'ES-' prefix).
#
# NOTE(review): ISO 3166-2:ES lists Melilla as ES-ML; the table uses ES-ME.
# Left as is -- confirm which code the data file actually uses.

communities = """
ES-AN,Andalucía
ES-AR,Aragón
ES-AS,Asturias
ES-CN,Canarias
ES-CB,Cantabria
ES-CM,Castilla La Mancha
ES-CL,Castilla y León
ES-CT,Catalunya
ES-EX,Extremadura
ES-GA,Galiza
ES-IB,Illes Balears
ES-RI,La Rioja
ES-MD,Madrid
ES-MC,Murcia
ES-NC,Navarra
ES-PV,Euskadi
ES-VC,Comunitat Valenciana
ES-CE,Ceuta
ES-ME,Melilla
"""

# Dictionary relating "CCAA names":"ISO codes"
CCAA_dict = {}

for entry in communities.strip().split('\n'):
    iso_code, community = entry.split(',')
    # setdefault keeps the first code seen should a name ever be repeated
    CCAA_dict.setdefault(community, iso_code.replace('ES-', ''))

# Names and codes of the CCAA, each tuple sorted on its own (so the two
# tuples are not aligned position by position)
CCAA_names_tuple = tuple(sorted(CCAA_dict))
CCAA_codes_tuple = tuple(sorted(CCAA_dict.values()))

In [76]:
# The map data identifies each CCAA by a cartographic ID (1 to 19 in the
# table below). The table lists one "ES-<code>,<ID>" pair per line and is
# turned into a dictionary that translates those IDs into CCAA ISO codes.

cartodb_ID_str = """
ES-AN,16
ES-AR,15
ES-AS,14
ES-CN,19
ES-CB,12
ES-CM,10
ES-CL,11
ES-CT,9
ES-EX,7
ES-GA,6
ES-IB,13
ES-RI,17
ES-MD,5
ES-MC,4
ES-NC,3
ES-PV,2
ES-VC,8
ES-CE,18
ES-ME,1
"""

# Dictionary relating "cartographic ID numbers":"ISO codes"
CCAA_cartodb_ID_dict = {}

for entry in cartodb_ID_str.strip().split('\n'):
    iso_code, raw_id = entry.split(',')
    # IDs are stored as integers; the first code seen for an ID is kept
    CCAA_cartodb_ID_dict.setdefault(int(raw_id), iso_code.replace('ES-', ''))

In [77]:
#                Script to obtain COVID data in SPAIN
#
# - data_COVID19_spain = data frame with all the data for Spain
# - data_COVID19_spain_last = data frame with the last update
# - communities_data_frames_dict = dictionary containing data frames for
#                                  every community. {'name':dataframe.}
# - data_COVID19_spain_sum = data frame for the total data in Spain, i.e.,
#                            summed over the communities.


# url for the data and name for the csv file
data_url = 'https://covid19.isciii.es/resources/serie_historica_acumulados.csv'
data_file_name = 'data.csv'

# Download the data csv file and make the national data frame from that same
# file (passed explicitly instead of relying on the matching default name)
downloadDATA(data_url,data_file_name)
data_COVID19_spain = makeNationalDataFrame(data_file_name)
data_COVID19_columns = data_COVID19_spain.columns.to_list()

# Column names for the data, taken by position in the csv file
ISO_code_column_name = data_COVID19_columns[0]
date_column_name = data_COVID19_columns[1]
cases_column_name = data_COVID19_columns[2]
deaths_column_name = data_COVID19_columns[5]
recovered_column_name = data_COVID19_columns[6]
activeCases_column_name = 'Casos Activos'

# Make a column for the ACTIVE cases: cases - deaths - recovered
data_COVID19_spain[activeCases_column_name] = (
    data_COVID19_spain[cases_column_name]
    - data_COVID19_spain[deaths_column_name]
    - data_COVID19_spain[recovered_column_name]
)

# Data set with the last update: the rows carrying the most recent date.
# The date column is addressed through date_column_name, like everywhere else.
last_update_date = data_COVID19_spain[date_column_name].max()
data_COVID19_spain_last = data_COVID19_spain[
    data_COVID19_spain[date_column_name] == last_update_date
].reset_index(drop=True)

# Dictionary with a dataframe for every autonomous community
communities_data_frames_dict = makeCommunitiesDataFrameDict(data_COVID19_spain,CCAA_dict)

# Data set with the sum of cases over the communities, one row per date.
# Grouping by date adds up the rows that share a date. Adding the community
# frames row by row would only be right if every community had exactly the
# same dates, and would give NaN totals if a community had no rows at all.
# Only the communities listed in CCAA_dict are included.
summed_column_names = [
    cases_column_name,
    deaths_column_name,
    recovered_column_name,
    activeCases_column_name
]
rows_of_listed_communities = data_COVID19_spain[ISO_code_column_name].isin(list(CCAA_dict.values()))
data_COVID19_spain_sum = (
    data_COVID19_spain
    .loc[rows_of_listed_communities, [date_column_name] + summed_column_names]
    .groupby(date_column_name, as_index=False)
    .sum()
)
# Sorted list of the dates present in the summed data set
dates_list = data_COVID19_spain_sum[date_column_name].to_list()

In [78]:
#                Script to make a GeoDataFrame for the geometry of Spain

# Read the map data
map_df = gpd.read_file('shapefiles_espana_ccaa_1')

# Discard the two columns that are not needed
del map_df['codaut_sin']
del map_df['codaut']

# Take the column with the cartographic CCAA IDs out of the frame ...
map_df_cartoID_column = map_df.pop('cartodb_id')
# ... and translate every ID into its CCAA ISO code
ISO_code_column_data = [
    CCAA_cartodb_ID_dict[carto_id] for carto_id in map_df_cartoID_column
]

# Put the ISO codes in as the first column, under the same column name the
# COVID data uses for them
map_df.insert(
    loc=0,
    column=ISO_code_column_name,
    value=ISO_code_column_data,
    allow_duplicates=True
)

In [79]:
#                Script to merge the GeoDataFrame with the data frame of the last update

# Index both data sets by the CCAA ISO code, join the coronavirus data onto
# the map data and turn the ISO code back into a regular column
merged_data = (
    map_df
    .set_index(ISO_code_column_name)
    .join(data_COVID19_spain_last.set_index(ISO_code_column_name))
    .reset_index()
)

In [80]:
# Widget to display the evolution of COVID-19 in a CCAA

# Selection widget for the CCAA: a dropdown over the sorted CCAA names,
# starting on the first one.
# NOTE(review): confirm that ipw.Dropdown accepts `continuous_update`.
CCAA_selector = ipw.Dropdown(
    options = CCAA_names_tuple,
    value=CCAA_names_tuple[0],
    description='',
    disabled = False,
    continuous_update = False
)
# Text label shown above the selector
CCAA_selector_label = ipw.Label('Comunidad autónoma: ')

# Interactive widget that redraws the area plot of the selected CCAA;
# the keyword `x` must match the parameter name of the lambda
w1 = interactive(
    lambda x: plot_CCAA_area_plot(x,communities_data_frames_dict),
    x=CCAA_selector
)
# Clear the selector description again after interactive() has been called
# (presumably interactive() labels the control with the argument name --
# confirm against the ipywidgets documentation). Keep this line after the
# interactive() call.
CCAA_selector.description = ''

# The finished widget for the evolution: label on top, interactive plot below
evol_widget = ipw.VBox([CCAA_selector_label,w1])

In [81]:
# Widget to display the map chart

# Radio buttons to select which data column the map shows: total cases,
# active cases, deaths or recovered
map_selector = ipw.RadioButtons(
    options=[
        cases_column_name,
        activeCases_column_name,
        deaths_column_name,
        recovered_column_name
    ],
    layout={'width': 'max-content'},
    disabled=False
)

# Interactive widget that redraws the map for the selected column;
# the keyword `x` must match the parameter name of the lambda
map_widget = interactive(
    lambda x: plotMap(x,merged_data),
    x = map_selector
)
# Clear the selector description after interactive() has been called
# (presumably interactive() labels the control with the argument name --
# confirm against the ipywidgets documentation). Keep this line after the
# interactive() call.
map_selector.description=''

In [82]:
# Final app: the map widget and the evolution widget side by side
App = ipw.HBox([map_widget,evol_widget])

In [83]:
# Display the assembled app as the output of this cell
App

HBox(children=(interactive(children=(RadioButtons(layout=Layout(width='max-content'), options=('Casos', 'Casos…