# Adding daily new deaths/confirmed cases and their smoothed (7-day average) series

In [1]:
import matplotlib.pyplot as plt 
%matplotlib inline
import pandas as pd
import numpy as np
from IPython import display

## 1. Death Data

In [2]:
# Load the Spain OWID workbook (cases and deaths per date) into a pandas DataFrame
df = pd.read_excel(io='Spain_OWID_Confirmed_Deaths.xlsx')
df.head(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths
0,ESP,Europe,Spain,2020-01-22,0,0.0,0,0.0
1,ESP,Europe,Spain,2020-01-23,0,0.0,0,0.0
2,ESP,Europe,Spain,2020-01-24,0,0.0,0,0.0
3,ESP,Europe,Spain,2020-01-25,0,0.0,0,0.0
4,ESP,Europe,Spain,2020-01-26,0,0.0,0,0.0


In [3]:
# Cumulative (total) deaths for every date in the file.
# The column is selected by name: a positional lookup in `df.values` would silently
# return a different series if the sheet ever gained or reordered a column.
# dtype=object keeps the array type that the positional lookup produced.
T_deaths_original = df['total_deaths'].to_numpy(dtype=object)

T_deaths_original[0:50]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3,
       5, 10, 17, 28, 35, 54], dtype=object)

In [4]:
# Creation of New Deaths (Death[t] - Death[t-1])
#
# The day-over-day change of the cumulative series is computed on whole-array slices
# instead of an index loop. The first day has no predecessor, so it is fixed at 0.

deaths_diff = T_deaths_original[1:] - T_deaths_original[:-1]

N_deaths = np.empty(len(T_deaths_original), dtype = object)
N_deaths[0] = 0  # First value is 0
# A cumulative total that decreases gives a negative difference; such days are stored
# as 0. `>= 0` is also False for NaN, so a missing difference is stored as 0 too.
N_deaths[1:] = np.where(deaths_diff >= 0, deaths_diff, 0)

N_deaths.shape, N_deaths

((793,),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        2, 5, 7, 11, 7, 19, 1, 78, 62, 94, 53, 191, 90, 207, 213, 332, 397,
        539, 497, 839, 718, 773, 844, 821, 913, 748, 923, 961, 850, 749,
        694, 700, 704, 747, 655, 634, 525, 603, 547, 300, 652, 607, 687,
        41, 410, 399, 430, 435, 440, 367, 378, 288, 331, 301, 453, 268, 0,
        557, 164, 164, 185, 244, 213, 229, 179, 143, 123, 176, 184, 217,
        138, 104, 0, 146, 69, 110, 52, 688, 50, 74, 0, 283, 0, 2, 2, 4, 2,
        0, 0, 1, 5, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1179, 7, 1,
        1, 1, 2, 3, 8, 3, 2, 3, 9, 9, 4, 17, 0, 0, 3, 4, 4, 5, 2, 0, 0, 3,
        3, 4, 3, 4, 0, 0, 2, 2, 2, 3, 3, 0, 0, 2, 2, 5, 2, 2, 0, 0, 27, 26,
        1, 1, 3, 0, 0, 73, 5, 0, 26, 12, 0, 0, 29, 24, 127, 16, 25, 0, 0,
        34, 52, 47, 25, 15, 0, 0, 83, 58, 42, 40, 184, 0, 0, 98, 78, 34,
        71, 48, 0, 0, 101,

In [5]:
# Creation of New Deaths Smoothed (7 days average)
#
# Trailing mean: the entry for day t averages the daily values of days t-6 .. t.

window_days = 7  # length of the averaging window
N_deaths_smooth = np.empty(len(T_deaths_original), dtype = object)

# The first window_days-1 entries have no complete window behind them. They are filled
# with 0 (not NaN/None) so the array keeps one entry per day; they are placeholders,
# not real averages. Slice assignment also copes with a series shorter than the window.
N_deaths_smooth[:window_days - 1] = 0

# Moving average in 7 days
for last in range(window_days - 1, len(N_deaths_smooth)):
    # built-in sum adds the window left to right starting from 0
    N_deaths_smooth[last] = sum(N_deaths[last - window_days + 1:last + 1]) / window_days

N_deaths_smooth.shape, N_deaths_smooth

((793,),
 array([0, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.14285714285714285, 0.2857142857142857, 0.42857142857142855,
        0.7142857142857143, 1.4285714285714286, 2.4285714285714284, 4.0,
        4.857142857142857, 7.428571428571429, 7.428571428571429,
        18.285714285714285, 26.428571428571427, 38.857142857142854,
        44.857142857142854, 71.14285714285714, 81.28571428571429,
        110.71428571428571, 130.0, 168.57142857142858, 211.85714285714286,
        281.2857142857143, 325.0, 432.0, 505.0, 585.0, 658.1428571428571,
        718.7142857142857, 772.1428571428571, 808.0, 820.0,
        854.7142857142857, 865.7142857142857, 852.1428571428571, 834.0,
        803.5714285714286, 797.2857142857143, 772.1428571428571,
        728.4285714285714, 697.5714285714286, 665.5714285714286,
        652.571428571

## 2. Confirmed Data

In [6]:
# `df` still holds the workbook loaded in section 1 (nothing in between modifies it),
# so it is reused here instead of reading the same file from disk a second time
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths
0,ESP,Europe,Spain,2020-01-22,0,0.0,0,0.0
1,ESP,Europe,Spain,2020-01-23,0,0.0,0,0.0
2,ESP,Europe,Spain,2020-01-24,0,0.0,0,0.0
3,ESP,Europe,Spain,2020-01-25,0,0.0,0,0.0
4,ESP,Europe,Spain,2020-01-26,0,0.0,0,0.0


In [7]:
# Cumulative confirmed cases and the file's own `new_cases` column, for every date.
# Columns are selected by name: a positional lookup in `df.values` would silently
# return a different series if the sheet ever gained or reordered a column.
# dtype=object keeps the array type that the positional lookup produced.
T_cases_original = df['total_cases'].to_numpy(dtype=object)
# NOTE(review): N_cases_original is only displayed here; the cells visible in this
# notebook recompute the daily counts from T_cases_original instead.
N_cases_original = df['new_cases'].to_numpy(dtype=object)

T_cases_original[0:20], N_cases_original[0:20]

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2],
       dtype=object),
 array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0], dtype=object))

In [8]:
# Creation of New Confirmed (Confirmed[t] - Confirmed[t-1])
#
# The day-over-day change of the cumulative series is computed on whole-array slices
# instead of an index loop. The first day has no predecessor, so it is fixed at 0.

cases_diff = T_cases_original[1:] - T_cases_original[:-1]

N_confirmed = np.empty(len(T_cases_original), dtype = object)
N_confirmed[0] = 0  # First value is 0
# A cumulative total that decreases gives a negative difference; such days are stored
# as 0. `>= 0` is also False for NaN, so a missing difference is stored as 0 too.
N_confirmed[1:] = np.where(cases_diff >= 0, cases_diff, 0)

N_confirmed.shape, N_confirmed

((793,),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 7, 2, 17, 13, 39, 36, 45,
        57, 37, 141, 100, 173, 400, 622, 582, 0, 2955, 1159, 1407, 2144,
        1806, 2162, 4053, 2447, 4964, 3394, 6368, 4749, 9630, 8271, 7933,
        7516, 6875, 7846, 7967, 8195, 7947, 7134, 6969, 5478, 5029, 5267,
        6278, 5002, 5051, 4754, 3804, 3268, 2442, 5103, 7304, 5891, 887,
        6948, 1536, 3968, 4211, 4635, 0, 2915, 1729, 1831, 1308, 2144, 518,
        1781, 1366, 884, 545, 1318, 996, 1122, 1410, 721, 772, 3086, 594,
        661, 849, 643, 515, 0, 908, 431, 518, 482, 1787, 466, 482, 0, 859,
        0, 1647, 658, 664, 251, 159, 294, 394, 334, 318, 332, 240, 167,
        249, 314, 427, 502, 396, 323, 181, 219, 355, 585, 307, 363, 334,
        232, 248, 334, 400, 419, 564, 301, 200, 301, 388, 444, 442, 0, 0,
        1244, 341, 383, 543, 852, 0, 0, 2045, 666, 875, 1361, 1400, 0, 0,
        4581, 1358, 1357, 2615, 

In [9]:
# Creation of New Confirmed Smoothed (7 days average)
#
# Trailing mean: the entry for day t averages the daily values of days t-6 .. t.

window_days = 7  # length of the averaging window
N_confirmed_smooth = np.empty(len(T_cases_original), dtype = object)

# The first window_days-1 entries have no complete window behind them. They are filled
# with 0 (not NaN/None) so the array keeps one entry per day; they are placeholders,
# not real averages. Slice assignment also copes with a series shorter than the window.
N_confirmed_smooth[:window_days - 1] = 0

# Moving average in 7 days
for last in range(window_days - 1, len(N_confirmed_smooth)):
    # built-in sum adds the window left to right starting from 0
    N_confirmed_smooth[last] = sum(N_confirmed[last - window_days + 1:last + 1]) / window_days

N_confirmed_smooth.shape, N_confirmed_smooth

((793,),
 array([0, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.14285714285714285,
        0.14285714285714285, 0.14285714285714285, 0.14285714285714285,
        0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.0,
        0.14285714285714285, 0.14285714285714285, 0.14285714285714285,
        0.14285714285714285, 0.14285714285714285, 0.14285714285714285,
        0.14285714285714285, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.5714285714285714, 1.5714285714285714, 1.8571428571428572,
        4.285714285714286, 6.142857142857143, 11.714285714285714,
        16.857142857142858, 22.714285714285715, 29.857142857142858,
        34.857142857142854, 52.57142857142857, 65.0, 84.14285714285714,
        136.14285714285714, 218.57142857142858, 293.57142857142856,
        288.2857142857143, 690.2857142857143, 841.5714285714286,
        1017.8571428571429, 1267.0, 1436.142857142857, 1661.857142857143,
        2240.8571428571427, 2168.285714285714, 2711.8571428571427,
        2995.

In [10]:
# Collecting the dates
# Selected by column name rather than by position in `df.values`.
# dtype=object keeps every entry as a pandas Timestamp (as the positional lookup did),
# which is what the later strftime conversion relies on.

dates = df['date'].to_numpy(dtype=object)
dates.shape, dates[0:15]

((793,),
 array([Timestamp('2020-01-22 00:00:00'), Timestamp('2020-01-23 00:00:00'),
        Timestamp('2020-01-24 00:00:00'), Timestamp('2020-01-25 00:00:00'),
        Timestamp('2020-01-26 00:00:00'), Timestamp('2020-01-27 00:00:00'),
        Timestamp('2020-01-28 00:00:00'), Timestamp('2020-01-29 00:00:00'),
        Timestamp('2020-01-30 00:00:00'), Timestamp('2020-01-31 00:00:00'),
        Timestamp('2020-02-01 00:00:00'), Timestamp('2020-02-02 00:00:00'),
        Timestamp('2020-02-03 00:00:00'), Timestamp('2020-02-04 00:00:00'),
        Timestamp('2020-02-05 00:00:00')], dtype=object))

In [11]:
# Checking if dates and data points have the same length
# Fail fast on a mismatch instead of relying on a visual comparison of the two numbers
assert len(dates) == len(T_cases_original), "dates and total cases must have one entry per day"
len(dates), len(T_cases_original)

(793, 793)

In [12]:
# Convert every date object to its text form
from datetime import datetime  # NOTE(review): not referenced in this cell; kept as-is

# '2020-03-21' Format
dates_str = np.array([day.strftime("%Y-%m-%d") for day in dates], dtype=object)

len(dates_str), dates_str[0:10]

(793,
 array(['2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25',
        '2020-01-26', '2020-01-27', '2020-01-28', '2020-01-29',
        '2020-01-30', '2020-01-31'], dtype=object))

In [13]:
# Selecting only the range of dates for which the final parameters were previously calculated
first_day = '2020-03-21'
last_day = '2022-03-18'

# Positions at which each boundary date occurs in dates_str
first_matches = np.where(dates_str == first_day)[0]
last_matches = np.where(dates_str == last_day)[0]

# A boundary that is absent from the data would otherwise surface as an opaque IndexError
if first_matches.size == 0 or last_matches.size == 0:
    raise ValueError(f"first_day={first_day!r} and last_day={last_day!r} must both be present in dates_str")

first_index = first_matches[0]
last_index = last_matches[0]

# A reversed range would silently produce empty slices below
if first_index > last_index:
    raise ValueError(f"first_day={first_day!r} must not come after last_day={last_day!r}")

# Both boundary days are included (hence last_index+1)
final_dates = dates_str[first_index:last_index+1]
final_confirmed = N_confirmed[first_index:last_index+1]
final_confirmed_smooth = N_confirmed_smooth[first_index:last_index+1]
final_deaths = N_deaths[first_index:last_index+1]
final_deaths_smooth = N_deaths_smooth[first_index:last_index+1]

# Lengths of the five slices, then the first and last five selected dates
(len(final_dates), len(final_confirmed), len(final_confirmed_smooth),
 len(final_deaths), len(final_deaths_smooth),
 final_dates[0:5], final_dates[-5:len(final_dates)])

(728,
 728,
 728,
 728,
 728,
 array(['2020-03-21', '2020-03-22', '2020-03-23', '2020-03-24',
        '2020-03-25'], dtype=object),
 array(['2022-03-14', '2022-03-15', '2022-03-16', '2022-03-17',
        '2022-03-18'], dtype=object))

In [14]:
# Country column: one 'Spain' entry for every selected date
region = np.empty(len(final_dates), dtype = object)
region[:] = 'Spain'  # the same value is written to every position

region.shape, region[0:10]

((728,),
 array(['Spain', 'Spain', 'Spain', 'Spain', 'Spain', 'Spain', 'Spain',
        'Spain', 'Spain', 'Spain'], dtype=object))

In [15]:
# Array concatenation for building the final dataframe

# One source array per output column, listed in output order.
# Each array is expected to hold len(final_dates) entries.
column_arrays = [region, final_dates, final_confirmed,
                 final_confirmed_smooth, final_deaths, final_deaths_smooth]

# Derived from the list above so the count cannot drift from the actual columns
columns = len(column_arrays)  # Number of columns
X_total = np.empty((len(final_dates), columns), dtype = object)

# Fill the table one column at a time instead of building every row in a Python loop
for col, values in enumerate(column_arrays):
    X_total[:, col] = values

X_total.shape, X_total[0:10]

((728, 6),
 array([['Spain', '2020-03-21', 4964, 2711.8571428571427, 332,
         168.57142857142858],
        ['Spain', '2020-03-22', 3394, 2995.714285714286, 397,
         211.85714285714286],
        ['Spain', '2020-03-23', 6368, 3599.1428571428573, 539,
         281.2857142857143],
        ['Spain', '2020-03-24', 4749, 4019.5714285714284, 497, 325.0],
        ['Spain', '2020-03-25', 9630, 5086.428571428572, 839, 432.0],
        ['Spain', '2020-03-26', 8271, 5689.0, 718, 505.0],
        ['Spain', '2020-03-27', 7933, 6472.714285714285, 773, 585.0],
        ['Spain', '2020-03-28', 7516, 6837.285714285715, 844,
         658.1428571428571],
        ['Spain', '2020-03-29', 6875, 7334.571428571428, 821,
         718.7142857142857],
        ['Spain', '2020-03-30', 7846, 7545.714285714285, 913,
         772.1428571428571]], dtype=object))

In [16]:
# Column headers of the final dataframe, one per column and in column order
labels = [
    'Country/Region',
    'Date',
    'Daily Confirmed',
    'Daily Confirmed Smooth (7 days)',
    'Daily Deaths',
    'Daily Deaths Smooth (7 days)',
]

In [17]:
# Wrap the assembled table in a DataFrame, naming its columns with `labels`
df_final = pd.DataFrame(X_total, columns=labels)
df_final

Unnamed: 0,Country/Region,Date,Daily Confirmed,Daily Confirmed Smooth (7 days),Daily Deaths,Daily Deaths Smooth (7 days)
0,Spain,2020-03-21,4964,2711.857143,332,168.571429
1,Spain,2020-03-22,3394,2995.714286,397,211.857143
2,Spain,2020-03-23,6368,3599.142857,539,281.285714
3,Spain,2020-03-24,4749,4019.571429,497,325.0
4,Spain,2020-03-25,9630,5086.428571,839,432.0
...,...,...,...,...,...,...
723,Spain,2022-03-14,0,12507.0,0,80.142857
724,Spain,2022-03-15,36066,14352.285714,281,79.571429
725,Spain,2022-03-16,0,11218.571429,0,60.571429
726,Spain,2022-03-17,0,7987.857143,0,48.428571


In [18]:
# Write the final dataframe to disk as CSV (without the row index)

from pathlib import Path

filepath = Path('Daily_Confirmed_Deaths_Spain.csv')
output_dir = filepath.parent
output_dir.mkdir(parents=True, exist_ok=True)  # no error if the folder already exists
df_final.to_csv(path_or_buf=filepath, index=False)