# Compute growth rate per country

## Auxiliary functions

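Basic growth rate $r(t) = \frac{x(t+1)}{x(t)}$. The rate between observations $t$ and $t+1$ is stored at position $t+1$ of the result; the first position has no predecessor and is set to `NaN`.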

In [1]:
# Ratio of each observation to the one immediately before it.
#
# ts: vector of observations ordered in time.
# Returns a vector with one entry per element of `ts`: entry i (i > 1) is
# ts[i] / ts[i - 1], and the first entry is NaN because it has no
# predecessor. A zero-length input gives a zero-length result, so the output
# always lines up with the input when stored as a column beside it.
# Division follows R arithmetic: a zero predecessor yields Inf or NaN.
growthRate <- function(ts){
    n <- length(ts)
    if (n == 0L) {
        # Nothing to compare; keep the output the same length as the input
        return(numeric(0))
    }
    # x(t+1) = ts[-1]  (everything but the first observation)
    # x(t)   = ts[-n]  (everything but the last observation)
    return(c(NaN, ts[-1] / ts[-n]))
}

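Normalized growth rate $r_N(t) = \frac{x(t+1)-x(t)}{x(t)}$. As with the basic rate, the value for the step from $t$ to $t+1$ is stored at position $t+1$, and the first position is set to `NaN`.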

In [2]:
# Relative change of each observation with respect to the one before it.
#
# ts: vector of observations ordered in time.
# Returns a vector with one entry per element of `ts`: entry i (i > 1) is
# (ts[i] - ts[i - 1]) / ts[i - 1], and the first entry is NaN because it has
# no predecessor. A zero-length input gives a zero-length result, so the
# output always lines up with the input when stored as a column beside it.
# Division follows R arithmetic: a zero predecessor yields Inf, -Inf or NaN.
growthRateNormalized <- function(ts){
    n <- length(ts)
    if (n == 0L) {
        # Nothing to compare; keep the output the same length as the input
        return(numeric(0))
    }
    # x(t+1) = ts[-1]  (everything but the first observation)
    # x(t)   = ts[-n]  (everything but the last observation)
    previous <- ts[-n]
    return(c(NaN, (ts[-1] - previous) / previous))
}


In [3]:
# Select the rows of `data.df` that belong to one (province, country) pair.
#
# province_country_pair: length-2 vector; element 1 is compared with the
#     Province.State column and element 2 with the Country.Region column.
# data.df: data frame containing Province.State and Country.Region columns.
# Returns the matching rows of `data.df` with all columns kept.
#
# Missing values are handled explicitly: an NA in the pair matches only NA
# entries of the column, and an NA in a column never matches a non-NA key.
# A plain `==` mask would contain NA in those cases, and indexing a data
# frame with NA produces spurious all-NA rows.
splitData <- function(province_country_pair,data.df){
    matchesKey <- function(column, key){
        column <- as.character(column)
        if (is.na(key)) {
            is.na(column)
        } else {
            !is.na(column) & column == key
        }
    }
    keep <- matchesKey(data.df$Province.State, province_country_pair[[1]]) &
            matchesKey(data.df$Country.Region, province_country_pair[[2]])
    data.df[keep, ]
}

# Append growth-rate columns for the three case counts of one data frame.
#
# country.df: data frame with Confirmed, Recovered and Deaths columns.
# growth_rate_function: function applied to each of those columns; its result
#     is stored as the corresponding new column.
# Returns `country.df` with ConfirmedGrowthRate, RecoveredGrowthRate and
# DeathsGrowthRate added, in that order.
computeGrowthRate <- function(country.df,growth_rate_function){
    # Evaluate the three rates first, in the same order they are stored
    confirmed_rate <- growth_rate_function(country.df$Confirmed)
    recovered_rate <- growth_rate_function(country.df$Recovered)
    deaths_rate <- growth_rate_function(country.df$Deaths)

    country.df$ConfirmedGrowthRate <- confirmed_rate
    country.df$RecoveredGrowthRate <- recovered_rate
    country.df$DeathsGrowthRate <- deaths_rate
    country.df
}


## Read data in

In [4]:
# Load the serialized input data set (stored in RDS format) into data.df
data.df <- readRDS(file = "data/COVID19_Global_Italy.Rdata")

In [5]:
# Preview the first six rows of the loaded data
head(data.df, n = 6L)

Unnamed: 0_level_0,Province.State,Country.Region,Lat,Long,Date,Confirmed,Recovered,Deaths,HospitalizedWSymptoms,ICU,TotalHospitalized,HomeIsolation,DailyConfirmed,Total,Tests
Unnamed: 0_level_1,<fct>,<chr>,<dbl>,<dbl>,<date>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
22,,Afghanistan,33.0,65.0,2020-01-22,0,0,0,,,,,,,
87,,Albania,41.1533,20.1683,2020-01-22,0,0,0,,,,,,,
152,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,,,,,,,
217,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,,,,,,,
282,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,,,,,,,
347,,Antigua and Barbuda,17.0608,-61.7964,2020-01-22,0,0,0,,,,,,,


## Split data according to unique Country-Province pair

In [6]:
# Distinct (province, country) combinations present in the data
unique_pairs.df <- unique(data.df[, c("Province.State", "Country.Region")])
# One character vector c(province, country) per combination; the province
# factor is converted to character so both entries are plain strings
unique_pairs.list <- Map(
    c,
    as.character(unique_pairs.df$Province.State),
    unique_pairs.df$Country.Region
)

In [7]:
# One data frame per (province, country) pair, extracted from the full data
split_data.list <- lapply(X = unique_pairs.list, FUN = splitData, data.df = data.df)

In [8]:
# Preview the first six rows of the first per-pair data frame
head(split_data.list[[1]], n = 6L)

Unnamed: 0_level_0,Province.State,Country.Region,Lat,Long,Date,Confirmed,Recovered,Deaths,HospitalizedWSymptoms,ICU,TotalHospitalized,HomeIsolation,DailyConfirmed,Total,Tests
Unnamed: 0_level_1,<fct>,<chr>,<dbl>,<dbl>,<date>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
22,,Afghanistan,33,65,2020-01-22,0,0,0,,,,,,,
23,,Afghanistan,33,65,2020-01-23,0,0,0,,,,,,,
24,,Afghanistan,33,65,2020-01-24,0,0,0,,,,,,,
25,,Afghanistan,33,65,2020-01-25,0,0,0,,,,,,,
26,,Afghanistan,33,65,2020-01-26,0,0,0,,,,,,,
27,,Afghanistan,33,65,2020-01-27,0,0,0,,,,,,,


## Compute actual growth rate

In [9]:
# Add normalized growth-rate columns to every per-pair data frame
augmented_data.list <- lapply(
    X = split_data.list,
    FUN = computeGrowthRate,
    growth_rate_function = growthRateNormalized
)

In [10]:
# Preview the first six rows of the first augmented data frame
head(augmented_data.list[[1]], n = 6L)

Unnamed: 0_level_0,Province.State,Country.Region,Lat,Long,Date,Confirmed,Recovered,Deaths,HospitalizedWSymptoms,ICU,TotalHospitalized,HomeIsolation,DailyConfirmed,Total,Tests,ConfirmedGrowthRate,RecoveredGrowthRate,DeathsGrowthRate
Unnamed: 0_level_1,<fct>,<chr>,<dbl>,<dbl>,<date>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>
22,,Afghanistan,33,65,2020-01-22,0,0,0,,,,,,,,,,
23,,Afghanistan,33,65,2020-01-23,0,0,0,,,,,,,,,,
24,,Afghanistan,33,65,2020-01-24,0,0,0,,,,,,,,,,
25,,Afghanistan,33,65,2020-01-25,0,0,0,,,,,,,,,,
26,,Afghanistan,33,65,2020-01-26,0,0,0,,,,,,,,,,
27,,Afghanistan,33,65,2020-01-27,0,0,0,,,,,,,,,,


## Merge data

In [11]:
# Stack all per-pair data frames back into a single data frame.
# A single rbind call over the whole list avoids re-copying the accumulated
# result once per element, which is what a pairwise Reduce(rbind, ...) does.
# unname() drops the list names so rbind does not prefix them to row names.
augmented_data.df <- do.call(rbind, unname(augmented_data.list))

## Sanity check

In [12]:
# The merged, augmented data must contain exactly as many rows as the
# original data; this evaluates to TRUE when no row was lost or duplicated
nrow(augmented_data.df) == nrow(data.df)

## Save data

In [14]:
# Write the augmented data in RDS format, using serialization version 2
saveRDS(
    object = augmented_data.df,
    file = "data/COVID19_Global_Italy_wGrowth.Rdata",
    version = 2
)