# Growth rate estimates

Code to read in information per country on total number of cases and number of days since case 100
Then to fit the data, obtaining the date of inflection and the initial growth rate

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors

import csv
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit

import pandas 

from scipy.optimize import curve_fit

%matplotlib inline

In [7]:
#Fit functions
def logistic_model(x,a,b,c):
    return c/(1+np.exp(-(x-b)/a))

def exponential_model(x,a,b,c):
    return a*np.exp(b*(x-c))

def linear_fit(x,m,c):
    return x*m + c

In [27]:
#Calculate chi-squared goodness of fit
def chisquare(data,expct):
    return sum((data-expct)**2 / expct)

In [130]:
#Function to obtain growth rate from data file
#Input: 
# inputfile = days since 100 cases , total number of cases, csv format
# country = name of country of interest
# daterange = boolean : option for a user specified range of days
# daystart = 0  : first day for fit period
# dayend = 100 : last day for fit period
def ObtainGrowthRate(inputfile,country,daterange=False,daystart=0,dayend=100):
    
    #Read in data file 
    df = pandas.read_csv(inputfile)
    
    #Define date range to fit. Default = all days
    mask = np.ones(len(df["Days since 100"]),dtype=bool)
 
    #Mask to only consider days with data
    dayend = df["Days since 100"][np.argmax(df[country])]
    mask = (df["Days since 100"] < dayend)

    #Adjust if date range is specified
    if daterange:
        mask = (df["Days since 100"] > daystart) & (df["Days since 100"] < dayend)
    
    #Check for sufficient data points:
    if not sum(df[country][mask]):
        print("Empty data for",country)
        return 0., 0., 0.
    
    #Perform a linear fit to the data
    popt, pcov = curve_fit(polfit, df["Days since 100"][mask],np.log10(df[country][mask]))
    chi2 = chisquare(df[country][mask],10.**(df["Days since 100"][mask]*popt[0] + popt[1]))

    inflection_date = 0.
    logchi2 = 0
    #Need minimum number of approx. ten days for a logistic fit
    if len(df["Days since 100"][mask]) > 15.:
        #Perform a logistic fit to the data
        log_fit, log_cov = curve_fit(logistic_model,df["Days since 100"][mask],df[country][mask],
                                 p0=[2.5,1,max(df[country][mask])])
        logchi2 = chisquare(df[country][mask],logistic_model(df["Days since 100"][mask],*log_fit))

        inflection_date = log_fit[1]

    if (chi2 < logchi2) or (inflection_date > max(df["Days since 100"][mask])) or not inflection_date:
        growthrate = popt[0]
        print("linear fit preferred for",country,"growth rate",growthrate)
        return growthrate, 0., inflection_date

    print("logistic fit preferred for",country)
    print("inflection date",inflection_date)
    
    #Perform linear fits pre and post inflection date to compare growth rates
    mask1 = mask & (df["Days since 100"] < inflection_date)
    mask2 = mask & (df["Days since 100"] > inflection_date)

    growthrate = 0.
    growthrate2 = 0.
    
    #Check for sufficient data points:
    if sum(mask1) > 5:
        popt, pcov = curve_fit(polfit, df["Days since 100"][mask1],np.log10(df[country][mask1]))
        chi2 = chisquare(df[country][mask1],10.**(df["Days since 100"][mask1]*popt[0] + popt[1]))
        growthrate = popt[0]
    
    if sum(mask2) > 5:
        popt2, pcov2 = curve_fit(polfit, df["Days since 100"][mask2],np.log10(df[country][mask2]))
        chi2_2 = chisquare(df[country][mask2],10.**(df["Days since 100"][mask2]*popt2[0] + popt2[1]))
        growthrate2 = popt2[0]
        
    return growthrate, growthrate2, inflection_date
    

In [131]:
ObtainGrowthRate("CountryCases.csv","United Kingdom")

linear fit preferred for United Kingdom growth rate 0.0988581653017098


The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return getattr(obj, method)(*args, **kwds)


(0.0988581653017098, 0.0, 18.396439308785965)

## List of countries

Now investigate the growth rates in a user-defined list of countries and write to file

In [132]:
#Read in countries from file 
def WriteGrowthRates(country_file):
    
    data = pandas.read_csv(country_file)
    #print(data.columns[0])
        
    csvname = "GrowthRates.csv"

    with open(csvname,mode='w') as growthrate_file:
        csv_writer = csv.DictWriter(growthrate_file,fieldnames=["Country","GrowthRate1","GrowthRate2","DayOfChange"])
        #country_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writeheader()
    
        cid = 1
        while cid < len(data.columns):
            growthrate = ObtainGrowthRate(country_file,data.columns[cid])
            print("Growth rate for",data.columns[cid],"is",growthrate[0])
            csv_writer.writerow({"Country":data.columns[cid],"GrowthRate1":format(growthrate[0],'.5g'),
                                "GrowthRate2":format(growthrate[1],'.5g'),"DayOfChange":format(growthrate[2],'.5g')})
            cid += 1

In [133]:
WriteGrowthRates("CountryCases.csv")

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return getattr(obj, method)(*args, **kwds)


linear fit preferred for Algeria growth rate 0.26786163241132666
Growth rate for Algeria is 0.26786163241132666
linear fit preferred for Australia growth rate 0.09351583948906489
Growth rate for Australia is 0.09351583948906489
linear fit preferred for Brazil growth rate 0.13343243793887039
Growth rate for Brazil is 0.13343243793887039
Empty data for Burkina Faso
Growth rate for Burkina Faso is 0.0
linear fit preferred for Chile growth rate 0.100101908685958
Growth rate for Chile is 0.100101908685958
logistic fit preferred for China
inflection date 21.303633540526747
Growth rate for China is 0.11434295276366868
linear fit preferred for Egypt growth rate 0.05970013439435429
Growth rate for Egypt is 0.05970013439435429
linear fit preferred for Germany growth rate 0.10807507727060052
Growth rate for Germany is 0.10807507727060052
logistic fit preferred for Iran (Islamic Republic of)
inflection date 17.351265812343037
Growth rate for Iran (Islamic Republic of) is 0.10937546588694347
logist