In [1]:
import pandas as pd
import networkx as nx
import pickle
from datetime import datetime

In [2]:
# Load the countryRestrictions dataset from its pickle file.
# NOTE: pickle.load can execute arbitrary code, so only open pickle files that
# come from a source you trust.
with open('countryRestrictions.pickle', 'rb') as restrictionsFile:
    countryRestrictions = pickle.load(restrictionsFile)

# Check which countries we have data for, then peek at one country's entry.
countries = list(countryRestrictions)
print(len(countries))
countryRestrictions['Sweden']

175


{'baseline': [[0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [1.0, 0.0, 1.0, 1.0, 0.0],
  [1.0, 0.0, 2.0, 2.0, 0.0],
  [8.0, 0.0, 10.0, 10.0, 0.0],
  [3.0, 0.0, 13.0, 13.0, 0.0],
  [0.0, 0.0, 13.0, 13.0, 0.0],
  [5.0, 0.0, 18.0, 18.0, 0.0],
  [13.0, 0.0, 31.0, 31.0, 0.0],
  [30.0, 0.0, 61.0, 61.0, 0.0],
  [25.0, 0.0, 86.

In [3]:
# Now that the dataset is loaded, compute an average day-over-day infection
# growth rate for every country: once for the "baseline" period and once for
# every other regulation key stored for that country.
# NOTE(review): each daily row is indexed positionally below (row[0], row[1],
# row[3]). Presumably these are new cases, deaths and an infected count --
# confirm against the notebook that builds countryRestrictions.pickle.

# Growth rate used when a country has no usable baseline of its own.
DEFAULT_BASELINE = 0.25
# A computed baseline outside [BASELINE_MIN, BASELINE_MAX] is considered too
# skewed and is replaced by DEFAULT_BASELINE.
BASELINE_MIN = 0.15
BASELINE_MAX = 0.4


def _dailyGrowthRates(dailyRows, waitForFirstCase=False):
    """Return the list of day-over-day changes in infections for one period.

    Parameters:
        dailyRows: iterable of per-day rows, each indexable by position.
        waitForFirstCase: when True, rows are skipped until the first one
            whose row[0] is greater than 0; that row seeds the series. When
            False, the very first row seeds the series.

    Returns:
        A list with one entry per row after the seed row. Each entry is
        (current / previous) - 1, where current is row[3] - row[1] and
        previous is the preceding day's value. The list is empty when there
        is no row after the seed row (or no seed row at all).
    """
    growthRates = []
    seeded = False
    for row in dailyRows:
        if not seeded:
            # We don't start recording until someone has been infected,
            # since leading zero days would skew the average.
            if waitForFirstCase and not row[0] > 0:
                continue
            seeded = True
            # NOTE(review): the seed value is taken as-is, while every later
            # value has row[1] subtracted -- confirm this asymmetry is intended.
            previousInfected = row[3]
            continue
        # Subtract any deaths from the current infected count.
        currentInfected = row[3] - row[1]
        if previousInfected == 0:
            # A ratio is undefined here, so the raw count is recorded instead.
            # NOTE(review): this mixes absolute counts with ratios in one
            # average -- confirm that is the desired behaviour.
            growthRates.append(currentInfected)
        else:
            growthRates.append((currentInfected / previousInfected) - 1)
        previousInfected = currentInfected
    return growthRates


def _baselineRate(regulation):
    """Return the baseline growth rate for one country's regulation mapping.

    Falls back to DEFAULT_BASELINE when the country has no 'baseline' entry,
    when that entry yields no day-over-day changes, or when the averaged value
    lies outside [BASELINE_MIN, BASELINE_MAX].
    """
    if 'baseline' not in regulation:
        return DEFAULT_BASELINE
    growthRates = _dailyGrowthRates(regulation['baseline'], waitForFirstCase=True)
    if not growthRates:
        return DEFAULT_BASELINE
    # Average the whole list to get a single fraction.
    baselineValue = sum(growthRates) / len(growthRates)
    if baselineValue > BASELINE_MAX or baselineValue < BASELINE_MIN:
        return DEFAULT_BASELINE
    return baselineValue


infectOfRegs = {}
for country, regulation in countryRestrictions.items():
    # The baseline always goes in first, followed by each regulation key.
    currCountry = {'baseline': _baselineRate(regulation)}

    # Repeat the process for every regulation key of this country.
    for regs, infectData in regulation.items():
        # The baseline has already been handled above.
        if regs == 'baseline':
            continue
        growthRates = _dailyGrowthRates(infectData)
        # Only record a regulation that produced at least one daily change.
        if growthRates:
            currCountry[regs] = sum(growthRates) / len(growthRates)
    infectOfRegs[country] = currCountry

infectOfRegs['United States of America']

{'baseline': 0.20737056149062355,
 'travel,': 0.23684563085742963,
 'travel,masks,': 0.021784213098193183,
 'travel,testing,testing_narrow,masks,': 0.011354511721673486}

In [6]:
# With the infection statistics done, get the thresholds at which regulations
# were introduced. Those come from the government measures workbook; keep only
# the rows for countries that also appear in our restrictions dataset.
govResponse = (
    pd.read_excel("Gov_Responses.xlsx", sheet_name="Dataset")
    .loc[lambda frame: frame["country"].isin(countries)]
)
govResponse

Unnamed: 0,country,geoid,iso,d,cases,deaths,school,school_local,domestic,domestic_local,...,wage,credit,taxc,taxd,export,rate,Rigidity_Public_Health,Economic_Measures,population_2019,continent
275,Afghanistan,AF,AFG,2020-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,38000000.0,Asia
276,Afghanistan,AF,AFG,2020-01-02 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,38000000.0,Asia
277,Afghanistan,AF,AFG,2020-01-03 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,38000000.0,Asia
278,Afghanistan,AF,AFG,2020-01-04 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,38000000.0,Asia
279,Afghanistan,AF,AFG,2020-01-05 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,38000000.0,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57470,Zimbabwe,ZW,ZWE,2020-09-27 00:00:00,16.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.423077,0.0,15000000.0,Africa
57471,Zimbabwe,ZW,ZWE,2020-09-28 00:00:00,9.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.423077,0.0,15000000.0,Africa
57472,Zimbabwe,ZW,ZWE,2020-09-29 00:00:00,4.0,1.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.423077,0.0,15000000.0,Africa
57473,Zimbabwe,ZW,ZWE,2020-09-30 00:00:00,21.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.423077,0.0,15000000.0,Africa


In [10]:
# We only care about a handful of regulation columns.
regulations = ['travel',
 'travel_partial',
 'travel_dom',
 'travel_dom_partial',
 'testing',
 'testing_narrow',
 'masks']

# For every country, record the running [deaths, cases] totals at the first
# row where each regulation column equals 1.
# Grouping by country (sort=False keeps the countries in the order they first
# appear) removes the need to seed the loop with a hard-coded first country,
# copes with an empty frame, and keeps all rows of a country together even if
# they are not contiguous in the sheet.
# NOTE(review): rows are processed in the frame's existing order, so this
# assumes each country's rows are already in date order -- confirm.
datesForRegs = {}
for currCountry, countryRows in govResponse.groupby('country', sort=False):
    regDate = {}
    deaths = 0
    totalInfect = 0
    for index, row in countryRows.iterrows():
        deaths += row['deaths']
        totalInfect += row['cases']
        for reg in regulations:
            # Only the first occurrence of a regulation is recorded; later
            # rows with the same regulation active are ignored.
            if row[reg] == 1 and reg not in regDate:
                regDate[reg] = [deaths, totalInfect]
        # Once every regulation has a threshold, the remaining rows of this
        # country cannot change the result, so stop scanning early.
        if len(regDate) == len(regulations):
            break
    datesForRegs[currCountry] = regDate

datesForRegs['China']

{'travel_dom': [17.0, 596.0],
 'travel_dom_partial': [17.0, 596.0],
 'masks': [17.0, 596.0],
 'travel': [3253.0, 81202.0],
 'travel_partial': [3253.0, 81202.0],
 'testing': [3296.0, 81800.0],
 'testing_narrow': [3296.0, 81800.0]}

In [15]:
# Persist both results so the simulation notebook can load them:
# the per-country regulation thresholds first ...
with open('Threshold.pickle', 'wb') as thresholdFile:
    pickle.dump(datesForRegs, thresholdFile, protocol=pickle.HIGHEST_PROTOCOL)
# ... followed by the per-country, per-regulation infection growth rates.
with open('RegulationInfectivity.pickle', 'wb') as infectivityFile:
    pickle.dump(infectOfRegs, infectivityFile, protocol=pickle.HIGHEST_PROTOCOL)