In [None]:
!pip install covasim

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting covasim
  Downloading covasim-3.1.4-py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m69.2 MB/s[0m eta [36m0:00:00[0m
Collecting sciris>=2.0.1 (from covasim)
  Downloading sciris-3.0.0-py3-none-any.whl (213 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m213.5/213.5 kB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
Collecting xlsxwriter (from sciris>=2.0.1->covasim)
  Downloading XlsxWriter-3.1.2-py3-none-any.whl (153 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.0/153.0 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from sciris>=2.0.1->covasim)
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting zstandard (from sciris>=2.0.1-

In [None]:
import covasim as cv
import numpy as np
import pandas as pd
import random
import math

Covasim 3.1.4 (2022-10-22) — © 2020-2022 by IDM


In [None]:
# Dispersion settings explored with this notebook (beta_dist par1 / par2):
#   non-over-dispersed data: k = 1.12,  R = 3.9  (par1 = 100, par2 = 2.0)
#   over-dispersed data:     k = 0.497, R = 3.84 (par1 = 100, par2 = 0.5)   <- configured below

# Same transmission weight for every contact layer (home, school, work, community).
layer_betas = {'h': 0.6, 's': 0.6, 'w': 0.6, 'c': 0.6}

# Per-person transmissibility distribution.
transmissibility_dist = {'dist': 'neg_binomial', 'par1': 100, 'par2': 0.5}

pars = {
    'pop_type': 'hybrid',
    'beta_layer': layer_betas,
    'beta_dist': transmissibility_dist,
    'pop_size': 5000,
    # 'use_waning': False,  (disabled override, kept for reference)
}

# Run the replicate simulations with MultiSim; keep_people=True is passed so that
# each finished sim still exposes sim.people for the log extraction below.
sims = cv.MultiSim(cv.Sim(pars), n_runs=1000, keep_people=True)
sims.run()

# One infection log per simulation, in the same order as sims.sims.
infection_logs = [finished_sim.people.infection_log for finished_sim in sims.sims]

In [None]:
def getData(infections, n_seeds=20, percents=(0.75, 0.5, 0.25)):
  """Turn one infection log into per-infector cluster sizes.

  Args:
    infections: list of dict-like entries, each read through
      ``.get('source')``, ``.get('target')`` and ``.get('layer')``. Layers are
      expected to be 'h' (home), 's' (school), 'w' (work) or 'c' (community);
      any other layer is reported with a printed message and not counted.
    n_seeds: number of leading log entries treated as seed infections and
      dropped before analysis (default 20, the value previously hard-coded).
    percents: fractions of the non-household infections that are assumed to be
      traced. One subsampled data set is produced per entry, in this order.

  Returns:
    Tuple ``(perfectData, subsettedData)``:
      * ``perfectData``: list with the total number of infections caused by
        each infector (ordered by infector id), followed by zero padding.
      * ``subsettedData``: list with one list per entry of ``percents``. In
        each, the community, work and school counts of every infector are
        scaled by the fraction and rounded up with ``math.ceil``; home counts
        are kept in full.

  Every fraction is applied to the ORIGINAL counts (they do not compound), and
  the infector with the highest id is included like all the others.
  """
  ##Ignores date of infection, could incorporate this

  layerToKey = {'c': 'communityCount', 's': 'schoolCount', 'w': 'workCount', 'h': 'homeCount'}

  ##Removing seed infections (slicing copies the list, so the caller's log is left untouched)
  ##and ordering by infector so the resulting clusters come out sorted by source id
  infectionsWithoutSeeds = sorted(infections[n_seeds:], key=lambda e: e.get('source'))

  ##One dictionary per infector holding ID and the number of people infected at each layer
  clustersBySource = {}
  infectees = set()
  for entry in infectionsWithoutSeeds:
    source = entry.get('source')
    infectees.add(entry.get('target'))

    counts = clustersBySource.get(source)
    if counts is None:
      counts = dict(Source = source, homeCount = 0, communityCount = 0, schoolCount = 0, workCount = 0)
      clustersBySource[source] = counts

    currLayer = entry.get('layer')
    countKey = layerToKey.get(currLayer)
    if countKey is None:
      print('error found in data: ', currLayer)
    else:
      counts[countKey] += 1

  ##dicts keep insertion order, so this is sorted by source id
  clusters = list(clustersBySource.values())

  #account for those who didn't infect anyone
  # NOTE(review): this keeps the original formula (#unique infectees - #unique infectors).
  # Seed cases can be infectors without being infectees here, so this may under-count
  # the zero-size clusters -- confirm whether seeds should be included.
  zeroPadding = [0] * max(len(infectees) - len(clusters), 0)

  ##Save the perfectly traced data here
  perfectData = [
    c['communityCount'] + c['workCount'] + c['schoolCount'] + c['homeCount']
    for c in clusters
  ]
  perfectData.extend(zeroPadding)

  ##Partially traced data: scale the non-household layers, always from the original counts
  subsettedData = []
  for percent in percents:
    alteredData = [
      math.ceil(percent * c['communityCount'])
      + math.ceil(percent * c['workCount'])
      + math.ceil(percent * c['schoolCount'])
      + c['homeCount']
      for c in clusters
    ]
    alteredData.extend(zeroPadding)
    subsettedData.append(alteredData)
  return (perfectData, subsettedData)

In [None]:
# Analyse every simulation's infection log once. Each result is
# (perfectly traced data, [75% traced, 50% traced, 25% traced]).
perSimResults = [getData(log) for log in infection_logs]

# Split the results into one list per tracing level, keeping simulation order.
perfectData = [result[0] for result in perSimResults]
twentyFivePercentData = [result[1][2] for result in perSimResults]
fiftyPercentData = [result[1][1] for result in perSimResults]
seventyFivePercentData = [result[1][0] for result in perSimResults]

In [None]:
from google.colab import drive
drive.mount('drive')

dfPerf = pd.DataFrame(perfectData)
df25 = pd.DataFrame(twentyFivePercentData)
df50 = pd.DataFrame(fiftyPercentData)
df75 = pd.DataFrame(seventyFivePercentData)

dfPerf.to_csv('metaDataPerfOverDispersedSameBeta.csv', index = False)
!cp metaDataPerfOverDispersedSameBeta.csv "drive/My Drive/Covasim"

df25.to_csv('metaData25OverDispersedSameBeta.csv', index = False)
!cp metaData25OverDispersedSameBeta.csv "drive/My Drive/Covasim"

df50.to_csv('metaData50OverDispersedSameBeta.csv', index = False)
!cp metaData50OverDispersedSameBeta.csv "drive/My Drive/Covasim"

df75.to_csv('metaData75OverDispersedSameBeta.csv', index = False)
!cp metaData75OverDispersedSameBeta.csv "drive/My Drive/Covasim"

Mounted at drive
