In [1]:
import pandas as pd
import numpy as np
import random
import os
import matplotlib.pyplot as plt

In [2]:
# Stream the CBG-to-CBG flow table in 100000-row pieces, then stitch the
# pieces back together into a single DataFrame.
chunked_data = pd.read_csv('src_data/cbg2cbg_revise.csv', chunksize=100000)
chunk_list = [piece for piece in chunked_data]
transport_data = pd.concat(chunk_list)
transport_data

Unnamed: 0,poi_cbg_source,poi_cbg_destination,src_prob,des_prob
0,10010201001,10010201001,0.050305,0.038239
1,10010201001,10010202001,0.039683,0.017381
2,10010201001,10010202002,0.018538,0.041715
3,10010201001,10010203002,0.015597,0.130939
4,10010201001,10010204003,0.006445,0.006952
...,...,...,...,...
17423096,560459513003,560279572003,0.009756,0.008876
17423097,560459513003,560459511002,0.333333,0.004438
17423098,560459513003,560459513001,0.274900,0.102071
17423099,560459513003,560459513002,0.202834,0.338757


In [3]:
# Probability that an active case triggers a new infection on a given day,
# indexed by the number of days the case has been active. The length of this
# list is also the number of days after which a case counts as recovered.
infection_chance_per_day = [0.2, 0.3, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1]

# Every distinct CBG that appears as a source in the flow table.
src_cbg_names = list(transport_data['poi_cbg_source'].unique())

# Population table (GeoId, Population).
pop_data = pd.read_csv('src_data/usa_population_revise.csv')
pop_data

Unnamed: 0,GeoId,Population
0,10010201001,730.0
1,10010201002,1263.0
2,10010202001,835.0
3,10010202002,1124.0
4,10010203001,2774.0
...,...,...
220329,721537506011,883.0
220330,721537506012,2523.0
220331,721537506013,991.0
220332,721537506021,1577.0


In [4]:
# Number of distinct source CBGs; used elsewhere in this notebook as the bound
# (n_src_CBG-1) when drawing a random index into src_cbg_names.
n_src_CBG = len(src_cbg_names)
n_src_CBG

216733

In [5]:
def get_random_des(src_df):
    """Draw one destination CBG from ``src_df``, weighted by ``des_prob``.

    The weights do not have to sum to 1: the uniform draw is scaled by their
    total before it is compared against the running cumulative sum.

    Parameters
    ----------
    src_df : pandas.DataFrame
        Candidate rows; must contain the columns ``poi_cbg_destination`` and
        ``des_prob``.

    Returns
    -------
    The ``poi_cbg_destination`` value of the selected row, or ``None`` when
    the weights sum to zero (which includes an empty frame).
    """
    weights = src_df['des_prob']
    total = weights.sum()
    if total == 0:
        return None

    threshold = random.random() * total
    destinations = src_df['poi_cbg_destination']
    # Rows whose cumulative weight is still 0 can never satisfy the strict
    # comparison (threshold >= 0), so no separate zero-weight check is needed.
    for des, cumulative in zip(destinations, weights.cumsum()):
        if threshold < cumulative:
            return des

    # Reached only when rounding leaves the last cumulative value at or below
    # the threshold. Use positional access: .loc[-1] is a label lookup and
    # raises KeyError unless the index happens to contain the label -1.
    return destinations.iloc[-1]

def get_random_src(des_df):
    """Draw one source CBG from ``des_df``, weighted by ``src_prob``.

    The weights do not have to sum to 1: the uniform draw is scaled by their
    total before it is compared against the running cumulative sum.

    Parameters
    ----------
    des_df : pandas.DataFrame
        Candidate rows; must contain the columns ``poi_cbg_source`` and
        ``src_prob``.

    Returns
    -------
    The ``poi_cbg_source`` value of the selected row, or ``None`` when the
    weights sum to zero (which includes an empty frame).
    """
    weights = des_df['src_prob']
    total = weights.sum()
    if total == 0:
        return None

    threshold = random.random() * total
    sources = des_df['poi_cbg_source']
    # Rows whose cumulative weight is still 0 can never satisfy the strict
    # comparison (threshold >= 0), so no separate zero-weight check is needed.
    for src, cumulative in zip(sources, weights.cumsum()):
        if threshold < cumulative:
            return src

    # Reached only when rounding leaves the last cumulative value at or below
    # the threshold. Use positional access: .loc[-1] is a label lookup and
    # raises KeyError unless the index happens to contain the label -1.
    return sources.iloc[-1]

In [7]:
# Pick one source CBG at a random position in src_cbg_names. random.randint is
# inclusive on both ends, hence the n_src_CBG-1 upper bound.
random_src = src_cbg_names[random.randint(0,n_src_CBG-1)]
random_src

121319506036

In [8]:
# Group the flow table once by source CBG and once by destination CBG so the
# rows for a single CBG can be fetched with get_group() during the simulation.
group_by_src = transport_data.groupby('poi_cbg_source')
group_by_des = transport_data.groupby('poi_cbg_destination')

In [37]:
def initCounter():
    """Build the three per-CBG counter tables for day 1.

    Returns a list ``[S, I, R]`` of DataFrames indexed by ``GeoId``. ``S``
    starts from a copy of ``pop_data`` with ``Population`` renamed to
    ``Day1``; ``I`` and ``R`` are copies of it with ``Day1`` set to 0.
    ``pop_data`` itself is not modified.
    """
    susceptible = (
        pop_data.copy()
        .set_index('GeoId')
        .rename(columns={'Population': 'Day1'})
    )

    infected = susceptible.copy()
    infected['Day1'] = 0

    recovered = infected.copy()

    return [susceptible, infected, recovered]

In [39]:
def initCase(counters, new_case_nums = (1,), new_case_CBGs = None):
    """Seed the simulation with initial infections on day 1.

    Parameters
    ----------
    counters : list of pandas.DataFrame
        ``[S, I, R]`` tables as returned by ``initCounter``; ``S`` and ``I``
        are updated in place in their ``Day1`` column.
    new_case_nums : sequence of int
        Requested number of initial cases for each seeded CBG.
    new_case_CBGs : sequence, optional
        CBG ids to seed, paired element-wise with ``new_case_nums``. When
        omitted, a single CBG is drawn at random from ``src_cbg_names``.

    Returns
    -------
    list
        One ``[cbg, 0]`` entry per activated case (0 = days since infection).
    """
    if new_case_CBGs is None:
        new_case_CBGs = [src_cbg_names[random.randint(0,n_src_CBG-1)]]

    active_cases = [] # list of [cbg, numday]
    for case_num, case_cbg in zip(new_case_nums, new_case_CBGs):
        susceptible = counters[0].loc[case_cbg, 'Day1']
        # Cap at the number of susceptible people. The population column is
        # float, so min() can return a float; range() below needs a real int.
        case_num = int(min(susceptible, case_num))
        counters[0].loc[case_cbg, 'Day1'] -= case_num
        counters[1].loc[case_cbg, 'Day1'] += case_num

        active_cases.extend([case_cbg, 0] for _ in range(case_num))

    return active_cases

In [42]:
def nextDay(counters, current_day, active_cases):
    """Advance the simulation by one day.

    For every active case: with the probability given by
    ``infection_chance_per_day`` for its age in days, pick a destination CBG
    visited from the case's CBG, then pick a source CBG that visits that
    destination, and infect one susceptible person there. The case then ages
    by one day and is moved from I to R once its age reaches
    ``len(infection_chance_per_day)``.

    Parameters
    ----------
    counters : list of pandas.DataFrame
        ``[S, I, R]`` tables; a ``Day<current_day>`` column is added to each
        (copied from the previous day) and then updated in place.
    current_day : int
        Day number being simulated; ``Day<current_day - 1>`` must already
        exist in every counter table.
    active_cases : list of [cbg, num_day]
        Currently infectious cases; aged in place, recovered cases removed.

    NOTE(review): within the lines shown, ``new_active_cases`` is collected
    but neither merged into ``active_cases`` nor returned -- confirm how the
    caller is expected to obtain the new cases.
    """
    current_col = 'Day%d'%current_day
    for k in range(3):
        counters[k][current_col] = counters[k]['Day%d'%(current_day-1)].copy()

    new_active_cases = []
    i = 0
    # Index-based loop because recovered cases are removed while iterating.
    while i < len(active_cases):
        src_cbg, num_day = active_cases[i]
        if random.random() < infection_chance_per_day[num_day]:
            des_cbg = get_random_des(group_by_src.get_group(src_cbg))
            # Either sampler returns None when all of its weights are zero;
            # in that case there is nobody to infect on this draw.
            rev_src_cbg = None
            if des_cbg is not None:
                rev_src_cbg = get_random_src(group_by_des.get_group(des_cbg))

            if rev_src_cbg is None:
                pass
            elif counters[0].loc[rev_src_cbg,current_col] == 0:
                # No susceptible people left in the CBG that was drawn.
                print('cbg %d full'% rev_src_cbg)
            else: # activate a new case
                counters[0].loc[rev_src_cbg,current_col] -= 1
                counters[1].loc[rev_src_cbg,current_col] += 1
                # collect new case
                new_active_cases.append([rev_src_cbg, 0])

        active_cases[i][1] += 1
        if active_cases[i][1] == len(infection_chance_per_day):
            # one case recover: drop it from the active list (not from the
            # probability table) and do not advance i, because the next case
            # has shifted into position i.
            active_cases.pop(i)
            counters[1].loc[src_cbg,current_col] -= 1
            counters[2].loc[src_cbg,current_col] += 1
        else:
            i += 1