# Spread of infection 

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')


In [2]:
airports_file = "../csv/infection/US_airport.csv"
airports = pd.read_csv(airports_file, index_col=0)

flights_file = "../csv/infection/flights.csv"
flights = pd.read_csv(flights_file, sep=" ")

In [3]:
flights

Unnamed: 0,Source,Destination,StartTime,EndTime,Duration
0,0,1,1229286900,1229291520,4620
1,0,1,1229272800,1229277300,4500
2,0,2,1229285640,1229291520,5880
3,0,3,1229268060,1229276040,7980
4,0,3,1229284740,1229293140,8400
...,...,...,...,...,...
180187,260,24,1230066420,1230070440,4020
180188,260,24,1230049200,1230052620,3420
180189,260,16,1230059400,1230064200,4800
180190,260,16,1230035400,1230040200,4800


In [4]:
airports

Unnamed: 0_level_0,symbol,airport name,city,latitude,longitude,xcoordviz,ycoordviz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,ABE,Lehigh Valley Intl,Allentown,40.652083,-75.440806,1.926093e+06,337357.529978
1,CLE,Cleveland Hopkins Intl,Cleveland,41.411689,-81.849794,1.384469e+06,302554.338074
2,CLT,Charlotte Douglas Intl,Charlotte,35.214000,-80.943139,1.590611e+06,-355004.415565
3,ORD,Chicago Ohare Intl,Chicago,41.978603,-87.904842,8.790187e+05,289127.727396
4,ATL,Hartsfield Jackson Atlanta Intl,Atlanta,33.636719,-84.428067,1.306257e+06,-582505.812325
...,...,...,...,...,...,...,...
274,RDD,Redding Muni,Redding,40.509000,-122.293389,-1.976890e+06,334403.078441
275,SUN,Friedman Mem,Hailey,43.504444,-114.296194,-1.261565e+06,515731.793677
276,TWF,Magic Valley Regional Airport,Twin Falls,42.481803,-114.487733,-1.296508e+06,406881.420680
277,RHI,Rhinelander Oneida County Airport,Rhinelander,45.631200,-89.467500,7.102613e+05,678171.825235


In [5]:
def get_airport_name(airports, index):
    return airports.at[index, 'city']

def spread(dataset, airports, init_city = 'Allentown', p = 0.5, verbose=False):
    infected_cities = set([init_city])
    result = {dataset['StartTime'].min() : [init_city]}
    if verbose:
        print("p = {}, init_city = {}".format(p, init_city))
        print("Starting infection...")
    
    for index, flight in dataset.iterrows():
        if get_airport_name(airports, flight['Source']) in infected_cities:
            if np.random.random() <= p:
                destination = get_airport_name(airports, flight['Destination'])
                if destination in infected_cities:
                    continue
                if verbose:
                    print("Infecting city {}".format(destination))
                infection_time = flight['EndTime']
                if infection_time in result:
                    result[infection_time].append(destination)
                else:
                    result[infection_time] = [destination]
                infected_cities.add(destination)
    return result

In [6]:
infection_spread = spread(flights, airports, verbose=True)

p = 0.5, init_city = Allentown
Starting infection...
Infecting city Cleveland
Infecting city Chicago
Infecting city Atlanta
Infecting city Detroit
Infecting city Houston
Infecting city Charlotte
Infecting city Denver
Infecting city Washington
Infecting city Birmingham
Infecting city Pittsburgh (pennsylva)
Infecting city Knoxville
Infecting city Mobile
Infecting city Buffalo
Infecting city Milwaukee
Infecting city Valparaiso
Infecting city Cincinnati
Infecting city Minneapolis
Infecting city Phoenix
Infecting city San Diego
Infecting city Fort Myers
Infecting city Las Vegas
Infecting city Orlando
Infecting city San Francisco
Infecting city Los Angeles
Infecting city New York
Infecting city Tampa
Infecting city Salt Lake City
Infecting city San Juan
Infecting city Philadelphia
Infecting city Newark
Infecting city Colombia
Infecting city West Palm Beach
Infecting city Fort Lauderdale
Infecting city Raleigh-durham
Infecting city Richmond
Infecting city Miami
Infecting city Santa Ana
Infect

In [42]:
import itertools

def infection_report(airports, infection_spread):
    flatten = list(itertools.chain.from_iterable(infection_spread.values()))
    print("In total {} of {} cities was infected".format(len(flatten), len(airports['city'].unique())))

In [8]:
infection_report(airports, infection_spread)

In total 269 of 269 cities was infected


In [13]:
from tqdm import notebook



probabilities = [0.01, 0.05, 0.1, 0.5, 1.]
results = {}

for p in notebook.tqdm(probabilities):
    results[p] = []
    for i in notebook.tqdm(range(10)):
        results[p].append(spread(flights, airports, p = p))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))





In [14]:
import pickle

file_path = '/home/mikhail/PycharmProjects/intro/models/infection.pickle'

with open(file_path, 'wb') as f:
    pickle.dump(results, f)

# with open(file_path, 'rb') as f:
#     results = pickle.load(f)

In [43]:
for res in results[0.01]:
    infection_report(airports, res)

In total 162 of 269 cities was infected
In total 164 of 269 cities was infected
In total 7 of 269 cities was infected
In total 37 of 269 cities was infected
In total 53 of 269 cities was infected
In total 179 of 269 cities was infected
In total 19 of 269 cities was infected
In total 151 of 269 cities was infected
In total 1 of 269 cities was infected
In total 171 of 269 cities was infected


In [122]:
from datetime import datetime

def iter_by_12hours(data, interval_start):
    total = len(list(itertools.chain.from_iterable(data.values())))
    infected = []
    results = {}
    interval = 0
    for time in sorted(data):
        delta = abs(datetime.fromtimestamp(time).hour - interval_start.hour)
        if delta < 12:
            infected.extend(list(x for x in data[time]))
        else:
            interval_start = datetime.fromtimestamp(time)
            results[interval] = len(infected) / total
            interval = interval + 1
    return results
        
total_city_count = 269
start_datetime = datetime.fromtimestamp(flights['StartTime'].min())
time_report = {}
for p in probabilities:
    time_report[p] = []
    for i in range(10):
        result_for_iteration = iter_by_12hours(results[p][i], start_datetime)    
        if not result_for_iteration:
            result_for_iteration = {0: 1/total_city_count}
        time_report[p].append({i: result_for_iteration})

time_report[0.01][5][5]

{0: 0.0670391061452514,
 1: 0.12849162011173185,
 2: 0.25139664804469275,
 3: 0.3854748603351955,
 4: 0.5083798882681564,
 5: 0.5698324022346368,
 6: 0.6089385474860335,
 7: 0.6703910614525139,
 8: 0.6871508379888268,
 9: 0.7262569832402235,
 10: 0.770949720670391,
 11: 0.7932960893854749,
 12: 0.7988826815642458,
 13: 0.8212290502793296,
 14: 0.8324022346368715,
 15: 0.8324022346368715,
 16: 0.8435754189944135,
 17: 0.8603351955307262,
 18: 0.8659217877094972,
 19: 0.8770949720670391}

In [118]:
def count_avg(data, interval_count = 20, num_of_iter = 10):
    result = {}
    for hour_interval in range(interval_count):
        result[hour_interval] = 0.0
        sum_by_hour = 0
        for iteration in range(num_of_iter):
#           if no information about interval, then get info from last interval
            if hour_interval not in data[iteration][hour_interval]:
                print(data[iteration][hour_interval][hour_interval])
                data[iteration][hour_interval][hour_interval] = data[iteration][hour_interval-1][hour_interval-1]
            sum_by_hour = sum_by_hour + data[iteration][hour_interval][hour_interval]
#       average
        result[hour_interval] = sum_by_hour / 10
    return result

count_avg(time_report[0.01])

KeyError: 0