# Spread of infection 

In [28]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')


In [29]:
airports_file = "../csv/infection/US_airport.csv"
airports = pd.read_csv(airports_file, index_col=0)

flights_file = "../csv/infection/flights.csv"
flights = pd.read_csv(flights_file, sep=" ")

In [38]:
flights

Unnamed: 0,Source,Destination,StartTime,EndTime,Duration
0,0,1,1229286900,1229291520,4620
1,0,1,1229272800,1229277300,4500
2,0,2,1229285640,1229291520,5880
3,0,3,1229268060,1229276040,7980
4,0,3,1229284740,1229293140,8400
...,...,...,...,...,...
180187,260,24,1230066420,1230070440,4020
180188,260,24,1230049200,1230052620,3420
180189,260,16,1230059400,1230064200,4800
180190,260,16,1230035400,1230040200,4800


In [39]:
airports

Unnamed: 0_level_0,symbol,airport name,city,latitude,longitude,xcoordviz,ycoordviz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,ABE,Lehigh Valley Intl,Allentown,40.652083,-75.440806,1.926093e+06,337357.529978
1,CLE,Cleveland Hopkins Intl,Cleveland,41.411689,-81.849794,1.384469e+06,302554.338074
2,CLT,Charlotte Douglas Intl,Charlotte,35.214000,-80.943139,1.590611e+06,-355004.415565
3,ORD,Chicago Ohare Intl,Chicago,41.978603,-87.904842,8.790187e+05,289127.727396
4,ATL,Hartsfield Jackson Atlanta Intl,Atlanta,33.636719,-84.428067,1.306257e+06,-582505.812325
...,...,...,...,...,...,...,...
274,RDD,Redding Muni,Redding,40.509000,-122.293389,-1.976890e+06,334403.078441
275,SUN,Friedman Mem,Hailey,43.504444,-114.296194,-1.261565e+06,515731.793677
276,TWF,Magic Valley Regional Airport,Twin Falls,42.481803,-114.487733,-1.296508e+06,406881.420680
277,RHI,Rhinelander Oneida County Airport,Rhinelander,45.631200,-89.467500,7.102613e+05,678171.825235


In [100]:
def get_airport_name(airports, index):
    return airports.at[index, 'city']

def spread(dataset, airports, init_city = 'Allentown', p = 0.5, verbose=False):
    infected_cities = set([init_city])
    result = {dataset['StartTime'].min() : [init_city]}
    if verbose:
        print("p = {}, init_city = {}".format(p, init_city))
        print("Starting infection...")
    
    for index, flight in dataset.iterrows():
        if get_airport_name(airports, flight['Source']) in infected_cities:
            if np.random.random() <= p:
                destination = get_airport_name(airports, flight['Destination'])
                if destination in infected_cities:
                    continue
                if verbose:
                    print("Infecting city {}".format(destination))
                infection_time = flight['EndTime']
                if infection_time in result:
                    result[infection_time].append(destination)
                else:
                    result[infection_time] = [destination]
                infected_cities.add(destination)
    return result

In [101]:
infection_spread = spread(flights, airports, verbose=True)

p = 0.5, init_city = Allentown
Starting infection...
Infecting city Cleveland
Infecting city Charlotte
Infecting city Chicago
Infecting city Atlanta
Infecting city Newark
Infecting city Houston
Infecting city Washington
Infecting city Greensboro
Infecting city Austin
Infecting city Birmingham
Infecting city Greenville
Infecting city Pittsburgh (pennsylva)
Infecting city Philadelphia
Infecting city Knoxville
Infecting city Mobile
Infecting city Buffalo
Infecting city Jackson
Infecting city Pensacola
Infecting city Minneapolis
Infecting city Jacksonville
Infecting city Orlando
Infecting city San Francisco
Infecting city Cincinnati
Infecting city Los Angeles
Infecting city Dallas-fort Worth
Infecting city Miami
Infecting city New York
Infecting city Tampa
Infecting city Las Vegas
Infecting city San Diego
Infecting city Raleigh-durham
Infecting city San Juan
Infecting city Valparaiso
Infecting city Fort Lauderdale
Infecting city Richmond
Infecting city Fort Myers
Infecting city Columbus
In

In [115]:
import itertools

def infection_report(airports, infection_spread):
    flatten = list(itertools.chain.from_iterable(infection_spread.values()))
    print("In total {} of {} cities was infected".format(len(flatten), len(airports['city'].unique())))
    if len(flatten) != len(airports['city'].unique()):
        print("Were not infected: {}".format(set(airports['city']) - set(flatten)))

In [103]:
infection_report(airports, infection_spread)

In total 269 of 269 cities was infected


In [107]:
from tqdm import notebook



probabilities = [0.01, 0.05, 0.1, 0.5, 1.]
results = {}

for p in notebook.tqdm(probabilities):
    results[p] = []
    for i in notebook.tqdm(range(10)):
        results[p].append(spread(flights, airports, p = p))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))





In [111]:
import pickle

file_path = 'path/to/dump'

with open(file_path, 'wb') as f:
    pickle.dump(results, f)

In [119]:
for res in results[0.5]:
    infection_report(airports, res)

In total 269 of 269 cities was infected
In total 268 of 269 cities was infected
In total 268 of 269 cities was infected
In total 269 of 269 cities was infected
In total 267 of 269 cities was infected
In total 268 of 269 cities was infected
In total 269 of 269 cities was infected
In total 268 of 269 cities was infected
In total 269 of 269 cities was infected
In total 269 of 269 cities was infected
