In [1]:
import sys
# NOTE(review): absolute, machine-specific path. This is presumably the directory
# holding the `simulator` module imported in the next cell; the notebook will not
# run on another machine as written. TODO: derive it relative to the repository.
sys.path.append("/Users/sucharitajayanti/Documents/W'20/CS 189 - Network Science and Complex Systems/finalProject/adversarial-epidemics/src")

In [2]:
import networkx as nx
import itertools
import datapackage
import numpy as np
import pandas as pd
from typing import List
import matplotlib.pyplot as plt
from simulator import SIRD, SIRDGraph
import json

## Yakoob's Code ##

In [3]:
''' Python Helper functions '''

def load_data(filepath):
    """Read the CSV file at ``filepath`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(filepath)
    return frame

# creates the global airport network (domestic and international)
def flight_graph(df):
    """Build a directed graph with one edge per route row.

    Every row of ``df`` contributes an edge from its 'Source ID' value to its
    'Destination ID' value.
    """
    G = nx.DiGraph()
    G.add_edges_from(
        (route['Source ID'], route['Destination ID'])
        for _, route in df.iterrows()
    )
    return G


def summarize_graph(G):
    """Return a short text summary of graph ``G``.

    ``nx.info`` was removed in NetworkX 3.0, so it is only called when the
    installed version still provides it; otherwise the graph's own string
    form (``str(G)``) is returned.
    """
    legacy_info = getattr(nx, "info", None)
    if legacy_info is not None:
        return legacy_info(G)
    return str(G)


def plot_degree_distribution(G):
    """Plot the node degrees of ``G``, largest first, with ``plt.plot``."""
    degrees = sorted((degree for _, degree in nx.degree(G)), reverse=True)

    plt.plot(degrees)


# retrieves the country name given an ID
def country_dict(df2):
    """Map each 'Airport ID' in ``df2`` to the 'Country' value of the same row.

    When an ID occurs on several rows, the last row wins.
    """
    return {record['Airport ID']: record['Country'] for _, record in df2.iterrows()}


# helper function to retrieve country name given an airport ID
def get_country(ID, country):
    """Look up the country name for an airport ID.

    Args:
        ID: Airport identifier; may be a number, a numeric string, or the
            placeholder string "\\N".
        country: Mapping from numeric airport ID to country name.

    Returns:
        The country name, or 'No ID' when the ID is the placeholder, cannot
        be interpreted as a number, or has no entry in ``country``.
    """
    if ID == "\\N":
        return 'No ID'

    try:
        key = float(ID)
    except (TypeError, ValueError):
        # Non-numeric or missing IDs (e.g. '', None, 'abc') cannot match any
        # airport; report them as unknown instead of crashing the caller's loop.
        return 'No ID'

    return country.get(key, 'No ID')

# helper function used to rank countries by number of airports
def num_airports(G, country):
    """Count the graph's airports per country, most airports first.

    Returns:
        (names, num): two parallel lists, country names and their airport
        counts, ordered by descending count. Nodes whose ID resolves to
        'No ID' are skipped.
    """
    per_country = {}
    for airport_id, _degree in nx.degree(G):
        name = get_country(airport_id, country)
        if name == 'No ID':
            continue
        per_country[name] = per_country.get(name, 0) + 1

    ranking = sorted(per_country.items(), key=lambda pair: pair[1], reverse=True)
    names = [name for name, _ in ranking]
    num = [count for _, count in ranking]

    return names, num

# Main function used to generate the country graph
def create_country_graph(df, country):
    """Aggregate airport routes into a weighted, directed country graph.

    Each route row whose source and destination airports both resolve to a
    country adds 1 to the 'weight' of the edge source country -> destination
    country. A route inside a single country therefore lands on a self-loop.

    Returns:
        (country_graph, max_weight): the graph and the largest edge weight
        seen (0 when no route could be resolved).
    """
    country_graph = nx.DiGraph()
    max_weight = 0

    for _, route in df.iterrows():
        source = get_country(route['Source ID'], country)
        destination = get_country(route['Destination ID'], country)
        if source == 'No ID' or destination == 'No ID':
            continue

        if not country_graph.has_edge(source, destination):
            country_graph.add_edge(source, destination, weight=1)
        else:
            country_graph[source][destination]['weight'] += 1

        max_weight = max(max_weight, country_graph[source][destination]['weight'])

    return country_graph, max_weight

def load_datasets(routes_path='../data/routes.txt', airports_path='../data/airports.txt'):
    """Load the routes and airports tables, dropping the columns not used downstream.

    Args:
        routes_path: CSV file with the routes data. Defaults to the location
            that used to be hard-coded.
        airports_path: CSV file with the airports data (used to map
            Airport ID -> Country). Defaults to the location that used to be
            hard-coded.

    Returns:
        (df, df2): the trimmed routes frame and the trimmed airports frame.
    """
    routes_unused = ['Airline', 'Airline ID', 'Codeshare', 'Stops', 'Equipment']
    airports_unused = ['Latitude', 'Longitude', 'Altitude', 'Timezone', 'DST',
                       'database time zone', 'Type', 'Source', 'City', 'Name']

    df = load_data(routes_path).drop(columns=routes_unused)
    df2 = load_data(airports_path).drop(columns=airports_unused)

    return df, df2

In [4]:
# Load the routes and airports CSV files into pandas dataframes
# (routes -> df, airports -> df2).
df, df2 = load_datasets()

# Build the airport ID -> country name lookup from the airports table.
country = country_dict(df2)

# Build the directed country-level graph. Edge weights count the routes between
# two countries; max_weight is the largest such count.
G, max_weight = create_country_graph(df, country)

## Sucharita's Code ##

In [5]:
def clean_up_data(G, D, string, default_val):
    """Ensure ``D`` has an entry for every node of ``G``.

    Nodes of ``G`` that are absent from ``D`` are reported on stdout and then
    added to ``D`` with ``default_val``. The node objects themselves are used
    as the new keys, so no numpy scalar keys (``np.str_``, ``np.int64``) end
    up in the dict and mixed-type node labels are not coerced to strings.

    Args:
        G: Graph-like object exposing ``nodes()``.
        D: dict keyed by node; it is modified in place.
        string: Human-readable name of the quantity, used in the report.
        default_val: Value stored for every missing node.

    Returns:
        The same dict ``D``, now covering every node of ``G``.
    """
    missed = [node for node in G.nodes() if node not in D]
    try:
        # Report (and insert) missing nodes in sorted order.
        missed = sorted(missed)
    except TypeError:
        # Node labels of mixed, unorderable types: keep the graph's own order.
        pass

    if missed:
        print("missing " + string + " for the following locations:")
        # Wrapped in an array only for display, to keep the compact report format.
        print(np.array(missed))
        for node in missed:
            D[node] = default_val
    return D

In [6]:
# Calculates what the weight should be on edge (n1,n2) n1!=n2
# Will get more complicated eventually

def get_edge_weight(base_weight, w_n1, w_n2):
    """Return the weight to use for an edge (n1, n2).

    Currently a pass-through: ``base_weight`` is returned unchanged and the
    two per-node values ``w_n1`` and ``w_n2`` are not used yet.
    """
    weight = base_weight
    return weight

In [7]:
def get_graph_with_labels(G, pop_dict, node_wreg, spread_rate, mortality_rate, recovery, infected, pos, max_weight):
    """Turn the country graph and its per-country data into node and edge lists.

    Every per-country dict is first completed with a default for any node of
    ``G`` it lacks (see ``clean_up_data``; the dicts are filled in place).

    Returns:
        (nodes, edges) where
        nodes is a list of ``(name, SIRD(...), position)`` tuples, one per node, and
        edges is a list of ``(u, v, weight)`` tuples with the edge weight
        divided by ``max_weight`` (plus a tiny offset).
    """
    pop_dict = clean_up_data(G, pop_dict, "populations", 0)
    node_wreg = clean_up_data(G, node_wreg, "weight regulations", 1)
    spread_rate = clean_up_data(G, spread_rate, "b (spread rate)", 0)
    mortality_rate = clean_up_data(G, mortality_rate, "w (mortality rate)", 0)
    recovery = clean_up_data(G, recovery, "k (recovery)", 0.5)
    infected = clean_up_data(G, infected, "i (% initially infected)", 0)
    pos = clean_up_data(G, pos, "positions", (0,0))

    # The tiny offset keeps the divisor non-zero when max_weight is 0.
    scale = max_weight + 0.0000000001

    nodes = []
    for name in G.nodes():
        model = SIRD(b=spread_rate[name], k=recovery[name], w=mortality_rate[name],
                     N=pop_dict[name], i=infected[name])
        nodes.append((name, model, pos[name]))

    edges = []
    for u, v, attrs in G.edges(data=True):
        raw_weight = get_edge_weight(attrs['weight'], node_wreg[u], node_wreg[v])
        edges.append((u, v, raw_weight / scale))

    return nodes, edges


In [8]:
# Population Related Functions

def get_pop_data(data_url='https://datahub.io/JohnSnowLabs/population-figures-by-country/datapackage.json'):
    """Download the population-by-country table as a pandas DataFrame.

    Args:
        data_url: Location of the Data Package descriptor. Defaults to the
            datahub.io "population-figures-by-country" package.

    Returns:
        The DataFrame read from the package's tabular resource. When several
        resources are tabular, the last one is returned.

    Raises:
        ValueError: if the package contains no tabular resource.
    """
    # Load the Data Package descriptor.
    package = datapackage.Package(data_url)

    data = None
    for resource in package.resources:
        # Only tabular resources can be read as CSV.
        if resource.tabular:
            data = pd.read_csv(resource.descriptor['path'])

    if data is None:
        raise ValueError("no tabular resource found in data package: " + str(data_url))
    return data

def pop_dict_year(data, year):
    """Build a country -> population mapping for a single year.

    Args:
        data: DataFrame with a 'Country' column and a 'Year_<year>' column.
        year: Year to extract (int or str); selects the column 'Year_<year>'.

    Returns:
        dict mapping each 'Country' value to its value in that year's column.
        When a country occurs on several rows, the last row wins.

    Raises:
        KeyError: if 'Country' or the requested year column is missing.
    """
    label = 'Year_' + str(year)
    # Pair the two columns directly instead of reading ``df.iloc[i][0]`` row by
    # row: integer keys on a labelled Series are deprecated as positional
    # lookups in pandas 2.x, and per-row ``iloc`` is slow.
    return dict(zip(data['Country'], data[label]))

In [9]:
# Population per country for 2003. The population table lists
# 'Hong Kong SAR, China'; alias it to "Hong Kong", the name used below
# (presumably the name used by the airport data; confirm).
pop_dict = pop_dict_year(get_pop_data(), 2003)
pop_dict["Hong Kong"] = pop_dict['Hong Kong SAR, China']

# Uniform per-country parameters: every country starts with the same values.
node_wreg = {k: 1 for k in pop_dict}
spread_rate = {k: 0.5 for k in pop_dict}
mortality_rate = {k: 0.1 for k in pop_dict}
recovery = {k: 0.8 for k in pop_dict}

# Initial infected head counts: zero everywhere except the seeded countries.
# NOTE(review): the source of these counts is not recorded here. TODO: cite it.
infected = {k: 0 for k in pop_dict}
infected["Germany"] = 1
infected["Canada"] = 8
infected["Singapore"] = 20
infected["Hong Kong"] = 95
infected["Switzerland"] = 2
infected["Thailand"] = 1
infected["Vietnam"] = 40
infected["China"] = 100

# Convert head counts into a fraction of each country's population.
percent_infected = {n: infected[n] / pop_dict[n] for n in infected}

# Fix the layout seed so node positions are the same on every run.
LAYOUT_SEED = 42
pos = nx.spring_layout(G, seed=LAYOUT_SEED)


n, e = get_graph_with_labels(G, pop_dict, node_wreg, spread_rate, mortality_rate, recovery, percent_infected, pos, max_weight)

missing populations for the following locations:
['Bahamas' 'British United States' 'Brunei' 'Cape Verde'
 'Congo (Brazzaville)' 'Congo (Kinshasa)' 'Cook Islands' 'Egypt' 'Gambia'
 'Iran' 'Kyrgyzstan' 'Laos' 'Macau' 'Macedonia' 'Micronesia' 'Niue'
 'North Korea' 'Russia' 'Saint Kitts and Nevis' 'Saint Lucia'
 'Saint Vincent and the Grenadines' 'Slovakia' 'South Korea' 'Taiwan'
 'Venezuela' 'Western Sahara' 'Yemen']
missing weight regulations for the following locations:
['Bahamas' 'British United States' 'Brunei' 'Cape Verde'
 'Congo (Brazzaville)' 'Congo (Kinshasa)' 'Cook Islands' 'Egypt' 'Gambia'
 'Iran' 'Kyrgyzstan' 'Laos' 'Macau' 'Macedonia' 'Micronesia' 'Niue'
 'North Korea' 'Russia' 'Saint Kitts and Nevis' 'Saint Lucia'
 'Saint Vincent and the Grenadines' 'Slovakia' 'South Korea' 'Taiwan'
 'Venezuela' 'Western Sahara' 'Yemen']
missing b (spread rate) for the following locations:
['Bahamas' 'British United States' 'Brunei' 'Cape Verde'
 'Congo (Brazzaville)' 'Congo (Kinshasa)' 'Co

In [10]:
# Spot-check: print the node tuple(s) that were built for Singapore.
singapore_entries = [entry for entry in n if entry[0] == "Singapore"]
for entry in singapore_entries:
    print(entry)

('Singapore', SIRD(b=0.5, k=0.8, w=0.1, N=4114826.0, s=0.9, i=4.860472836518482e-06, r=0, d=0, xi=0, records=Records(plabels=['susceptible', 'cases', 'recoveries', 'deaths'], pcolors=['blue', 'red', 'yellow', 'black'], records=[[], [], [], []])), array([-0.00788965, -0.15829407]))


In [11]:
# NOTE: SIRDGraph is already imported in the imports cell at the top of the
# notebook; this re-import is redundant.
from simulator import SIRDGraph
# Build the simulation graph from the labelled nodes `n` and weighted edges `e`.
# NOTE(review): the keyword arguments look like drawing/scaling options; confirm
# their meaning against SIRDGraph's definition in the simulator module.
srirdg: SIRDGraph = SIRDGraph(n, e,
    width_factor=1/2,
    size_factor=1/400000,
    font_size=1,
    color_factor=100000,
    y_offset=0.01,
    font_color="blue")

In [12]:
# Run the simulation. Presumably 10 is the number of steps; confirm against SIRDGraph.run.
srirdg.run(10)

In [None]:
# Call SIRDGraph.save_dict. Presumably this persists the recorded results; the
# output format and location are defined in the simulator module (not shown here).
srirdg.save_dict()