# Pandemic Analysis - Part II

### Incorporating Context for Choosing the Optimal Placement

In [None]:
import networkx as nx
import random
import pandas as pd
import itertools

In [None]:
# Load the board graph as stored on disk, where each node carries its city
# name in a 'label' attribute.
pandemic_g_nums = nx.read_graphml('pandemic.graphml.txt')

# Lookup tables in both directions: city name -> node id, node id -> city name.
city_names_to_num = {attrs['label']: node
                     for node, attrs in pandemic_g_nums.nodes(data=True)}
city_num_to_names = {node: attrs['label']
                     for node, attrs in pandemic_g_nums.nodes(data=True)}

# Same graph, but with the city names as node identifiers.
pandemic_g = nx.relabel_nodes(pandemic_g_nums, city_num_to_names)

In [None]:
# Load each results file ('1_nodes.json' ... '5_nodes.json') into a dataframe,
# name its three columns, and stack all of them into one table. The edge count
# column is dropped once the frames are combined.
# NOTE: `range` replaces the Python 2-only `xrange`, so this cell runs on both
# Python 2 and Python 3.
df = pd.concat([pd.read_json('{}_nodes.json'.format(i))
                  .rename(columns={0: 'Diameter', 1: 'Stations', 2: 'Num_Edges'})
                for i in range(1, 6)],
               ignore_index=True).drop('Num_Edges', axis=1)
# Add column for number of stations per combination.
df['Num_Stations'] = df.Stations.map(len)
# Add column with the city names of the stations.
df['City_Names'] = df.Stations.map(lambda stations: [city_num_to_names[s] for s in stations])

###### Filter for the optimal combos in each Number of Stations group

In [None]:
# For every station count, keep only the combos whose diameter equals the
# smallest diameter found for that station count.
min_dia_by_num_of_stations = (
    df.groupby('Num_Stations')
      .apply(lambda grp: grp.loc[grp['Diameter'] == grp['Diameter'].min()])
)
min_dia_by_num_of_stations

###### Count number of optimal combos in each group

In [None]:
# Per station count: the minimum diameter, how many combos reach that
# minimum, and how many combos there are in total.
min_dia_summary = pd.concat(
    [
        df.groupby('Num_Stations')['Diameter'].min(),
        df.groupby('Num_Stations').apply(
            lambda grp: len(grp[grp.Diameter == grp.Diameter.min()])),
        df.groupby('Num_Stations').size(),
    ],
    axis=1,
)
# The rename maps the first series' name plus the labels 0 and 1 carried by
# the two remaining columns to readable headers.
min_dia_summary = min_dia_summary.rename(columns={
    'Diameter': 'Min diameter',
    0: 'Combos with min diameter',
    1: 'Total combos',
})
min_dia_summary

### Function: Calculate the longest shortest path between infected cities for a given set of research stations

In [None]:
def get_longest_shortest_path(graph, stations, infected_cities):
    """Return the longest shortest-path length between any two infected cities.

    A copy of ``graph`` gets an edge between every pair of ``stations``
    before distances are measured; ``graph`` itself is not modified.

    Args:
        graph: Graph object offering ``copy()``, membership tests and
            ``add_edge()`` (a networkx graph in this notebook).
        stations: Iterable of nodes that hold research stations. A pair in
            which either node is missing from the graph is reported with
            ``print`` and skipped.
        infected_cities: Iterable of nodes whose pairwise distances are
            measured.

    Returns:
        The maximum ``nx.shortest_path_length`` over all unordered pairs of
        infected cities, or 0 when there are fewer than two infected cities
        (there is no pair to measure).
    """
    # Make a copy of the graph because we're going to add edges.
    h = graph.copy()
    # Connect every pair of stations directly.
    for first, second in itertools.combinations(stations, 2):
        if first in h and second in h:
            h.add_edge(first, second)
        else:
            # format() rather than '+' so non-string node ids do not raise.
            print('missing nodes {} {}'.format(first, second))
    # Materialise once so any iterable is accepted and can be counted.
    cities = list(infected_cities)
    if len(cities) < 2:
        # max() over an empty sequence would raise ValueError.
        return 0
    # Longest of the shortest paths between each pair of infected cities.
    return max(nx.shortest_path_length(h, a, b)
               for a, b in itertools.combinations(cities, 2))

In [None]:
# The infected cities used as context when ranking station placements.
# Stored as a set, so the order written here carries no meaning.
infected_cities = {
    'Atlanta', 'Washington', 'London', 'Madrid', 'Milan',
    'Algiers', 'Kinshasa', 'Jakarta', 'Shanghai', 'Hong Kong',
    'Tokyo', 'Sydney', 'Santiago',
}

###### Calculate starting longest shortest path between infected cities

In [None]:
# Baseline: with no stations, no edges are added to the copied board.
get_longest_shortest_path(graph=pandemic_g, stations=[], infected_cities=infected_cities)

![board](images/infected_cities.png)

![board](images/infected_cities_min_max_3.png)

###### Calculate the longest shortest path length for each optimal combo

In [None]:
# Longest shortest path between infected cities for every optimal combo,
# keyed by the combo's city names as a tuple (a list cannot be a dict key).
results = {
    tuple(stations): get_longest_shortest_path(pandemic_g, stations, infected_cities)
    for stations in min_dia_by_num_of_stations['City_Names']
}

# Attach the lengths as a new column by looking each combo up again.
min_dia_by_num_of_stations['Max_shortest_for_infected_cities'] = (
    min_dia_by_num_of_stations['City_Names'].map(lambda names: results[tuple(names)])
)

# Show the combos ordered by station count, then by Max_shortest_for_infected_cities.
min_dia_by_num_of_stations.sort_values(by=['Num_Stations', 'Max_shortest_for_infected_cities'])

###### Filter for just the optimal combos that reduce the longest shortest path between infected cities the most

In [None]:
# Within each station count, keep only the combos whose longest shortest path
# between infected cities is the smallest for that station count.
min_max_shortest_for_infected_cities = (
    min_dia_by_num_of_stations
    .groupby('Num_Stations')
    .apply(lambda grp: grp.loc[grp['Max_shortest_for_infected_cities']
                               == grp['Max_shortest_for_infected_cities'].min()])
)
# Display without index level 2 and without the Stations/Num_Stations columns.
(min_max_shortest_for_infected_cities
 .reset_index(level=[2], drop=True)
 .drop(['Stations', 'Num_Stations'], axis=1))

###### Did we reduce the number of optimal combos to choose from?

In [None]:
def highlight_fewer(s):
    """Return one CSS string per row for the column ``s``.

    Only the 'Combos_with_min_longest' and 'Combos_with_min_diameter' columns
    are coloured: a cell gets a lime background where the module-level
    ``min_longest_summary.Fewer_choices`` flag for that row is truthy. Every
    other column gets empty styles.
    """
    flags = min_longest_summary.Fewer_choices
    if s.name not in ('Combos_with_min_longest', 'Combos_with_min_diameter'):
        return [''] * len(flags)
    return ['background-color: lime' if flag else '' for flag in flags]

In [None]:
# Count the combos left after the infected-city filter for each
# (Num_Stations, Diameter, Max_shortest_for_infected_cities) group, keep
# Num_Stations as the index, and set the count beside the number of combos
# that had the minimum diameter.
min_longest_summary = (
    min_max_shortest_for_infected_cities
    .groupby(['Num_Stations', 'Diameter', 'Max_shortest_for_infected_cities'])
    .size()
    .to_frame('Combos_with_min_longest')
    .reset_index(['Diameter', 'Max_shortest_for_infected_cities'])
    .assign(Combos_with_min_diameter=min_dia_summary['Combos with min diameter'])
    .assign(Fewer_choices=lambda frame:
            frame.Combos_with_min_longest < frame.Combos_with_min_diameter)
)
# Column order used for display.
cols = ['Diameter', 'Combos_with_min_diameter', 'Max_shortest_for_infected_cities',
        'Combos_with_min_longest', 'Fewer_choices']
# Colour the cells where the number of choices went down.
min_longest_summary[cols].style.apply(highlight_fewer)

###### One of the three-city combos that reduces longest shortest path between infected cities to six  
Atlanta, San Francisco, Cairo

In [None]:
# Select the rows under label 3 of the outer index level (the frame was built
# by grouping on Num_Stations), then show the first of those rows.
min_max_shortest_for_infected_cities.loc[3].iloc[0]

In [None]:
# Copy the board and connect the three example stations to each other.
h = pandemic_g.copy()
for first, second in itertools.combinations(['Atlanta', 'San Francisco', 'Cairo'], 2):
    if first not in h or second not in h:
        print('missing nodes ' + first + ' ' + second)
        continue
    h.add_edge(first, second)

# List every pair of infected cities whose shortest path has length 6,
# together with one such path written out city by city.
[(src, dst, ', '.join(nx.shortest_path(h, src, dst)))
 for src, dst in itertools.combinations(infected_cities, 2)
 if nx.shortest_path_length(h, src, dst) == 6]

![board](images/infected_cities_min_max_3_6.png)