In [None]:
# Network Algorithms Homework Project


In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from geopy import distance
from geopy import Point
from utils import *

# pd.set_option("display.max_rows", None, "display.max_columns", None)


In [None]:
# Read the raw airport and route tables from CSV files in the working directory
airports, routes = map(pd.read_csv, ('airports.csv', 'routes.csv'))

In [None]:
def airport_info(data_airports, iata):
    """Look up an airport by its IATA code.

    Returns every row of ``data_airports`` whose ``'IATA'`` column equals
    ``iata``; the result is an empty frame when nothing matches.
    """
    matches_code = data_airports['IATA'].eq(iata)
    return data_airports[matches_code]

In [None]:
def filter_na(df, column):
    """Drop the rows whose string value in ``column`` is shorter than 3 characters.

    Short values are treated as "not available" placeholders. The column must
    support the ``.str`` accessor. A new frame is returned; ``df`` is not
    modified.
    """
    too_short = df[column].str.len().lt(3)
    return df.loc[~too_short]

# Filter the N/A values in the data
filtered_airports = filter_na(airports, "IATA")

# Airport IDs are numeric, so the "shorter than 3 characters" heuristic used
# for IATA codes would also throw away every valid one- or two-digit ID.
# Instead, coerce the IDs to numbers: placeholders that are not numeric become
# NaN and only those rows are dropped. Building a new frame (rather than
# assigning into a filtered selection) also avoids SettingWithCopyWarning.
filtered_routes = (
    routes
    .assign(**{
        'Source airport ID': pd.to_numeric(routes['Source airport ID'], errors='coerce'),
        'Destination airport ID': pd.to_numeric(routes['Destination airport ID'], errors='coerce'),
    })
    .dropna(subset=['Source airport ID', 'Destination airport ID'])
    # change the datatype in routes
    .astype({'Source airport ID': 'int64', 'Destination airport ID': 'int64'})
)

In [None]:
def get_countries_of(region):
    """Return the common names of the countries that belong to ``region``.

    Only ``'Europe'`` is handled for now; its country list is downloaded from
    the restcountries.com REST API, so network access is required.

    Parameters
    ----------
    region : str
        Name of the region. Any value other than ``'Europe'`` is not
        supported yet.

    Returns
    -------
    list of str or None
        The ``name.common`` entry of every country returned by the API for
        Europe, or ``None`` for an unsupported region.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within the timeout.
    """
    if region == 'Europe':
        # Local import: requests is only needed for this lookup.
        import requests

        response = requests.get(
            "https://restcountries.com/v3.1/region/europe",
            timeout=10,  # do not hang the notebook on an unresponsive server
        )
        # Fail with a clear HTTP error instead of a KeyError on an error payload.
        response.raise_for_status()
        return [country['name']['common'] for country in response.json()]
    # future support for other regions
    return None

def filter_to_europe(df, attr):
    """Keep only the rows of ``df`` whose ``attr`` column names a European country.

    The list of European countries is obtained from ``get_countries_of('Europe')``.
    """
    european_countries = get_countries_of('Europe')
    in_europe = df[attr].isin(european_countries)
    return df[in_europe]

def filter_to_countries(df, countries):
    """Keep only the rows of ``df`` whose ``'Country'`` value is one of ``countries``."""
    wanted = df['Country'].isin(countries)
    return df.loc[wanted]

# Restrict the airports to a single region (Europe in our example)
filtered_european_airports = filter_to_europe(df=filtered_airports, attr='Country')

# Restrict the airports to a hand-picked set of countries
# (France, Germany and the United Kingdom in our example)
filtered_some_airports = filter_to_countries(
    df=filtered_airports,
    countries=['France', 'Germany', 'United Kingdom'],
)


In [None]:
# Filter routes according to filtered airports: a route is kept only when both
# its source and its destination belong to the selected airports.
# The explicit .copy() makes each result an independent frame: columns are
# added to these frames later on, and assigning into a filtered selection of
# filtered_routes would trigger pandas' SettingWithCopyWarning.
airport_id = filtered_european_airports['Airport ID']
filtered_european_routes = filtered_routes[
    filtered_routes['Source airport ID'].isin(airport_id)
    & filtered_routes['Destination airport ID'].isin(airport_id)
].copy()

airport_id = filtered_some_airports['Airport ID']
filtered_some_routes = filtered_routes[
    filtered_routes['Source airport ID'].isin(airport_id)
    & filtered_routes['Destination airport ID'].isin(airport_id)
].copy()

In [None]:
def get_coords(airport_id, airports):
    """Fetch the coordinates recorded for an airport.

    Selects the rows of ``airports`` whose ``'Airport ID'`` equals
    ``airport_id`` and returns their ``'Latitude'`` and ``'Longitude'``
    columns as a ``(latitude, longitude)`` tuple of Series (not scalars).
    """
    selected = airports[airports['Airport ID'] == airport_id]
    latitude, longitude = selected['Latitude'], selected['Longitude']
    return latitude, longitude

In [None]:
def add_coords_to_routes(routes, airports):
    """Attach source and destination coordinates to every route.

    Adds four columns to ``routes`` -- ``'Source latitude'``,
    ``'Source longitude'``, ``'Destination latitude'`` and
    ``'Destination longitude'`` -- by looking up each route's
    ``'Source airport ID'`` / ``'Destination airport ID'`` in ``airports``.

    The coordinates are copied over unchanged from ``airports`` (no round trip
    through formatted text), and an ID that is absent from ``airports``
    yields NaN.

    Parameters
    ----------
    routes : pandas.DataFrame
        Must contain ``'Source airport ID'`` and ``'Destination airport ID'``.
        It is modified in place.
    airports : pandas.DataFrame
        Must contain ``'Airport ID'``, ``'Latitude'`` and ``'Longitude'``.

    Returns
    -------
    pandas.DataFrame
        The same ``routes`` object, with the four coordinate columns added.
    """
    # One row per airport ID so it can serve as a lookup table; when an ID is
    # repeated the last occurrence wins.
    coords_by_id = (
        airports
        .drop_duplicates('Airport ID', keep='last')
        .set_index('Airport ID')
    )

    # A single vectorised lookup per column replaces a full scan of
    # ``airports`` for every route.
    for endpoint in ('Source', 'Destination'):
        endpoint_ids = routes[f'{endpoint} airport ID']
        routes[f'{endpoint} latitude'] = endpoint_ids.map(coords_by_id['Latitude'])
        routes[f'{endpoint} longitude'] = endpoint_ids.map(coords_by_id['Longitude'])

    return routes

In [None]:
# European variant (disabled):
# modified_routes = add_coords_to_routes(routes=filtered_european_routes, airports=filtered_european_airports)

# Add the endpoint coordinates to the routes of the selected countries
modified_some_routes = add_coords_to_routes(
    routes=filtered_some_routes,
    airports=filtered_some_airports,
)

In [None]:
def get_dists(routes):
    """Compute the distance in kilometres covered by every route.

    Each row of ``routes`` must provide ``'Source latitude'``,
    ``'Source longitude'``, ``'Destination latitude'`` and
    ``'Destination longitude'``. The distance between the two points is
    computed with ``geopy.distance.distance`` and its ``.km`` value is
    returned, one entry per row.
    """
    def route_km(row):
        origin = (row['Source latitude'], row['Source longitude'])
        target = (row['Destination latitude'], row['Destination longitude'])
        return distance.distance(origin, target).km

    return routes.apply(route_km, axis=1)

In [None]:
# dist = get_dists(modified_routes)
# Distance in km of every selected route
dist_some = get_dists(modified_some_routes)

# Name the distances so that they end up in a 'Distance' column of the new frame
# distances = pd.Series(dist, name = 'Distance')
distances_some = pd.Series(dist_some, name = 'Distance')
# data = [filtered_european_routes['Source airport'].reset_index(drop=True),filtered_european_routes['Destination airport'].reset_index(drop=True), distances.reset_index(drop=True)]
data_some = [
    filtered_some_routes['Source airport'].reset_index(drop=True),
    filtered_some_routes['Destination airport'].reset_index(drop=True),
    distances_some.reset_index(drop=True),
]

# new DataFrame only with the Source, Destination and Distance values.
# The columns are concatenated side by side so that each keeps its own dtype;
# building the frame row-wise and transposing it would turn every column,
# including the numeric Distance, into object dtype.
# df = pd.concat(data, axis=1)
df_some = pd.concat(data_some, axis=1)
# print(df)

In [None]:
# Creating a directed graph from the filtered routes; every remaining column of
# the frame (here 'Distance') is stored as an edge attribute.
# G = nx.from_pandas_edgelist(df, source='Source airport', target='Destination airport', edge_attr=True, create_using=nx.DiGraph)
G = nx.from_pandas_edgelist(
    df_some,
    source='Source airport',
    target='Destination airport',
    edge_attr=True,
    create_using=nx.DiGraph,
)
nx.draw(G, with_labels=True)

# NOTE(review): the metric helpers used below are not defined in this notebook;
# presumably they come from `from utils import *` -- confirm.
# The result variables are deliberately not named after the helpers
# (e.g. `network_density`), since that would rebind the function names.

# Network Density
network_densit = network_density(G)
print("Network Density = ", network_densit)

# Network Diameter
network_diamete = network_diameter(G, 'Distance')
print("Network Diameter = ", network_diamete)

# Network Average Path Length
napl = network_average_path_length(G, 'Distance')
print("Network Average Path Length = ", napl)

# # Get all the paths in Graph
# paths = get_all_pathes(G, 'Distance')
# print("Paths: ", paths)

# Closeness Centrality of Airport.
# All three calls use the 'Distance' edge attribute: it is the only attribute
# present on the edges, so any other key (such as 'Dist') would not refer to
# the route distances and the values would not be comparable.
cc_CDG = closeness_centrality(G, 'CDG', 'Distance')
cc_LHR = closeness_centrality(G, 'LHR', 'Distance')
cc_FRA = closeness_centrality(G, 'FRA', 'Distance')

print("Closeness Centrality of Charles de Gaulle Airport: ", cc_CDG)
print("Closeness Centrality of Heathrow Airport: ", cc_LHR)
print("Closeness Centrality of Frankfurt Airport: ", cc_FRA)


# Degree, in-degree and out-degree of Charles de Gaulle
degree_CDG = degree(G, 'CDG')
inflow_CDG = degree_inflow(G, 'CDG')
outflow_CDG = degree_outflow(G, 'CDG')

print("Degree of Charles de Gaulle Airport: ", degree_CDG)
print("Inflow Degree of Charles de Gaulle Airport: ", inflow_CDG)
print("Outflow Degree of Charles de Gaulle Airport: ", outflow_CDG)

# Degree, in-degree and out-degree of Heathrow
degree_LHR = degree(G, 'LHR')
inflow_LHR = degree_inflow(G, 'LHR')
outflow_LHR = degree_outflow(G, 'LHR')

print("Degree of Heathrow Airport: ", degree_LHR)
print("Inflow Degree of Heathrow Airport: ", inflow_LHR)
print("Outflow Degree of Heathrow Airport: ", outflow_LHR)

# Degree, in-degree and out-degree of Frankfurt
degree_FRA = degree(G, 'FRA')
inflow_FRA = degree_inflow(G, 'FRA')
outflow_FRA = degree_outflow(G, 'FRA')

print("Degree of Frankfurt Airport: ", degree_FRA)
print("Inflow Degree of Frankfurt Airport: ", inflow_FRA)
print("Outflow Degree of Frankfurt Airport: ", outflow_FRA)

# Paths between selected airports, all using the 'Distance' edge attribute
Charles_Frankfurt = get_path(G, 'CDG', 'FRA' , 'Distance')
Charles_Hamburg = get_path(G, 'CDG', 'HAM', 'Distance')
Frankfurt_Heathrow = get_path(G, 'FRA', 'LHR', 'Distance')

print("Path CDG-->FRA ", Charles_Frankfurt)
# print("Path CDG-->HAM ", Charles_Hamburg)
print("Path FRA-->LHR ", Frankfurt_Heathrow)



In [None]:


# # array dist is an array of all the distances between sources and destinations that should be created and filled by Rustam
# # once it's filled a new data frame with the Source, Destination and Distance values will be created
# distances = pd.Series(dist, name = 'Distance')
# data = [filtered_european_routes['Source airport'].reset_index(drop=True),filtered_european_routes['Destination airport'].reset_index(drop=True), distances.reset_index(drop=True)]

# # new DataFrame only with the Source, Destination and Distance values
# df = pd.DataFrame(data).T
# print(df)