In [149]:
# Network Algorithms Homework Project


In [150]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from geopy import distance
from geopy import Point
from utils import *

# pd.set_option("display.max_rows", None, "display.max_columns", None)


In [151]:
# Obtain the data
airports = pd.read_csv('airports.csv')
routes = pd.read_csv('routes.csv')

In [152]:
# returns all the information about an airport by its IATA
def airport_info(data_airports, iata):
    return data_airports.loc[data_airports['IATA'] == iata]

In [153]:
def filter_na(df, column):
    na_rows = df[column].str.len() < 3
    return df[~na_rows]

# Filter the N/A values in the data
filtered_airports = filter_na(airports, "IATA")
filtered_routes = filter_na(routes, 'Source airport ID')
filtered_routes = filter_na(filtered_routes, 'Destination airport ID')

# change the datatype in routes
filtered_routes['Source airport ID'] = filtered_routes['Source airport ID'].astype('int64')
filtered_routes['Destination airport ID'] = filtered_routes['Destination airport ID'].astype('int64')

In [154]:
def get_countries_of(region):
    if (region == 'Europe'):
        import requests
        response = requests.get("https://restcountries.com/v3.1/region/europe")
        l = response.json()
        europe = []
        for i in range(len(l)):
            europe.append(l[i]['name']['common'])
        return europe
    # future support for other regions

def filter_to_europe(df, attr):
    europe = get_countries_of('Europe')
    return df[df[attr].isin(europe)]

# Choose only one region (europe in our example)
filtered_european_airports = filter_to_europe(filtered_airports, 'Country')

In [155]:
# Filter routes according to filtered airports
airport_id = filtered_european_airports['Airport ID']
filtered_european_routes = filtered_routes[filtered_routes['Source airport ID'].isin(airport_id) & filtered_routes['Destination airport ID'].isin(airport_id)]

In [156]:
def get_coords(airport_id, airports):
    airport = airports.loc[airports['Airport ID'] == airport_id]
    return (airport['Latitude'], airport['Longitude'])

In [157]:
def add_coords_to_routes(routes, airports):
    sources_latitude = []
    sources_longitude = []
    destinations_latitude = []
    destinations_longitude = []

    for ind in routes.index:
        source_coords = get_coords(routes['Source airport ID'][ind], airports)
        sources_latitude.append(source_coords[0].to_string().split(' ')[-1])
        sources_longitude.append(source_coords[1].to_string().split(' ')[-1])

        destination_coords = get_coords(routes['Destination airport ID'][ind], airports)
        destinations_latitude.append(destination_coords[0].to_string().split(' ')[-1])
        destinations_longitude.append(destination_coords[1].to_string().split(' ')[-1])
    
    routes['Source latitude'] = sources_latitude
    routes['Source longitude'] = sources_longitude
    routes['Destination latitude'] = destinations_latitude
    routes['Destination longitude'] = destinations_longitude

    return routes

In [158]:
modified_routes = add_coords_to_routes(filtered_european_routes, filtered_european_airports)
# print(modified_routes.head)

In [159]:
def get_dists(routes):
    return routes.apply(
        lambda row:
            distance.distance(
                (row['Source latitude'], row['Source longitude']),
                (row['Destination latitude'], row['Destination longitude'])
            ).km,
        axis = 1
    )

In [160]:
dist = get_dists(modified_routes)

# df = modified_routes[['Source airport','Destination airport']]
# df['Dist'] = dist

# array dist is an array of all the distances between sources and destinations that should be created and filled by Rustam
# once it's filled a new data frame with the Source, Destination and Distance values will be created
distances = pd.Series(dist, name = 'Distance')
data = [filtered_european_routes['Source airport'].reset_index(drop=True),filtered_european_routes['Destination airport'].reset_index(drop=True), distances.reset_index(drop=True)]

# new DataFrame only with the Source, Destination and Distance values
df = pd.DataFrame(data).T
# print(df)

      Source airport Destination airport     Distance
0                AER                 KZN  1507.989671
1                ASF                 KZN  1040.943243
2                ASF                 MRV   449.036643
3                CEK                 KZN   773.126226
4                CEK                 OVB  1343.161093
...              ...                 ...          ...
16372            ORY                 FNC  2391.848773
16373            ORY                 LIS  1439.045374
16374            ORY                 OPO  1202.317838
16375            ORY                 VKO  2476.274933
16376            VKO                 ORY  2476.274933

[16377 rows x 3 columns]


In [148]:
#Creating a graph from the filtered european routes
G = nx.from_pandas_edgelist(df, source='Source airport', target='Destination airport', edge_attr=True, create_using=nx.DiGraph)

# Network Density
network_density = network_density(G)
print("Network Density = ", network_density)

# Network Diameter
netwirk_diameter = network_diameter(G, 'Dist')
print("Network Diameter = ", network_density)

# Network Average Path Length
napl = network_average_path_length(G, 'Dist')
print("Network Average Path Length = ", napl)

# Get all the paths in Graph
paths = get_all_pathes(G, 'Dist')
print("Paths: ", paths)

#Closeness Centrality of Airport
cc_CDG = closeness_centrality(G, 'CDG', 'Dist')
cc_LHR = closeness_centrality(G, 'LHR', 'Dist')
cc_FRA = closeness_centrality(G, 'FRA', 'Dist')

print("Closeness Centrality of Charles de Gaulle Airport: ", cc_CDG)
print("Closeness Centrality of Heathrow Airport: ", cc_LHR)
print("Closeness Centrality of Frankfurt Airport: ", cc_LHR)


degree_CDG = degree(G, 'CDG')
inflow_CDG = degree_inflow(G, 'CDG')
outflow_CDG = degree_outflow(G, 'CDG')

print("Degree of Charles de Gaulle Airport: ", degree_CDG)
print("Inflow Degree of Charles de Gaulle Airport: ", inflow_CDG)
print("Outflow Degree of Charles de Gaulle Airport: ", outflow_CDG)

degree_LHR = degree(G, 'LHR')
inflow_LHR = degree_inflow(G, 'LHR')
outflow_LHR = degree_outflow(G, 'LHR')

print("Degree of Heathrow Airport: ", degree_LHR)
print("Inflow Degree of Heathrow Airport: ", inflow_LHR)
print("Outflow Degree of Heathrow Airport: ", outflow_LHR)

degree_FRA = degree(G, 'FRA')
inflow_FRA = degree_inflow(G, 'FRA')
outflow_FRA = degree_outflow(G, 'FRA')

print("Degree of Frankfurt Airport: ", degree_FRA)
print("Inflow Degree of Frankfurt Airport: ", inflow_FRA)
print("Outflow Degree of Frankfurt Airport: ", outflow_FRA)

Charles_Frankfurt = get_path(G, 'CDG', 'FRA' , 'Dist')
Frankfurt_Heathrow = get_path(G, 'FRA', 'LHR', 'Dist')

print("Path CDG-->FRA ", Charles_Frankfurt)
print("Path FRA-->LHR ", Frankfurt_Heathrow)



SyntaxError: invalid syntax (<ipython-input-148-b89c614b5375>, line 54)

In [122]:


# array dist is an array of all the distances between sources and destinations that should be created and filled by Rustam
# once it's filled a new data frame with the Source, Destination and Distance values will be created
distances = pd.Series(dist, name = 'Distance')
data = [filtered_european_routes['Source airport'].reset_index(drop=True),filtered_european_routes['Destination airport'].reset_index(drop=True), distances.reset_index(drop=True)]

# new DataFrame only with the Source, Destination and Distance values
df = pd.DataFrame(data).T
print(df)

      Source airport Destination airport     Distance
0                AER                 KZN  1507.989671
1                ASF                 KZN  1040.943243
2                ASF                 MRV   449.036643
3                CEK                 KZN   773.126226
4                CEK                 OVB  1343.161093
...              ...                 ...          ...
16372            ORY                 FNC  2391.848773
16373            ORY                 LIS  1439.045374
16374            ORY                 OPO  1202.317838
16375            ORY                 VKO  2476.274933
16376            VKO                 ORY  2476.274933

[16377 rows x 3 columns]
