In [1]:
# Network Algorithms Homework Project


In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from geopy import distance

# pd.set_option("display.max_rows", None, "display.max_columns", None)


In [3]:
# Load the raw airport and route tables from the CSV files in the working directory
airports, routes = (
    pd.read_csv(csv_path) for csv_path in ('airports.csv', 'routes.csv')
)

In [4]:
def filter_na(df, column):
    """Drop the rows of ``df`` whose ``column`` holds no usable value.

    A value counts as N/A when it is missing (NaN/None) or is a string
    shorter than three characters. The length rule fits fixed-width codes
    such as those in the "IATA" column (presumably three letters -- confirm
    against the data). On a column of digit strings it would also drop every
    value with fewer than three digits, so it is not a safe test for numeric
    identifiers.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter; it is not modified.
    column : str
        Name of a string-valued column of ``df``.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the kept rows, with their original index
        labels. Because it is a copy, callers can add or convert columns
        without a SettingWithCopyWarning.
    """
    values = df[column]
    too_short = values.str.len() < 3
    # NaN lengths compare as False above, so missing entries need their own test.
    missing = values.isna()
    return df[~(too_short | missing)].copy()

# Filter the N/A values in the data
filtered_airports = filter_na(airports, "IATA")

# Route endpoints are numeric airport IDs, so anything that does not parse as
# a number is treated as N/A. A length-based test is not used here because it
# would also discard valid IDs with fewer than three digits.
route_id_columns = ['Source airport ID', 'Destination airport ID']
route_ids = routes[route_id_columns].apply(pd.to_numeric, errors='coerce')
has_both_ids = route_ids.notna().all(axis=1)

# Build a new frame with int64 ID columns instead of assigning into a filtered
# slice, which is what makes pandas emit SettingWithCopyWarning.
filtered_routes = routes.loc[has_both_ids].assign(
    **{
        id_column: route_ids.loc[has_both_ids, id_column].astype('int64')
        for id_column in route_id_columns
    }
)

In [5]:
def get_countries_of(region, timeout=10):
    """Return the common names of the countries in ``region``.

    The names are fetched from the REST Countries API endpoint
    ``https://restcountries.com/v3.1/region/<region>``. The region name is
    lower-cased before it is placed in the URL, so ``'Europe'`` requests the
    same URL as before.

    Parameters
    ----------
    region : str
        Region name, e.g. ``'Europe'``. Other names are passed to the API as
        given (presumably africa, americas, asia and oceania are accepted --
        confirm against the API documentation).
    timeout : float, optional
        Seconds to wait for the server before giving up, so an unreachable
        service cannot hang the notebook.

    Returns
    -------
    list of str
        The ``name.common`` value of every country in the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (for example an unknown
        region).
    """
    # Kept as function-local imports, matching the original cell.
    import requests
    from urllib.parse import quote

    region_slug = quote(str(region).strip().lower(), safe="")
    response = requests.get(
        "https://restcountries.com/v3.1/region/" + region_slug,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to index an error payload.
    response.raise_for_status()
    return [country['name']['common'] for country in response.json()]

def filter_to_europe(df, attr):
    """Return the rows of ``df`` whose ``attr`` value is a European country name.

    The list of country names comes from ``get_countries_of('Europe')``;
    a row is kept only when its value matches one of those names exactly.
    """
    european_countries = get_countries_of('Europe')
    in_europe = df[attr].isin(european_countries)
    return df[in_europe]

# Restrict the airports to a single region (Europe in this example),
# matching on each airport's 'Country' value
filtered_european_airports = filter_to_europe(
    df=filtered_airports,
    attr='Country',
)

In [6]:
# Keep only the routes whose source AND destination both appear among the
# filtered (European) airports
airport_id = filtered_european_airports['Airport ID']
source_is_known = filtered_routes['Source airport ID'].isin(airport_id)
destination_is_known = filtered_routes['Destination airport ID'].isin(airport_id)
filtered_european_routes = filtered_routes[source_is_known & destination_is_known]

In [7]:
def get_dists(routes, airports):
    """Compute the distance in kilometres for every route.

    Parameters
    ----------
    routes : pandas.DataFrame
        Needs the columns 'Source airport ID' and 'Destination airport ID'.
    airports : pandas.DataFrame
        Needs the columns 'Airport ID', 'Latitude' and 'Longitude'. If an ID
        occurs more than once, the last row with that ID is used.

    Returns
    -------
    list of float
        One entry per row of ``routes``, in row order. A route whose source
        or destination is not in ``airports``, or whose distance cannot be
        computed, is reported on stdout and gets ``nan``. The list therefore
        always has ``len(routes)`` entries and can be paired with the routes
        by position.
    """
    # Index the coordinates by airport ID once, instead of scanning the whole
    # airports table for every route.
    coordinates = dict(
        zip(
            airports['Airport ID'],
            zip(airports['Latitude'], airports['Longitude']),
        )
    )
    not_available = float('nan')
    dist = []

    endpoints = zip(routes['Source airport ID'], routes['Destination airport ID'])
    for source_id, destination_id in endpoints:
        source = coordinates.get(source_id)
        if source is None:
            print('skipped the following source ID:')
            print(source_id)
            dist.append(not_available)
            continue

        destination = coordinates.get(destination_id)
        if destination is None:
            print('skipped the following destination ID:')
            print(destination_id)
            dist.append(not_available)
            continue

        try:
            # Use the stored coordinate values directly rather than parsing
            # their printed representation.
            source_point = (float(source[0]), float(source[1]))
            destination_point = (float(destination[0]), float(destination[1]))
            dist.append(distance.distance(source_point, destination_point).km)
        except Exception as error:
            # Stay best-effort for bad coordinates, but let KeyboardInterrupt
            # and SystemExit through and show what went wrong.
            print('----')
            print(source_id, destination_id, error)
            dist.append(not_available)

    return dist

In [8]:
# Distance in km for each European route, in the same order as the route rows
dist = get_dists(
    routes=filtered_european_routes,
    airports=filtered_european_airports,
)

In [None]:

def airport_info(data_airports, iata):
    """Look up an airport by its IATA code.

    Returns every row of ``data_airports`` whose 'IATA' value equals
    ``iata``, with all columns; the result is empty when nothing matches.
    """
    has_code = data_airports['IATA'] == iata
    return data_airports.loc[has_code]

# `dist` holds the distance in km of each European route, in route order.
# Give it a fresh 0..n-1 index so it lines up by position with the route
# columns below, whose index is reset the same way.
distances = pd.Series(np.array(dist), name='Distance')
data = [
    filtered_european_routes['Source airport'].reset_index(drop=True),
    filtered_european_routes['Destination airport'].reset_index(drop=True),
    distances,
]

# New DataFrame with only the Source, Destination and Distance values.
# Concatenating column-wise keeps each column's dtype (Distance stays numeric);
# building a frame from the list and transposing it made every column `object`.
df = pd.concat(data, axis=1)
print(df)

      Source airport Destination airport     Distance
0                AER                 KZN  1507.989671
1                ASF                 KZN  1040.943243
2                ASF                 MRV   449.036643
3                CEK                 KZN   773.126226
4                CEK                 OVB  1343.161093
5                DME                 KZN   718.084194
6                DME                 NBC   895.418997
7                DME                 UUA   954.628759
8                EGO                 KGD  1175.219641
9                EGO                 KZN  1010.815895
10               KGD                 EGO  1175.219641
11               KZN                 AER  1507.989671
12               KZN                 ASF  1040.943243
13               KZN                 CEK   773.126226
14               KZN                 DME   718.084194
15               KZN                 EGO  1010.815895
16               KZN                 LED  1220.784311
17               KZN        