In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import json

In [2]:
# Locations of the cleaned datasets used by this notebook. All paths are
# relative, so they resolve against the kernel's working directory.
# The airports data is loaded twice below: once as a CSV table and once as JSON.
EU_AIRPORTS_FILE_CLEANED_CSV = "../data/cleaned/eu_cleaned_airports.csv"
EU_AIRPORTS_FILE_CLEANED_JSON = "../data/cleaned/eu_cleaned_airports.json"
EU_AIRLINES_FILE_CLEANED_CSV = "../data/cleaned/eu_cleaned_airlines.csv"
EU_COUNTRIES_FILE_CLEANED_JSON = "../data/cleaned/eu_cleaned_countries.json"
EU_ROUTES_FILE_CLEANED = "../data/cleaned/eu_cleaned_routes.csv"
# Output file: the assembled `world` dictionary is written here at the end.
EU_WORLD_DATA_FILE = "../data/cleaned/eu_world_data.json"

# Read Data

In [3]:
# Load the cleaned countries document. It is later indexed by country name
# (world["countries"][country]), so it is expected to be a JSON object.
# The encoding is pinned to UTF-8 (the encoding JSON interchange uses) so the
# result does not depend on the platform's locale default.
with open(EU_COUNTRIES_FILE_CLEANED_JSON, encoding="utf-8") as f:
    eu_countries = json.load(f)

In [4]:
# Load the cleaned airports document. Later cells test membership of airport
# codes in it (`code in world["airports"]`).
# The encoding is pinned to UTF-8 (the encoding JSON interchange uses) so the
# result does not depend on the platform's locale default.
with open(EU_AIRPORTS_FILE_CLEANED_JSON, encoding="utf-8") as f:
    eu_airports = json.load(f)

In [5]:
# Tabular view of the cleaned airports: the first CSV column becomes the index
# and the first row supplies the column names.
eu_airports_df = pd.read_csv(EU_AIRPORTS_FILE_CLEANED_CSV, header=0, index_col=0)

In [6]:
# Tabular view of the cleaned airlines: the first CSV column becomes the index
# and the first row supplies the column names.
eu_airlines_df = pd.read_csv(EU_AIRLINES_FILE_CLEANED_CSV, header=0, index_col=0)

In [7]:
# Tabular view of the cleaned routes: the first CSV column becomes the index
# and the first row supplies the column names.
eu_routes_df = pd.read_csv(EU_ROUTES_FILE_CLEANED, header=0, index_col=0)

# Create EU World

In [8]:
# Root of the exported structure. Both values are the already-loaded JSON
# objects themselves (not copies), so later cells that fill in
# world["countries"] also modify `eu_countries`.
world = {
    "countries": eu_countries,
    "airports": eu_airports,
}

In [9]:
# Attach to every known country the list of its airports.
#
# Grouping by 'Country' collapses each remaining column into one list per
# country, so a row of the grouped frame holds parallel lists with one entry
# per airport of that country.
eu_airports_by_country = eu_airports_df.groupby('Country').agg(lambda x: list(x))
for country in eu_airports_by_country.index:
    # Countries absent from the countries document are skipped.
    if country not in world["countries"]:
        continue
    airport_columns = eu_airports_by_country.loc[country]
    # Access by position must go through .iloc: integer keys on a
    # label-indexed Series are deprecated as positional lookups and are
    # treated as labels by newer pandas.
    # NOTE(review): position 2 is presumably the airport-code column, since
    # later cells look codes up in world["airports"] -- confirm against the CSV.
    world["countries"][country]["airports"] = list(airport_columns.iloc[2])

In [10]:
# Attach to every known country a mapping of its airlines, keyed by the value
# stored under "iata", each starting with an empty "airplanes" mapping.
#
# Grouping by 'Country' collapses each remaining column into one list per
# country, so a row of the grouped frame holds parallel lists with one entry
# per airline of that country.
eu_airlines_by_country = eu_airlines_df.groupby('Country').agg(lambda x: list(x))
for country in eu_airlines_by_country.index:
    # Countries absent from the countries document are skipped.
    if country not in world["countries"]:
        continue
    airline_columns = eu_airlines_by_country.loc[country]
    # Access by position must go through .iloc: integer keys on a
    # label-indexed Series are deprecated as positional lookups and are
    # treated as labels by newer pandas.
    # NOTE(review): positions 0 and 1 are used as airline name and IATA code
    # respectively -- confirm against the CSV column order.
    airline_names = airline_columns.iloc[0]
    airline_codes = airline_columns.iloc[1]
    # When a code repeats, the last airline with that code wins.
    world["countries"][country]["airlines"] = {
        code: {"name": name, "iata": code, "airplanes": {}}
        for name, code in zip(airline_names, airline_codes)
    }

In [11]:
# Columns of the routes table that are carried into the grouped view. Their
# order matters: the grouped frame keeps the non-key columns in this order.
route_columns = [
    "Airline_Country",
    "Airline_Name",
    "Airline_Code",
    "Airport_Code_Source",
    "Name_Source",
    "Country_Source",
    "Airport_Code_Destination",
    "Name_Destination",
    "Country_Destination",
    "Distance_Km",
]
route_group_keys = ["Airline_Country", "Airline_Code"]

# One row per (airline country, airline code); every other column becomes a
# list holding that airline's routes in row order.
eu_routes_by_country_airline = (
    eu_routes_df[route_columns]
    .groupby(route_group_keys)
    .agg(lambda column: list(column))
)

In [12]:
# Attach every usable route to the airline that operates it, as an "airplane"
# entry keyed "<airline>-<source airport>-<destination airport>".
#
# A route is kept only when both endpoint countries are in world["countries"]
# and both endpoint airports are in world["airports"].
for country, airline in eu_routes_by_country_airline.index:
    if country not in world["countries"]:
        continue
    # A country that has routes but no airlines has no "airlines" key at all;
    # .get() skips it instead of raising KeyError.
    airline_entry = world["countries"][country].get("airlines", {}).get(airline)
    if airline_entry is None:
        continue

    routes = eu_routes_by_country_airline.loc[(country, airline)]
    # Start from an empty mapping so re-running this cell does not accumulate
    # stale entries.
    airplanes = {}
    airline_entry["airplanes"] = airplanes

    # The grouped columns are parallel lists (one element per route). They are
    # addressed by label rather than by position, which is both readable and
    # independent of pandas' deprecated integer-key fallback on Series.
    route_rows = zip(
        routes["Country_Source"],
        routes["Airport_Code_Source"],
        routes["Country_Destination"],
        routes["Airport_Code_Destination"],
        routes["Distance_Km"],
    )
    for source_country, source_airport, dest_country, dest_airport, distance in route_rows:
        if (
            source_country in world["countries"]
            and dest_country in world["countries"]
            and source_airport in world["airports"]
            and dest_airport in world["airports"]
        ):
            # Repeated airline/source/destination rows share one id, so the
            # last such row wins.
            airplane_id = f"{airline}-{source_airport}-{dest_airport}"
            airplanes[airplane_id] = {
                "airline": airline,
                "source_airport": source_airport,
                "destination_airport": dest_airport,
                "distance": distance,
            }

In [13]:
# world["countries"]["Belgium"]["airlines"]["SN"]["airplanes"]["SN-BRU-FCO"]

In [14]:
# world["airports"]

# Save EU World

In [15]:
# Persist the assembled world structure as a single JSON document,
# overwriting any previous export.
with open(EU_WORLD_DATA_FILE, mode="w") as world_file:
    json.dump(world, world_file)