# OpenFlights datasets
https://openflights.org/data.html

### Unique ID column of each table:

Airports : Airport_ID

Airlines : Airline_ID

Planes : Name  

In [1]:
import numpy as np
import pandas as pd
import os #Concatenate path for file

In [2]:
# Load the data
def load_dataset(filename, sep=',', dirpath="./data"):
    """Read a header-less delimited text file into a DataFrame.

    Parameters
    ----------
    filename : str
        Path of the file, relative to ``dirpath``.
    sep : str, default ','
        Field delimiter handed to ``pandas.read_csv``.
    dirpath : str, default "./data"
        Directory that ``filename`` is resolved against.

    Returns
    -------
    pandas.DataFrame
        The parsed table. Because the file is read with ``header=None``,
        columns are labelled 0..n-1 and the first line is treated as data.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. it does not exist).
    """
    filepath = os.path.join(dirpath, filename)
    with open(filepath, 'r', encoding='utf8') as file:
        # `sep` must be passed by keyword: every read_csv argument after the
        # first is keyword-only in pandas >= 2.0.
        return pd.read_csv(file, sep=sep, header=None)

In [3]:
# Read the four raw tables; the files carry no header row.
airports_raw = load_dataset('openflight/airports.dat')
airlines_raw = load_dataset('openflight/airlines.dat')
planes_raw = load_dataset('openflight/planes.dat')
routes_raw = load_dataset('openflight/routes.dat')

# Attach column names to each table (order follows the file's column order).
airports_raw.columns = [
    'Airport_ID', 'Name', 'City', 'Country',
    'IATA', 'ICAO',
    'Latitude', 'Longitude', 'Altitude',
    'Timezone', 'DST', 'Tz_database',
    'Type', 'Source',
]
airlines_raw.columns = [
    'Airline_ID', 'Name', 'Alias',
    'IATA', 'ICAO',
    'Callsign', 'Country', 'Active',
]
planes_raw.columns = ['Name', 'IATA', 'ICAO']
routes_raw.columns = [
    'Airline', 'Airline_ID',
    'src_airport', 'src_airport_ID',
    'dest_airport', 'dest_airport_ID',
    'Codeshare', 'Stops', 'Equipment',
]

In [4]:
def remove_backslash_n(dataset):
    """Return a new object in which every value equal to the two-character
    string backslash-N is replaced by NaN; ``dataset`` itself is not modified.
    """
    null_marker = '\\N'
    cleaned = dataset.replace(to_replace=null_marker, value=np.nan)
    return cleaned


In [91]:
def clean_airlines(airlines_data_raw):
    """Return a cleaned copy of the airlines table.

    Steps, in order:
      1. drop every row whose ``Airline_ID`` is -1 and renumber the rows;
      2. turn backslash-N markers into NaN via ``remove_backslash_n``;
      3. in the row(s) with ``Airline_ID == 1``, turn '-' values into NaN.

    ``airlines_data_raw`` is expected to expose an ``Airline_ID`` column.
    """
    placeholder_rows = airlines_data_raw[airlines_data_raw.Airline_ID == -1].index
    kept = airlines_data_raw.drop(placeholder_rows)
    # NOTE(review): reset_index() without drop=True keeps the old row labels
    # as an extra "index" column in the result -- confirm this is intended.
    kept = kept.reset_index()

    airlines = remove_backslash_n(kept)

    # Only the Airline_ID == 1 row(s) get their '-' values blanked out.
    id_is_one = airlines.Airline_ID == 1
    airlines[id_is_one] = airlines[id_is_one].replace('-', np.nan)
    return airlines

In [92]:
# Cleaned tables: backslash-N markers become NaN everywhere; the airlines
# table additionally goes through clean_airlines().
airports = remove_backslash_n(airports_raw)
airlines = clean_airlines(airlines_raw)
planes = remove_backslash_n(planes_raw)
routes = remove_backslash_n(routes_raw)