In [1]:
import numpy as np
import pandas as pd
import datetime

In [2]:
# Location of the OAG schedule export, relative to this notebook.
data_path = "../../data/OAG.csv"

# Read the whole schedule into memory; later cells filter this frame.
data = pd.read_csv(data_path)

In [3]:
# List every column of the schedule frame so the ones to keep can be chosen.
print(data.columns)

Index(['Carrier Code', 'Carrier Name', 'Flight No', 'ICAO AL',
       'Dep Airport Code', 'Dep Airport Name', 'Arr Airport Code',
       'Arr Airport Name', 'International/Domestic', 'Local Days Of Op',
       'Local Days Of Op 1', 'Local Days Of Op 2', 'Local Days Of Op 3',
       'Local Days Of Op 4', 'Local Days Of Op 5', 'Local Days Of Op 6',
       'Local Days Of Op 7', 'Local Days Of Op Arr', 'Effective From',
       'Effective To', 'Local Dep Time', 'Local Arr Time', 'Local Arr Day',
       'Elapsed Time', 'Flying Time', 'Ground Time', 'General Aircraft Code',
       'General Aircraft Name', 'Specific Aircraft Code',
       'Specific Aircraft Name', 'Equipment Group', 'No of Stops',
       'Stopping Airport', 'Month', 'Freight Class'],
      dtype='object')


In [4]:
# Keep only rows flagged as domestic flights.
# NOTE: this rebinds `data`, so every later cell sees the domestic subset only.
is_domestic = data["International/Domestic"] == "Domestic"
data = data[is_domestic]

In [5]:
# Columns that are skipped when summarising the schedule frame.
not_columns = {
    # Flight scope and operating-day flags
    'International/Domestic',
    'Local Days Of Op',
    'Local Days Of Op 1',
    'Local Days Of Op 2',
    'Local Days Of Op 3',
    'Local Days Of Op 4',
    'Local Days Of Op 5',
    'Local Days Of Op 6',
    'Local Days Of Op 7',
    'Local Days Of Op Arr',
    'Local Arr Day',
    # Durations other than the flying time, and routing details
    'Elapsed Time',
    'Ground Time',
    'Month',
    'No of Stops',
    'Stopping Airport',
    'Freight Class',
    # Human-readable airport names (the codes are kept instead)
    'Dep Airport Name',
    'Arr Airport Name',
    # Carrier and aircraft descriptors
    'Carrier Code',
    'Carrier Name',
    'General Aircraft Code',
    'General Aircraft Name',
    'Specific Aircraft Code',
    'Specific Aircraft Name',
    'Equipment Group',
    'Flight No',
    'ICAO AL',
}

In [6]:
# Row count, then how many columns remain once the excluded ones are removed.
row_count = len(data)
print(row_count, "data")
print(len(data.columns) - len(not_columns), "columns")

# Number of distinct values in every retained column (NaN counts as a value,
# matching what len(Series.unique()) reports).
for column in data.columns:
    if column not in not_columns:
        print(column, data[column].nunique(dropna=False))

# Show the raw "Effective From" values as the cell output.
data["Effective From"]

79833 data
7 columns
Dep Airport Code 79
Arr Airport Code 79
Effective From 365
Effective To 365
Local Dep Time 229
Local Arr Time 231
Flying Time 48


1206        1/4/2019
1207      28/10/2019
1208        1/1/2019
1209      21/10/2019
1210      28/10/2019
             ...    
186170      1/7/2019
186171     13/7/2019
186172      1/9/2019
186173     1/10/2019
186174    25/11/2019
Name: Effective From, Length: 79833, dtype: object

In [7]:
# Show which routes carry the two suspiciously long flying times.
route_columns = ["Dep Airport Name", "Arr Airport Name"]
for flying_time in ("11:09", "8:59"):
    print(data.loc[data["Flying Time"] == flying_time, route_columns])
# The literal below is displayed as the cell output; in English it reads:
# "The flying time is strangely long. Some kind of trouble?"
"飛行時間がやけに長い。なにかしらのトラブルか？"

                 Dep Airport Name     Arr Airport Name
1427   Nagasaki, Nagasaki Airport  Tokyo Intl (Haneda)
1446   Nagasaki, Nagasaki Airport  Tokyo Intl (Haneda)
1665   Nagasaki, Nagasaki Airport  Tokyo Intl (Haneda)
1688   Nagasaki, Nagasaki Airport  Tokyo Intl (Haneda)
1728   Nagasaki, Nagasaki Airport  Tokyo Intl (Haneda)
41829  Nagasaki, Nagasaki Airport  Tokyo Intl (Haneda)
          Dep Airport Name            Arr Airport Name
1347   Tokyo Intl (Haneda)  Nagasaki, Nagasaki Airport
1441   Tokyo Intl (Haneda)  Nagasaki, Nagasaki Airport
1682   Tokyo Intl (Haneda)  Nagasaki, Nagasaki Airport
31360  Tokyo Intl (Haneda)  Nagasaki, Nagasaki Airport


'飛行時間がやけに長い。なにかしらのトラブルか？'

In [8]:
"空港の名前の集合が発着で一致"
# Verify that departure and arrival airports cover the same names and codes.
# Both directions are checked: comparing only arrival -> departure would miss
# an airport that appears solely as a departure. Nothing printed means the two
# sets are identical.
for check in ["Name", "Code"]:
    dep_code = set(data["Dep Airport "+check].unique())
    arr_code = data["Arr Airport "+check].unique()
    # Arrival values that never occur as a departure.
    for code in arr_code:
        if code not in dep_code:
            print(check, code)
    # Departure values that never occur as an arrival.
    arr_seen = set(arr_code)
    for code in data["Dep Airport "+check].unique():
        if code not in arr_seen:
            print(check, code, "(departure only)")

In [9]:
# Columns that make up the reduced timetable. Kept as a set so membership
# tests elsewhere keep working.
columns = set(
    [
        "Dep Airport Code",
        "Arr Airport Code",
        "Effective From",
        "Effective To",
        "Local Dep Time",
        "Local Arr Time",
        "Flying Time",
    ]
)

# Fail loudly if an expected column is absent, as direct indexing would.
missing_columns = columns - set(data.columns)
if missing_columns:
    raise KeyError(f"columns missing from data: {sorted(missing_columns)}")

# Select the columns in the order they appear in `data`. Iterating the set
# directly yields an arbitrary order that depends on the string hash seed, so
# the layout of `timetable` could differ from one kernel start to the next.
timetable = data[[column for column in data.columns if column in columns]].copy()
timetable

Unnamed: 0,Dep Airport Code,Arr Airport Code,Effective To,Local Arr Time,Effective From,Local Dep Time,Flying Time
1206,HND,KMJ,30/6/2019,900,1/4/2019,710,1:50
1207,HND,KMJ,31/12/2019,910,28/10/2019,710,2:00
1208,KMJ,HND,24/3/2019,915,1/1/2019,735,1:40
1209,HND,KMJ,26/10/2019,1405,21/10/2019,1215,1:50
1210,KMJ,HND,31/12/2019,1120,28/10/2019,945,1:35
...,...,...,...,...,...,...,...
186170,TRA,MMY,7/7/2019,1655,1/7/2019,1630,0:25
186171,TRA,MMY,25/8/2019,1650,13/7/2019,1625,0:25
186172,TRA,MMY,29/9/2019,1655,1/9/2019,1630,0:25
186173,TRA,MMY,24/11/2019,1705,1/10/2019,1640,0:25


In [10]:
# Airport reference table (one row per airport, with coordinates).
a2p_path = "../../data/airports.csv"
a2p = pd.read_csv(a2p_path)

# Restrict to Japanese airports that are neither heliports nor closed.
in_japan = a2p["iso_country"] == "JP"
excluded_type = a2p["type"].isin(["heliport", "closed"])
a2p = a2p[in_japan & ~excluded_type]

# A usable airport needs an IATA code and both coordinates.
required_fields = ["iata_code", "latitude_deg", "longitude_deg"]
a2p = a2p.dropna(subset=required_fields)

In [11]:
# List the columns available in the filtered airport reference table.
print(a2p.columns)

Index(['id', 'ident', 'type', 'name', 'latitude_deg', 'longitude_deg',
       'elevation_ft', 'continent', 'iso_country', 'iso_region',
       'municipality', 'scheduled_service', 'gps_code', 'iata_code',
       'local_code', 'home_link', 'wikipedia_link', 'keywords'],
      dtype='object')


In [12]:
# Sanity-check the IATA codes of the filtered airport table: report non-string
# codes and codes shared by more than one row, then build the lookup set.
print(len(a2p))

# Count every code once up front; recomputing value_counts() inside the loop
# made the check quadratic in the number of airports.
iata_counts = a2p["iata_code"].value_counts(dropna=False)
for code in a2p["iata_code"].unique().tolist():
    if not isinstance(code, str):
        # Non-string entries (e.g. NaN) are reported and skipped.
        print(code)
        continue
    if iata_counts[code] > 1:
        print(code, iata_counts[code])

# Set of all known IATA codes, used for membership tests in later cells.
a2p_list = set(a2p["iata_code"])
print(len(a2p_list))
print(a2p["iata_code"].isnull().sum())

96
KCZ 2
95
0


In [13]:
# Compare the rows that share the duplicated IATA code KCZ.
kcz_rows = a2p.loc[a2p["iata_code"] == "KCZ", ["name", "latitude_deg", "longitude_deg"]]
print(kcz_rows)

                       name  latitude_deg  longitude_deg
28004  Kochi Airport (高知空港)     33.546259     133.668938
41367   Kochi Ryoma Airport     33.546101     133.669006


In [14]:
# Collect arrival airport codes that have no entry in the airport table.
not_list = []
for airport_code in data["Arr Airport Code"].unique():
    if airport_code in a2p_list:
        continue
    not_list.append(airport_code)
print(len(not_list))

0


In [15]:
# Distinct departure-airport names recorded in the schedule for code KCZ.
data.loc[data["Dep Airport Code"] == "KCZ", "Dep Airport Name"].unique()

array(['Kochi (JP)'], dtype=object)

In [16]:
# Coordinates stored in the airport table for IATA code NRT.
a2p.loc[a2p["iata_code"] == "NRT", ["latitude_deg", "longitude_deg"]]

Unnamed: 0,latitude_deg,longitude_deg
41289,35.764702,140.386002


In [17]:
# Narrow the schedule to flights departing from NRT.
# NOTE: this rebinds `data`, so cells run afterwards see NRT departures only.
departs_from_nrt = data["Dep Airport Code"] == "NRT"
data = data[departs_from_nrt]

# Validity periods of the entries whose local departure time is 1200.
data.loc[data["Local Dep Time"] == 1200, ["Effective From", "Effective To"]]

Unnamed: 0,Effective From,Effective To
29801,31/3/2019,28/4/2019
30948,26/8/2019,1/9/2019
30949,9/9/2019,20/10/2019
30995,29/4/2019,18/8/2019
33620,31/3/2019,28/4/2019
33621,29/4/2019,18/8/2019
33622,29/4/2019,18/8/2019
33623,19/8/2019,25/8/2019
33624,2/9/2019,8/9/2019
34432,21/10/2019,26/10/2019
