In [None]:
import pandas as pd
import numpy as np
from collections import Counter
import networkx as nx
import random
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import folium
import geojson
import geopandas
import mplleaflet

### Data loading

In [None]:
# Read the US airport reference table; "," is already read_csv's default separator.
airport_info = pd.read_csv("../Data/us-airports.csv")

In [None]:
# Discard every row whose "type" is "closed".
not_closed = airport_info["type"].ne("closed")
airport_info = airport_info.loc[not_closed]

In [None]:
# Read the airport-ID lookup file; "," is already read_csv's default separator.
airport_names_origin = pd.read_csv("../Data/L_AIRPORT_ID.txt")

In [None]:
# The final entry is an "Unknown Point in Alaska", so remove that single row in place.
# NOTE: re-running this cell removes one more row each time.
last_row_label = airport_names_origin.index[-1:]
airport_names_origin.drop(index=last_row_label, inplace=True)
airport_names_origin

In [None]:
# Keep only the text after the first ":" of each description.
# Splitting once (maxsplit=1) preserves text that itself contains a colon, and
# taking the last piece leaves a value without ":" unchanged, so re-running this
# cell on already-cleaned values no longer raises IndexError.
airport_names_origin["Description"] = [
    text.split(":", 1)[-1].strip() for text in airport_names_origin["Description"]
]

In [None]:
airport_names_dest = airport_names_origin.copy()

In [None]:
# Build the origin-side lookup: expose "Code"/"Description" under ORIGIN_* names
# (appended as the last two columns) and discard the generic columns.
airport_names_origin = airport_names_origin.assign(
    ORIGIN_AIRPORT_ID=airport_names_origin["Code"],
    ORIGIN_AIRPORT=airport_names_origin["Description"],
).drop(columns=["Code", "Description"])

In [None]:
# Build the destination-side lookup: expose "Code"/"Description" under DEST_* names
# (appended as the last two columns) and discard the generic columns.
airport_names_dest = airport_names_dest.assign(
    DEST_AIRPORT_ID=airport_names_dest["Code"],
    DEST_AIRPORT=airport_names_dest["Description"],
).drop(columns=["Code", "Description"])

In [None]:
# Read the 2013 T-100 domestic segment table; "," is already read_csv's default separator.
data = pd.read_csv("../Data/T_T100D_SEGMENT_US_CARRIER_ONLY_2013_All.csv")

In [None]:
# Attach the origin and destination airport names (inner joins: rows whose
# airport id is missing from the lookup are dropped).
data = (
    data
    .merge(airport_names_origin, on="ORIGIN_AIRPORT_ID")
    .merge(airport_names_dest, on="DEST_AIRPORT_ID")
)

In [None]:
unique_airports = data.drop_duplicates(subset=['ORIGIN',"DEST"])

In [None]:
unique_airports

In [None]:
# Look up coordinates for the origin airport of every unique (ORIGIN, DEST) pair.
# Three dictionaries are filled; all map to the same (latitude, longitude) tuple.
airport_origin_coordinates = dict()  # keyed by the value at column position 23
airport_abv_coordinates = dict()  # keyed by the airport code at column position 22 (abbreviation)
airport_name_coordinates = dict()  # keyed by the airport name (ORIGIN_AIRPORT column)
for _, route in unique_airports.iterrows():
    # Positional access is spelled out with .iloc: integer keys on a label-indexed
    # Series are deprecated as positional lookups in recent pandas.
    # NOTE(review): position 22 is presumably the ORIGIN code, since these keys
    # are later joined against the ORIGIN / DEST columns -- confirm.
    local_code = route.iloc[22]
    sub = airport_info.loc[airport_info["local_code"] == local_code]
    if len(sub) == 0:
        # No local-code match: fall back to the IATA code.
        sub = airport_info.loc[airport_info["iata_code"] == local_code]
    if len(sub) == 0:
        # Airport absent from the reference table: nothing to record.
        continue
    # Take the first match explicitly: float() on a Series is deprecated and
    # raises TypeError when more than one reference row matches the code.
    first_match = sub.iloc[0]
    lat_long = (float(first_match["latitude_deg"]), float(first_match["longitude_deg"]))
    airport_abv_coordinates[local_code] = lat_long
    airport_origin_coordinates[route.iloc[23]] = lat_long
    # Second-to-last column after the two name merges is ORIGIN_AIRPORT.
    airport_name_coordinates[route.iloc[-2]] = lat_long

In [None]:
# Two-column frame: airport code -> (latitude, longitude), named for joining on ORIGIN.
coordinates_df_origin = pd.DataFrame(
    {
        "ORIGIN": list(airport_abv_coordinates),
        "ORIGIN_COORDINATES": list(airport_abv_coordinates.values()),
    }
)

In [None]:
coordinates_df_origin

In [None]:
# Attach origin coordinates (inner join: rows without a known origin are dropped).
data = data.merge(coordinates_df_origin, on="ORIGIN")

In [None]:
# Same code -> (latitude, longitude) lookup, named for joining on DEST.
coordinates_df_dest = pd.DataFrame(
    {
        "DEST": list(airport_abv_coordinates),
        "DEST_COORDINATES": list(airport_abv_coordinates.values()),
    }
)

In [None]:
# Attach destination coordinates (inner join: rows without a known destination are dropped).
data = data.merge(coordinates_df_dest, on="DEST")

In [None]:
map1 = folium.Map()

In [None]:
gdf_coord = data[['ORIGIN_COORDINATES','DEST_COORDINATES']].drop_duplicates()

In [None]:
gdf_coord

In [None]:
#for origin,name in zip(coordinates_df_origin['ORIGIN_COORDINATES'],coordinates_df_origin['ORIGIN']):
#    folium.Marker(location= origin ,popup = name,icon=folium.Icon(color='lightgray', icon='plane',prefix = 'fa')).add_to(map1)

In [None]:
# Draw one black segment per unique (origin, destination) coordinate pair.
route_endpoints = zip(gdf_coord['ORIGIN_COORDINATES'], gdf_coord['DEST_COORDINATES'])
for origin_point, dest_point in route_endpoints:
    folium.PolyLine((origin_point, dest_point), color='black').add_to(map1)

In [None]:
asd = list(zip(gdf_coord['ORIGIN_COORDINATES'],gdf_coord['DEST_COORDINATES']))
len(asd) # - very weird, it is different length in main v3

In [None]:
graph1 = data[['ORIGIN_CITY_NAME','DEST_CITY_NAME']].drop_duplicates().reset_index()

In [None]:
graph1 = graph1.drop('index',axis = 1)

In [None]:
# Shuffle the edge list. A fixed seed keeps the row order (and everything derived
# from it) identical across Restart & Run All.
graph1 = graph1.sample(frac=1, random_state=42)

In [None]:
# Remove self-loops: rows whose origin and destination city are the same.
different_city = graph1['ORIGIN_CITY_NAME'].ne(graph1['DEST_CITY_NAME'])
graph1 = graph1.loc[different_city]

In [None]:
graph1

In [None]:
G = nx.MultiDiGraph()

In [None]:
# Build the route graph as a directed multigraph. Without create_using,
# from_pandas_edgelist returns an undirected nx.Graph, which merges A->B with
# B->A and discards the MultiDiGraph type chosen for G.
G = nx.from_pandas_edgelist(
    graph1,
    'ORIGIN_CITY_NAME',
    'DEST_CITY_NAME',
    create_using=nx.MultiDiGraph,
)

In [None]:
plt.figure(figsize=(100, 100))
nx.draw(G, with_labels=True,node_color = 'grey',node_size = 1000)

In [None]:
origin_airport = pd.DataFrame(gdf_coord["ORIGIN_COORDINATES"].to_list(), columns=['x', 'y']).drop_duplicates()

In [None]:
flights1 = pd.DataFrame(gdf_coord["ORIGIN_COORDINATES"].to_list(), columns=['x1', 'y1'])
flights2 = pd.DataFrame(gdf_coord["DEST_COORDINATES"].to_list(), columns=['x2', 'y2'])

In [None]:
flights = pd.concat([flights1, flights2], axis=1, join='inner')

In [None]:
# Drop self-loops, i.e. routes whose origin and destination coordinates are identical.
# A route is kept when EITHER coordinate differs; the previous `&` also discarded
# genuine routes whose endpoints merely share a latitude or a longitude.
is_real_route = (flights['x1'] != flights['x2']) | (flights['y1'] != flights['y2'])
flights = flights[is_real_route]
flights

In [None]:
# Number of rows per (origin, destination) coordinate pair, in first-appearance
# order (sort=False); only the resulting "size" column is kept.
coord_cols = ['ORIGIN_COORDINATES', 'DEST_COORDINATES']
rows_per_pair = data[coord_cols].groupby(coord_cols, sort=False, as_index=False).size()
flight_count = rows_per_pair.drop(columns=coord_cols)

In [None]:
flight_count

In [None]:
def NormalizeData(data):
    """Min-max scale ``data`` into the range [0, 1].

    Parameters
    ----------
    data : array-like
        Numeric values (e.g. NumPy array, pandas Series or DataFrame).

    Returns
    -------
    Same kind of object as ``data - min(data)``, with the minimum mapped to 0
    and the maximum mapped to 1. When every value is identical the result is
    all zeros instead of the NaNs a 0/0 division would produce.
    """
    lowest = np.min(data)
    span = np.max(data) - lowest
    # Guard only the scalar case; a non-scalar span (per-column reduction on
    # some pandas versions) keeps the plain element-wise division.
    if np.ndim(span) == 0 and span == 0:
        return data - lowest
    return (data - lowest) / span

In [None]:
dds = NormalizeData(flight_count)

In [None]:
destination = pd.DataFrame(gdf_coord["DEST_COORDINATES"].to_list(), columns=['x', 'y']).drop_duplicates()

In [None]:
x_values = [flights.x1,flights.x2]
y_values = [flights.y1,flights.y2]

In [None]:
x_values

In [None]:
# Plot every route as its own segment; opacity is the normalized number of flights.
# The previous loop ran len(x_values) == 2 times and redrew ALL routes on each
# pass with a single alpha, so per-route transparency was never applied.
fig = plt.figure(figsize = (400,80))
for row_label, lat_a, lat_b, lon_a, lon_b in zip(
    flights.index, flights.x1, flights.x2, flights.y1, flights.y2
):
    # flights keeps its original row labels after the self-loop filter, and dds
    # has one row per unique coordinate pair in the same first-appearance order,
    # so the label (not the loop position) selects the matching count.
    plt.plot([lon_a, lon_b], [lat_a, lat_b], 'grey', alpha = dds['size'].loc[row_label]);
plt.scatter(y_values,x_values)
plt.show()

In [None]:
passengers = data[['PASSENGERS','ORIGIN_AIRPORT','DEST_AIRPORT']]

In [None]:
# Sum the remaining columns per (origin airport, destination airport) pair in
# first-appearance order, then drop the two key columns.
airport_pair = ['ORIGIN_AIRPORT', 'DEST_AIRPORT']
passengers_per_pair = passengers.groupby(airport_pair, sort=False, as_index=False).sum()
passengers = passengers_per_pair.drop(columns=airport_pair)

In [None]:
passengers

In [None]:
# Normalized passenger counts, lifted by 0.05 so the least-travelled routes do not
# become fully transparent, and capped at 1.0 (the largest valid alpha).
# Capping instead of skipping the offset above 0.95 keeps the mapping monotonic:
# previously 0.94 became 0.99 while 0.96 stayed at 0.96.
dds1 = NormalizeData(passengers)
pass_count = np.minimum(dds1['PASSENGERS'] + 0.05, 1.0).tolist()

In [None]:
# Plot every route as its own segment with opacity taken from pass_count, then save
# the figure with a transparent background.
# The previous loop ran len(x_values) == 2 times and redrew ALL routes on each
# pass with a single alpha, so per-route transparency was never applied.
fig,ax = plt.subplots(figsize = (400,80))
origin_lat, dest_lat = x_values
origin_lon, dest_lon = y_values
for row_label, lat_a, lat_b, lon_a, lon_b in zip(
    origin_lat.index, origin_lat, dest_lat, origin_lon, dest_lon
):
    # Index by the route's original row label: filtered-out self-loops leave gaps,
    # so the loop position would drift away from the matching entry.
    # NOTE(review): assumes pass_count (grouped by airport name) follows the same
    # order as the unique coordinate pairs -- confirm.
    ax.plot((lon_a, lon_b), (lat_a, lat_b), 'grey', alpha = pass_count[row_label])
plt.savefig('transparent.png', transparent=True)

In [None]:
x_values

In [None]:
# Plot every route as its own segment; opacity is the normalized passenger count
# stored in pass_count.
fig,ax = plt.subplots(figsize = (400,80))

origin_lat, dest_lat = x_values
origin_lon, dest_lon = y_values

for row_label, lat_a, lat_b, lon_a, lon_b in zip(
    origin_lat.index, origin_lat, dest_lat, origin_lon, dest_lon
):
    # Use the route's original row label rather than the enumerate() position:
    # self-loops were filtered out of the coordinate Series, so positions after
    # the first removed row no longer line up with pass_count.
    # NOTE(review): assumes pass_count (grouped by airport name) follows the same
    # order as the unique coordinate pairs -- confirm.
    ax.plot((lon_a, lon_b), (lat_a, lat_b), 'grey', alpha = pass_count[row_label])
plt.show()

In [None]:
# One (origin, destination) pair per route: x holds the first coordinate of each
# endpoint, y the second. Zipping the two-element lists with themselves (and y
# with x_values) produced just two wrong tuples of whole Series.
x = list(zip(x_values[0], x_values[1]))
y = list(zip(y_values[0], y_values[1]))


In [None]:
# Plot at most the first 1000 entries of x / y; bounding the range by the actual
# lengths avoids an IndexError when fewer entries exist.
fig,ax = plt.subplots(figsize = (100,20))
for i in range(min(1000, len(x), len(y))):
    ax.plot(y[i],x[i])
plt.show()