In [1]:
# l'objectif de ce notebook est de partager le dataset en sous groupe pour chaque compagnie d'aviation

import numpy as np 
import pandas as pd 
import scipy as sc 
import matplotlib.pyplot as plt 
from itertools import product
from sklearn import linear_model
import json
import seaborn as sns
from sklearn.utils import shuffle


import missingno as msno


import plotly.offline as py
import plotly.figure_factory as ff
import plotly.graph_objs as gobj
py.init_notebook_mode(connected=True)

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import folium 
import branca.colormap as cm
from geopy.geocoders import Nominatim
import math


# Représentations spatiales

Ce notebook a pour objectif de réprésenter spatialement le retard et de définir quatres nouvelles variables la latitude et la longitude de l'aéroport de départ et d'arrivée. On visualise sur une carte géographique pour l'ensemble des compagnies, ainsi que pour chaque compagnie aérienne, le retard en fonction de l'aeroport. On visualise ensuite l'ensemble des liaisons pour l'ensemble des compagnies et pour chaqu'une des compagnies. 
On tient aussi compte graphiquement du traffic par aéroport et par trajet. 
Pour obtenir les données spatiales, on utilise un géo-encoder.
Les cartes sont sauvegardés en fichiers html pour être implantées dans l'API.

In [2]:
# données brutes 
fly_data = pd.read_csv('datafly_tot.csv')

In [3]:
print (fly_data.shape)
print (fly_data.columns)

(5635754, 10)
Index(['Unnamed: 0', 'FL_DATE', 'ORIGIN_CITY_NAME', 'DEST_CITY_NAME',
       'DEP_DELAY_NEW', 'ARR_DELAY_NEW', 'CANCELLED', 'DIVERTED', 'CARRIER',
       'TAIL_NUM'],
      dtype='object')


In [4]:
fly_data = shuffle(fly_data)

In [5]:
# réduire le nombre d'exemple pour encoder plus facilement 
m = fly_data.shape[0]
fly_data = fly_data.iloc[0:int(np.around(m/5)),:]
print (fly_data.shape)

(1127151, 10)


In [6]:
# détermine le nombre, les occurences / les pourcentages des objets pour une variable donnée
def plot_value_counts2(col_name,n_oc):
    
    #we don't want more than 31 bars
    if len(pd.DataFrame(df[col_name].dropna().value_counts())) > n_oc:
        max = n_oc
    else:
        max = len(pd.DataFrame(df[col_name].dropna().value_counts()))
        
        
    values_count = pd.DataFrame(df[col_name].dropna().value_counts()[:max])
    values_count.columns = ['count']
    # convert the index column into a regular column.
    values_count[col_name] = [ str(i) for i in values_count.index ]
    # add a column with the percentage of each data point to the sum of all data points.
    values_count['percent'] = values_count['count'].div(values_count['count'].sum()).multiply(100).round(2)
    # change the order of the columns.
    values_count = values_count.reindex([col_name,'count','percent'],axis=1)
    values_count.reset_index(drop=True,inplace=True)
    return (values_count)

In [7]:
# on applique aux compagnies aeriennes :
df = fly_data
carrier = plot_value_counts2('CARRIER',n_oc=1000)
print (carrier)

   CARRIER   count  percent
0       WN  259463    23.02
1       DL  194492    17.26
2       AA  176822    15.69
3       OO  121042    10.74
4       UA  109118     9.68
5       EV  100567     8.92
6       B6   56819     5.04
7       AS   33319     2.96
8       NK   27730     2.46
9       F9   19092     1.69
10      HA   15294     1.36
11      VX   13393     1.19


In [8]:
# pour chaque companie, on enregistre le dataframe en sous groupe :
# liste des noms des companies /
liste_name = []
for i in range (carrier.shape[0]):
    liste_name.append(carrier.iloc[i,0])

for i in range (carrier.shape[0]):
    carrier_unique = fly_data[fly_data['CARRIER'] == carrier.iloc[i,0]]
    print (carrier_unique.shape) 
    # créer un dataframe pour chaque companie
    liste_name[i] = carrier_unique
    # sauvegarder en csv la nouvelle base de donnée
    #index_month = carrier_unique.columns.get_loc('MONTH')   
    #month = carrier_unique.iloc[0,index_month]
    #carrier_val = carrier.iloc[i,0]    
    #carrier_unique.to_csv("fd_{}_{}.csv".format(month, carrier_val))
    

(259463, 10)
(194492, 10)
(176822, 10)
(121042, 10)
(109118, 10)
(100567, 10)
(56819, 10)
(33319, 10)
(27730, 10)
(19092, 10)
(15294, 10)
(13393, 10)


On étudie le retard en fonction du noms des villes, de l'importance du trajet/ la distancepour cela on visualise spacialement sur une carte les retards pour obtenir une idée des correlations entre ces différentes variables. 

In [9]:
# chercher le nom des villes ainsi que l'importance de chaque ville 
df = fly_data
city = plot_value_counts2('ORIGIN_CITY_NAME',n_oc=1000)
print (city)

            ORIGIN_CITY_NAME  count  percent
0                Atlanta, GA  79108     7.02
1                Chicago, IL  66576     5.91
2                 Denver, CO  45145     4.01
3            Los Angeles, CA  42611     3.78
4      Dallas/Fort Worth, TX  39589     3.51
5               New York, NY  39101     3.47
6                Houston, TX  38623     3.43
7          San Francisco, CA  34357     3.05
8                Phoenix, AZ  31385     2.78
9              Las Vegas, NV  29981     2.66
10           Minneapolis, MN  26479     2.35
11               Seattle, WA  26007     2.31
12               Detroit, MI  25274     2.24
13               Orlando, FL  25131     2.23
14                Boston, MA  23977     2.13
15                Newark, NJ  23408     2.08
16        Salt Lake City, UT  22265     1.98
17            Washington, DC  22009     1.95
18             Charlotte, NC  20706     1.84
19             Baltimore, MD  19329     1.71
20       Fort Lauderdale, FL  17018     1.51
21        

In [10]:
# afficher les coordonnées correspondantes
geolocator = Nominatim(user_agent='Tann',timeout=100) 

lat = np.zeros(city.shape[0]) # latitude correspondant à la ville
long = np.zeros(city.shape[0]) # longitude correspondant à la ville
ind = city.columns.get_loc('ORIGIN_CITY_NAME')
liste = []

for i in range (city.shape[0]):       
    
    location = geolocator.geocode(city.iloc[i,ind])
    if location is None:
        liste.append(i) 
        
    elif location:    
    
        lat[i] = location.latitude
        long[i] = location.longitude
    
city['latitude'] = lat[:]
city['longitude'] = long[:]
print (city)


            ORIGIN_CITY_NAME  count  percent   latitude   longitude
0                Atlanta, GA  79108     7.02  33.749099  -84.390185
1                Chicago, IL  66576     5.91  41.875562  -87.624421
2                 Denver, CO  45145     4.01  39.739143 -104.984696
3            Los Angeles, CA  42611     3.78  34.053683 -118.242767
4      Dallas/Fort Worth, TX  39589     3.51  32.760292  -97.161647
5               New York, NY  39101     3.47  40.730862  -73.987156
6                Houston, TX  38623     3.43  29.758938  -95.367697
7          San Francisco, CA  34357     3.05  45.414978  -72.749396
8                Phoenix, AZ  31385     2.78  33.448587 -112.077346
9              Las Vegas, NV  29981     2.66  36.166286 -115.149225
10           Minneapolis, MN  26479     2.35  44.977300  -93.265469
11               Seattle, WA  26007     2.31  47.603832 -122.330062
12               Detroit, MI  25274     2.24  42.331551  -83.046640
13               Orlando, FL  25131     2.23  28

In [11]:
# ajouter les coordonnées manuellement 
for i in range (len(liste)):
    print (city.iloc[liste[i],ind])
ind_lat = city.columns.get_loc('latitude')
ind_long = city.columns.get_loc('longitude')
city.iloc[liste[0],ind_lat] = 38.684574
city.iloc[liste[0],ind_long] = -121.588406

city.iloc[liste[1],ind_lat] = 26.179868
city.iloc[liste[1],ind_long] = -98.239391

city.iloc[liste[2],ind_lat] = 43.536496
city.iloc[liste[2],ind_long] = -84.080847

city.iloc[liste[3],ind_lat] = 36.477382
city.iloc[liste[3],ind_long] = -82.405566

city.iloc[liste[4],ind_lat] = 41.338021
city.iloc[liste[4],ind_long] = -75.728052

city.iloc[liste[5],ind_lat] = 43.503787
city.iloc[liste[5],ind_long] = -114.296088

city.iloc[liste[6],ind_lat] = 34.712303
city.iloc[liste[6],ind_long] =  -77.580538

city.iloc[liste[7],ind_lat] =  41.507298
city.iloc[liste[7],ind_long] = -74.103223

city.iloc[liste[8],ind_lat] = 35.076250
city.iloc[liste[8],ind_long] = -77.038637

city.iloc[liste[9],ind_lat] = 45.814810
city.iloc[liste[9],ind_long] =  -88.115630

city.iloc[liste[10],ind_lat] = 31.665073
city.iloc[liste[10],ind_long] = -89.170729

city.iloc[liste[11],ind_lat] = 13.486790
city.iloc[liste[11],ind_long] = 144.794289
    

Sacramento, CA
Mission/McAllen/Edinburg, TX
Bristol/Johnson City/Kingsport, TN
Saginaw/Bay City/Midland, MI
Scranton/Wilkes-Barre, PA
Jacksonville/Camp Lejeune, NC
Sun Valley/Hailey/Ketchum, ID
Iron Mountain/Kingsfd, MI
Hattiesburg/Laurel, MS
Newburgh/Poughkeepsie, NY
New Bern/Morehead/Beaufort, NC
Guam, TT
Manhattan/Ft. Riley, KS


In [12]:
# restreint les datasets aux variables villes et retard : 
liste_name_red = []

fly_data_red = fly_data[['ORIGIN_CITY_NAME','DEST_CITY_NAME',
                         'ARR_DELAY_NEW','DEP_DELAY_NEW']]
for i in range (len(liste_name)):
    liste_name_red.append(liste_name[i][['ORIGIN_CITY_NAME',
                                             'DEST_CITY_NAME','ARR_DELAY_NEW','DEP_DELAY_NEW']])
  

In [13]:
# supprimer les retard de plus de 12h 

print (fly_data.shape[0])
fly_data_red = fly_data_red[fly_data_red['ARR_DELAY_NEW']<= 720 ]
print (fly_data_red.shape[0])
fly_data_red = fly_data_red[fly_data_red['DEP_DELAY_NEW']<= 720 ]
print (fly_data_red.shape[0])



1127151
1110832
1110821


In [14]:
# on compte le nombre de liaisons ville de départ > ville d'arrivée 
# on enregistre les coordonnées de type lat_dep, long_dep lat_ar long_ar
index_long = city.columns.get_loc('longitude')
index_lat = city.columns.get_loc('latitude')

# listes des coordonnées des villes de départ et d'arrivée
lat_dep = []
long_dep = []
lat_arr = []
long_arr = []

# importance du trajet 
count_path = []

# retards moyens par trajet
delay_dep_path = []
delay_arr_path = []

# retards moyens par ville 
delay_dep_city = []
delay_arr_city = []

for i in range (city.shape[0]): 
        
        # réduit le dataset à la ville considérée
        city_red = fly_data_red[fly_data_red['ORIGIN_CITY_NAME'] == city.iloc[i,0]] 
        # enregistre le retard moyen par ville 
        delay_dep_city.append(city_red.iloc[:,3].sum(axis=0)/city.iloc[i,1])
        delay_arr_city.append(city_red.iloc[:,2].sum(axis=0)/city.iloc[i,1])
        #delay_dep_city.append(np.median(city_red.iloc[:,3]))        
        #delay_arr_city.append(np.median(city_red.iloc[:,2]))
        # compte le nombre de valeurs > importance de l'aeroport 
        num_liaison = city_red.shape[0]
        # étudie les villes d'arrivée /
        df = city_red        
        city_ar = plot_value_counts2('DEST_CITY_NAME',n_oc=1000)
        
        for j in range (city_ar.shape[0]):
            
            # réduit le dataset au chemin (ville de départ > ville d'arrivée)
            city_ar_red = city_red[city_red['DEST_CITY_NAME'] == city_ar.iloc[j,0]]
            # enregistre le retard moyen par trajet
            delay_dep_path.append(city_ar_red.iloc[:,3].sum(axis=0)/city_ar.iloc[j,1])
            delay_arr_path.append(city_ar_red.iloc[:,2].sum(axis=0)/city_ar.iloc[j,1])
            #delay_dep_path.append(np.median(city_ar_red.iloc[:,3]))
            #delay_arr_path.append(np.median(city_ar_red.iloc[:,2]))
            
            city_ar_co = city[city['ORIGIN_CITY_NAME'] == city_ar.iloc[j,0]] 
            # coordonnées de la ville de départ 
            lat_dep.append(city.iloc[i,index_lat])
            long_dep.append(city.iloc[i,index_long])
            # coordonnée de la ville d'arrivée 
            lat_arr.append(city_ar_co.iloc[0,index_lat])
            long_arr.append(city_ar_co.iloc[0,index_long])
            # importance du trajet 
            count_path.append(city_ar.iloc[j,1])
            
print (len(lat_dep))
print (len(long_dep))
print (len(lat_arr))
print (len(long_arr))
print (len(count_path))

print (len(delay_dep_path))
print (len(delay_arr_path))

print (len(delay_dep_city))
print (len(delay_arr_city))      
        
        
        
      


4098
4098
4098
4098
4098
4098
4098
307
307


In [15]:
# créer une liste couleur : 
couleurs = ['#33ff66','#33ff99','#33ffcc','#33ffff','#33ccff','#3399ff','#3366ff','#3333ff','#6633ff','#9933ff','#cc33ff',
           '#ff33ff','#ff33cc','#ff3399','#ff3366','#ff3333']

delay_dep_path_c = []
delay_arr_paty_c = []
delay_dep_city_c = []
delay_arr_city_c = []

# associer une échelle log
for i in range (len(delay_dep_path)):
    
    delay_dep_path[i] = np.log(delay_dep_path[i] + 1)
    delay_arr_path[i] = np.log(delay_arr_path[i] + 1)
for i in range (len(delay_dep_city)):
    
    delay_dep_city[i] = np.log(delay_dep_city[i] + 1)
    delay_arr_city[i] = np.log(delay_arr_city[i] + 1)      


delay_dep_path_c = (pd.cut((delay_dep_path[:]), 
             14, labels=['A','B','C','D','E','F','G','H','I','J','K','L','M','N']))
delay_arr_path_c = (pd.cut((delay_arr_path[:]), 
             14, labels=['A','B','C','D','E','F','G','H','I','J','K','L','M','N']))
delay_dep_city_c = (pd.cut(delay_dep_city[:], 
             14, labels=['A','B','C','D','E','F','G','H','I','J','K','L','M','N']))
delay_arr_city_c = (pd.cut(delay_arr_city[:], 
             14, labels=['A','B','C','D','E','F','G','H','I','J','K','L','M','N']))

# remplacer les lettres dans par les chiffres correspondants
z = {'A': 0, 'B': 1, 'C': 2,'D':3 ,'E':4, 'F': 5, 'G': 6, 'H': 7,'I':8 ,'J':9,
    'K': 10, 'L': 11, 'M': 12, 'N': 13}
delay_dep_path_c = delay_dep_path_c.map(z)
delay_arr_path_c = delay_arr_path_c.map(z)
delay_dep_city_c = delay_dep_city_c.map(z)
delay_arr_city_c = delay_arr_city_c.map(z)

print (np.sort(delay_dep_city))




[0.         0.         0.         0.08701138 0.97739872 1.12522983
 1.19748119 1.22287696 1.26322514 1.29928298 1.36022212 1.40310148
 1.43694615 1.45125226 1.5260563  1.54817903 1.72509697 1.74527183
 1.77328544 1.78072458 1.79303093 1.80874829 1.80915121 1.87271534
 1.91192914 1.93753314 1.94113687 1.95990634 1.98403621 1.98591548
 1.99837835 2.00875126 2.00978554 2.01438609 2.03343216 2.04122033
 2.04352183 2.0483074  2.05579978 2.05891205 2.06056892 2.06216996
 2.07060535 2.0747134  2.1005385  2.10241285 2.109798   2.11731803
 2.13268606 2.147839   2.16177101 2.1630786  2.1690901  2.17770091
 2.19722458 2.19794582 2.1981312  2.20047987 2.20054226 2.20399921
 2.20565455 2.20873218 2.21063884 2.21200121 2.22217061 2.22714134
 2.22757297 2.23035037 2.23489799 2.2349141  2.23671235 2.23994952
 2.24349618 2.24440314 2.24513666 2.25622546 2.26096091 2.26600877
 2.27052034 2.27423764 2.2791749  2.27962856 2.27965759 2.28096034
 2.29025448 2.29347242 2.29563352 2.29820049 2.30234922 2.3025

In [16]:
# établir une carte de visualisation des villes via leurs importances respectives et leurs retards
index_long = city.columns.get_loc('longitude')
index_lat = city.columns.get_loc('latitude')
# Make an empty map
m = folium.Map(location=[40,-100], tiles="CartoDB dark_matter", zoom_start=3)

colormap = cm.StepColormap(['#33ff66','#33ff99','#33ffcc','#33ffff','#33ccff','#3399ff','#3366ff','#3333ff','#6633ff','#9933ff','#cc33ff',
           '#ff33ff','#ff33cc','#ff3399','#ff3366','#ff3333'],
            vmin=0, vmax=np.amax(delay_dep_city))
colormap.caption = 'Retard moyen au départ'
m.add_child(colormap)

m.save('results', 'Colormaps_3.html')

# I can add marker one by one on the map
for i in range(city.shape[0]):
    
    folium.Circle(
    location=[city.iloc[i,index_lat],city.iloc[i,index_long]],
    radius=(int((np.log(city.iloc[i,1])*10000))),
    color=couleurs[int(delay_dep_city_c[i])],
    fill=True,
    fill_color=couleurs[int(delay_dep_city_c[i])]).add_to(m)
 
# Save it as html
m.save('globalmap_airport.html')





In [17]:
# établir une carte de visualisation des villes via leurs importances respectives et leurs retards
index_long = city.columns.get_loc('longitude')
index_lat = city.columns.get_loc('latitude')
# Make an empty map
m = folium.Map(location=[20,0], tiles="CartoDB dark_matter", zoom_start=2)

colormap = cm.StepColormap(['#33ff66','#33ff99','#33ffcc','#33ffff','#33ccff','#3399ff','#3366ff','#3333ff','#6633ff','#9933ff','#cc33ff',
           '#ff33ff','#ff33cc','#ff3399','#ff3366','#ff3333'],
            vmin=0, vmax=np.amax(delay_dep_city))
colormap.caption = 'Retard moyen a larrive'
m.add_child(colormap)

m.save('results', 'Colormaps_3.html')

# I can add marker one by one on the map
for i in range(city.shape[0]):
    
    folium.Circle(
    location=[city.iloc[i,index_lat],city.iloc[i,index_long]],
    radius=(int((np.log(city.iloc[i,1])*10000))),
    color=couleurs[int(delay_arr_city_c[i])],
    fill=True,
    fill_color=couleurs[int(delay_arr_city_c[i])]).add_to(m)
 
# Save it as html
m.save('mymap_delaycity_arr.html')

In [18]:
# tracer les trajets en fonction de l'importance et du retard 
#map = folium.Map(width=500,height=500,location=[40, -99], zoom_start=4)

# Make an empty map
m = folium.Map(location=[40,-100], tiles="CartoDB dark_matter", zoom_start=3)

colormap = cm.StepColormap(['#33ff66','#33ff99','#33ffcc','#33ffff','#33ccff','#3399ff','#3366ff','#3333ff','#6633ff','#9933ff','#cc33ff',
           '#ff33ff','#ff33cc','#ff3399','#ff3366','#ff3333'],
            vmin=0, vmax=np.amax(delay_dep_city))
colormap.caption = 'Retard moyen au départ'
m.add_child(colormap)

m.save('results', 'Colormaps_3.html')

for i in range (len(count_path)):
    folium.features.PolyLine(locations=[[lat_dep[i],long_dep[i]],
                                [lat_arr[i],long_arr[i]]],                                          
                                weight=0.15,
                                color=couleurs[int(delay_arr_path_c[i])],
                                popup=None).add_to(m)
    
# Save it as html
m.save('global_mappath.html')


In [19]:
# même procédure en distinguant les companies d'aviation :

# chercher le nom des villes ainsi que l'importance de chaque ville 

for k in range (len(liste_name_red)):
    df = liste_name_red[k]
    city_c = plot_value_counts2('ORIGIN_CITY_NAME',n_oc=1000)

    # supprimer les retard de plus de 12h 
    fly_data_red = liste_name_red[k]
    fly_data_red = fly_data_red[fly_data_red['ARR_DELAY_NEW']<= 720 ]
    fly_data_red = fly_data_red[fly_data_red['DEP_DELAY_NEW']<= 720 ]

    # on compte le nombre de liaisons ville de départ > ville d'arrivée 
    # on enregistre les coordonnées de type lat_dep, long_dep lat_ar long_ar
    index_long = city.columns.get_loc('longitude')
    index_lat = city.columns.get_loc('latitude')

    # listes des coordonnées des villes de départ et d'arrivée
    lat_dep = []
    long_dep = []
    lat_arr = []
    long_arr = []

    # importance du trajet 
    count_path = []

    # retards moyens par trajet
    delay_dep_path = []
    delay_arr_path = []

    # retards moyens par ville 
    delay_dep_city = []
    delay_arr_city = []

    for i in range (city_c.shape[0]): 

            # réduit le dataset à la ville considérée
            city_red = fly_data_red[fly_data_red['ORIGIN_CITY_NAME'] == city_c.iloc[i,0]] 
            # enregistre le retard moyen par ville 
            delay_dep_city.append(city_red.iloc[:,3].sum(axis=0)/city_c.iloc[i,1])
            delay_arr_city.append(city_red.iloc[:,2].sum(axis=0)/city_c.iloc[i,1])
            #delay_dep_city.append(np.median(city_red.iloc[:,3]))        
            #delay_arr_city.append(np.median(city_red.iloc[:,2]))
            # compte le nombre de valeurs > importance de l'aeroport 
            num_liaison = city_red.shape[0]
            # étudie les villes d'arrivée /
            df = city_red        
            city_ar = plot_value_counts2('DEST_CITY_NAME',n_oc=1000)

            for j in range (city_ar.shape[0]):

                # réduit le dataset au chemin (ville de départ > ville d'arrivée)
                city_ar_red = city_red[city_red['DEST_CITY_NAME'] == city_ar.iloc[j,0]]
                # enregistre le retard moyen par trajet
                delay_dep_path.append(city_ar_red.iloc[:,3].sum(axis=0)/city_ar.iloc[j,1])
                delay_arr_path.append(city_ar_red.iloc[:,2].sum(axis=0)/city_ar.iloc[j,1])
                #delay_dep_path.append(np.median(city_ar_red.iloc[:,3]))
                #delay_arr_path.append(np.median(city_ar_red.iloc[:,2]))

                city_ar_co = city[city['ORIGIN_CITY_NAME'] == city_ar.iloc[j,0]] 
                city_dep_co = city[city['ORIGIN_CITY_NAME'] == city_c.iloc[i,0]] 
                # coordonnées de la ville de départ 
                lat_dep.append(city_dep_co.iloc[0,index_lat])
                long_dep.append(city_dep_co.iloc[0,index_long])
                # coordonnée de la ville d'arrivée 
                lat_arr.append(city_ar_co.iloc[0,index_lat])
                long_arr.append(city_ar_co.iloc[0,index_long])
                # importance du trajet 
                count_path.append(city_ar.iloc[j,1])

    # créer une liste couleur : 
    couleurs = ['#33ff66','#33ff99','#33ffcc','#33ffff','#33ccff','#3399ff','#3366ff','#3333ff','#6633ff','#9933ff','#cc33ff',
               '#ff33ff','#ff33cc','#ff3399','#ff3366','#ff3333']

    delay_dep_path_c = []
    delay_arr_paty_c = []
    delay_dep_city_c = []
    delay_arr_city_c = []

    # associer une échelle log
    for i in range (len(delay_dep_path)):

        delay_dep_path[i] = np.log(delay_dep_path[i] + 1)
        delay_arr_path[i] = np.log(delay_arr_path[i] + 1)
    for i in range (len(delay_dep_city)):

        delay_dep_city[i] = np.log(delay_dep_city[i] + 1)
        delay_arr_city[i] = np.log(delay_arr_city[i] + 1)      


    delay_dep_path_c = (pd.cut((delay_dep_path[:]), 
                 14, labels=['A','B','C','D','E','F','G','H','I','J','K','L','M','N']))
    delay_arr_path_c = (pd.cut((delay_arr_path[:]), 
                 14, labels=['A','B','C','D','E','F','G','H','I','J','K','L','M','N']))
    delay_dep_city_c = (pd.cut(delay_dep_city[:], 
                 14, labels=['A','B','C','D','E','F','G','H','I','J','K','L','M','N']))
    delay_arr_city_c = (pd.cut(delay_arr_city[:], 
                 14, labels=['A','B','C','D','E','F','G','H','I','J','K','L','M','N']))

    # remplacer les lettres dans par les chiffres correspondants
    z = {'A': 0, 'B': 1, 'C': 2,'D':3 ,'E':4, 'F': 5, 'G': 6, 'H': 7,'I':8 ,'J':9,
        'K': 10, 'L': 11, 'M': 12, 'N': 13}
    delay_dep_path_c = delay_dep_path_c.map(z)
    delay_arr_path_c = delay_arr_path_c.map(z)
    delay_dep_city_c = delay_dep_city_c.map(z)
    delay_arr_city_c = delay_arr_city_c.map(z)


    # établir une carte de visualisation des villes via leurs importances respectives et leurs retards
    index_long = city.columns.get_loc('longitude')
    index_lat = city.columns.get_loc('latitude')
    # Make an empty map
    m = folium.Map(location=[40,-100], tiles="CartoDB dark_matter", zoom_start=3)

    colormap = cm.StepColormap(['#33ff66','#33ff99','#33ffcc','#33ffff','#33ccff','#3399ff','#3366ff','#3333ff','#6633ff','#9933ff','#cc33ff',
               '#ff33ff','#ff33cc','#ff3399','#ff3366','#ff3333'],
                vmin=0, vmax=np.amax(delay_dep_city))
    colormap.caption = 'Retard moyen au départ'
    m.add_child(colormap)

    m.save('results', 'Colormaps_3.html')

    # I can add marker one by one on the map
    for i in range(city_c.shape[0]):
        
        
        
        city_dep_co = city[city['ORIGIN_CITY_NAME'] == city_c.iloc[i,0]] 
        
        folium.Circle(
        location=[city_dep_co.iloc[0,index_lat],city_dep_co.iloc[0,index_long]],
        radius=(int((np.log(city_c.iloc[i,1])*10000))),
        color=couleurs[int(delay_dep_city_c[i])],
        fill=True,
        fill_color=couleurs[int(delay_dep_city_c[i])]).add_to(m)

    # Save it as html
    m.save('map_carrierairport{}.html'.format(k))


    # établir une carte de visualisation des villes via leurs importances respectives et leurs retards
    index_long = city.columns.get_loc('longitude')
    index_lat = city.columns.get_loc('latitude')
    # Make an empty map
    m = folium.Map(location=[40,-100], tiles="CartoDB dark_matter", zoom_start=3)

    colormap = cm.StepColormap(['#33ff66','#33ff99','#33ffcc','#33ffff','#33ccff','#3399ff','#3366ff','#3333ff','#6633ff','#9933ff','#cc33ff',
               '#ff33ff','#ff33cc','#ff3399','#ff3366','#ff3333'],
                vmin=0, vmax=np.amax(delay_dep_city))
    colormap.caption = 'Retard moyen a larrive'
    m.add_child(colormap)

    m.save('results', 'Colormaps_3.html')

    # I can add marker one by one on the map
    for i in range(city_c.shape[0]):
        
        city_dep_co = city[city['ORIGIN_CITY_NAME'] == city_c.iloc[i,0]] 

        folium.Circle(
        location=[city_dep_co.iloc[0,index_lat],city_dep_co.iloc[0,index_long]],
        radius=(int((np.log(city_c.iloc[i,1])*10000))),
        color=couleurs[int(delay_arr_city_c[i])],
        fill=True,
        fill_color=couleurs[int(delay_arr_city_c[i])]).add_to(m)

    # Save it as html
    m.save('mymap_delaycity_arr{}.html'.format(k))

    # tracer les trajets en fonction de l'importance et du retard 


    # Make an empty map
    m = folium.Map(location=[40,-100], tiles="CartoDB dark_matter", zoom_start=3)

    colormap = cm.StepColormap(['#33ff66','#33ff99','#33ffcc','#33ffff','#33ccff','#3399ff','#3366ff','#3333ff','#6633ff','#9933ff','#cc33ff',
               '#ff33ff','#ff33cc','#ff3399','#ff3366','#ff3333'],
                vmin=0, vmax=np.amax(delay_dep_city))
    colormap.caption = 'Retard moyen au départ'
    m.add_child(colormap)

    m.save('results', 'Colormaps_3.html')

    for i in range (len(count_path)):
        folium.features.PolyLine(locations=[[lat_dep[i],long_dep[i]],
                                    [lat_arr[i],long_arr[i]]],                                          
                                    weight=0.15,
                                    color=couleurs[int(delay_arr_path_c[i])],
                                    popup=None).add_to(m)

    # Save it as html
    m.save('map_carrierpath{}.html'.format(k))
    




  


