In [2]:
import warnings

import pandas as pd
import plotly_express as px
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

In [None]:
# Workbook holding, per airport code, the city, country, NUTS-3 code and yearly population figures
population_filepath = 'https://raw.githubusercontent.com/Davideij/Python-Project-Group-3/main/Data/AirportCodes_Cities_NUTS_Population.xlsx'

# Read the workbook's default (first) sheet into a data frame
population = pd.read_excel(io=population_filepath)

In [None]:
# Load the City Pairs data.
# sheet_name=None makes read_excel return a dict {tab name: DataFrame} with one entry per tab.
city_pairs_filepath = 'https://raw.githubusercontent.com/Davideij/Python-Project-Group-3/main/Data/CITIES_FINAL.xlsx'
city_pairs_file = pd.read_excel(city_pairs_filepath, sheet_name=None)

# Merge the excel tabs into one data frame.
# Each tab is tagged with its name in a "Year" column; `assign` returns a copy, so the
# frames held in `city_pairs_file` stay untouched and re-running the cell is idempotent.
# A single concat over the whole list replaces growing the frame one tab at a time.
# The per-tab row index is kept as-is (labels therefore repeat across years).
city_pairs = pd.concat(
    [frame.assign(Year=sheet_name) for sheet_name, frame in city_pairs_file.items()]
)

# Confirm that all unique names of the tabs have been added to the year column
print(city_pairs.Year.unique())

city_pairs

['2016' '2017' '2018' '2019']


Unnamed: 0,City_A_Name,City_B_Name,City_A,City_B,Total passengers,NUTS_3_code_A,NUTS_3_code_B,Distance,has_connection,Year
0,Paris,Toulouse,LFPO,LFBO,4716804,FR107,FRJ23,6.106723e+05,0,2016
1,Barcelona,Madrid,LEBL,LEMD,4657590,ES511,ES300,4.967448e+05,1,2016
2,Nice,Paris,LFMN,LFPO,4249404,FRL03,FR107,6.453768e+05,0,2016
3,Berlin,Munich,EDDT,EDDM,3878625,DE300,DE212,5.010312e+05,0,2016
4,Berlin,Frankfurt,EDDT,EDDF,3869795,DE300,DE712,4.236867e+05,0,2016
...,...,...,...,...,...,...,...,...,...,...
718,DÃ¼sseldorf,Friedrichshafen,EDDL,EDNY,15529,DEA11,DE147,4.336239e+05,0,2019
719,Groningen,Tenerife South,EHGG,GCTS,13734,NL111,ES709,3.366764e+06,0,2019
720,Liege,Palma de Mallorca,EBLG,LEPA,12985,BE332,ES532,1.240433e+06,0,2019
721,Limoges,Paris,LFBL,LFPO,10925,FRI23,FR107,3.339321e+05,0,2019


In [6]:
# Memo of coordinates that were already resolved, so that city/country combinations
# that repeat do not trigger another Nominatim request
cache = dict()

# Nominatim client used for the look-ups
geolocator = Nominatim(user_agent="JohnDoe")

# Throttled wrapper around the client's geocode call (min_delay_seconds=1) to stay
# within the service's rate limits
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Function to get latitude and longitude
def get_lat_lon(city_name, country_name):
    """Geocode a city and return its coordinates as a two-element Series.

    Results are memoised in the module-level ``cache`` dict, keyed first by
    city name and then by country name, so a repeated city/country pair does
    not trigger another call to ``geocode``.

    Parameters
    ----------
    city_name : value of the 'City' column; used as the first part of the query.
    country_name : value of the 'Country' column; used as the second part of the query.

    Returns
    -------
    pandas.Series
        ``[latitude, longitude]`` on success, ``[None, None]`` when the
        geocoder returns no match or raises. Failed look-ups are NOT cached,
        so they are retried the next time the same pair is requested.

    Warns
    -----
    RuntimeWarning
        When the geocoder raises; the look-up is still treated as best-effort
        and ``[None, None]`` is returned instead of propagating the error.
    """
    # Check if the result is in the cache
    cached_countries = cache.get(city_name)
    if cached_countries is not None and country_name in cached_countries:
        return cached_countries[country_name]

    # Only the network call is guarded, so unrelated programming errors are not hidden.
    try:
        location = geocode(f"{city_name},{country_name}")
    except Exception as error:
        # Best-effort: keep the notebook running, but make the failure visible
        # instead of silently producing missing coordinates.
        warnings.warn(
            f"Geocoding failed for {city_name!r}, {country_name!r}: {error!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return pd.Series([None, None])

    # No match found for this query
    if not location:
        return pd.Series([None, None])

    coordinates = pd.Series([location.latitude, location.longitude])
    cache.setdefault(city_name, {})[country_name] = coordinates
    return coordinates

# Geocode every row from its 'City' and 'Country' values.
# NOTE: `population_lat_lon` is the same object as `population` (no copy is made),
# so the two new columns also appear on `population`.
population_lat_lon = population

coordinates_per_row = population_lat_lon.apply(
    lambda record: get_lat_lon(record['City'], record['Country']),
    axis=1,
)
population_lat_lon[['Latitude', 'Longitude']] = coordinates_per_row

population_lat_lon

Unnamed: 0.1,Unnamed: 0,Airport Code,NUTS_3_code,City,Country,1990,1991,1992,1993,1994,...,2016,2017,2018,2019,2020,2021,2022,2023,Latitude,Longitude
0,0,EBAW,BE211,Antwerp,Belgium,:,:,:,:,:,...,1036031,1041811,1047030,1053033,1059946,1062427,1067117,1081771,51.221110,4.399708
1,1,EBBR,BE100,Brussels,Belgium,:,:,:,:,:,...,1201285,1199095,1205492,1215289,1223364,1226329,1228655,1253178,50.846557,4.351697
2,2,EBCI,BE322,Charleroi,Belgium,:,:,:,:,:,...,431693,431043,431398,431879,432660,:,:,:,50.411623,4.444528
3,3,EBLG,BE332,Liege,Belgium,:,:,:,:,:,...,623887,625131,626222,626627,628363,627304,628067,630743,50.470816,5.773546
4,4,EBOS,BE255,Ostend,Belgium,:,:,:,:,:,...,155256,155982,156638,157293,157925,158320,158339,159568,51.225856,2.919496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,99,LFRS,FRG01,Nantes,France,1050539,1058966,1068983,1079601,1087854,...,1380852,1394909,1412502,1429272,1445171,1457806,1473637,1488876,47.218637,-1.554136
100,100,LFSB,FRF12,Basel/Mulhouse,France/Switzerland,670652,675110,679085,684357,688363,...,762743,764030,764981,767086,767842,767083,767909,768372,,
101,101,LFST,FRF11,Strasbourg,France,952158,960320,968879,978610,987370,...,1121407,1125559,1133552,1140057,1148073,1152662,1158915,1164485,48.584614,7.750713
102,102,LFTH,FRL05,Toulon,France,815714,825813,837166,846896,856303,...,1055821,1058740,1067697,1076711,1085189,1095337,1103664,1112421,43.125731,5.930492


In [7]:
# Merge the Population Lat/Lon onto both endpoints of the City Pairs.
# The look-up table (airport code -> coordinates) is built once and reused for all four
# columns. Duplicate airport codes are dropped (first occurrence wins) because
# Series.map raises on a non-unique index.
airport_coordinates = (
    population
    .drop_duplicates(subset='Airport Code')
    .set_index('Airport Code')[['Latitude', 'Longitude']]
)

# 'City_A' / 'City_B' hold airport codes; codes missing from the look-up table map to NaN.
for endpoint in ('A', 'B'):
    airport_codes = city_pairs[f'City_{endpoint}']
    city_pairs[f'Latitude_{endpoint}'] = airport_codes.map(airport_coordinates['Latitude'])
    city_pairs[f'Longitude_{endpoint}'] = airport_codes.map(airport_coordinates['Longitude'])

city_pairs

Unnamed: 0,City_A_Name,City_B_Name,City_A,City_B,Total passengers,NUTS_3_code_A,NUTS_3_code_B,Distance,has_connection,Year,Latitude_A,Longitude_A,Latitude_B,Longitude_B
0,Paris,Toulouse,LFPO,LFBO,4716804,FR107,FRJ23,6.106723e+05,0,2016,48.730060,2.367831,43.604462,1.444247
1,Barcelona,Madrid,LEBL,LEMD,4657590,ES511,ES300,4.967448e+05,1,2016,41.382894,2.177432,40.416705,-3.703582
2,Nice,Paris,LFMN,LFPO,4249404,FRL03,FR107,6.453768e+05,0,2016,43.700936,7.268391,48.730060,2.367831
3,Berlin,Munich,EDDT,EDDM,3878625,DE300,DE212,5.010312e+05,0,2016,,,48.137108,11.575382
4,Berlin,Frankfurt,EDDT,EDDF,3869795,DE300,DE712,4.236867e+05,0,2016,,,50.110644,8.682092
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
718,DÃ¼sseldorf,Friedrichshafen,EDDL,EDNY,15529,DEA11,DE147,4.336239e+05,0,2019,51.225402,6.776314,47.650028,9.480086
719,Groningen,Tenerife South,EHGG,GCTS,13734,NL111,ES709,3.366764e+06,0,2019,53.219065,6.568008,28.046141,-16.572008
720,Liege,Palma de Mallorca,EBLG,LEPA,12985,BE332,ES532,1.240433e+06,0,2019,50.470816,5.773546,39.581252,2.709268
721,Limoges,Paris,LFBL,LFPO,10925,FRI23,FR107,3.339321e+05,0,2019,45.835424,1.264485,48.730060,2.367831


In [8]:
# Keep only the city pairs for which both endpoints have coordinates and a year is set
required_columns = ['Latitude_A', 'Longitude_A', 'Latitude_B', 'Longitude_B', 'Year']
city_pairs_with_data = city_pairs.dropna(subset=required_columns)

In [14]:
# Top city pairs - the map is reduced to the busiest routes to keep the figure readable
TOP_N_PAIRS = 200

top_city_pairs = city_pairs_with_data.nlargest(TOP_N_PAIRS, 'Total passengers')
# Not used by this cell; kept because it is a notebook-level name other cells may read
max_passengers = top_city_pairs['Total passengers'].max()


def build_route_points(pairs):
    """Reshape one-row-per-route data into one-row-per-point data for line plotting.

    Every route contributes three consecutive rows: endpoint A, endpoint B and a
    separator row whose coordinates are NaN. The NaN row breaks the line, so all
    routes of one year can be drawn as a single trace of disconnected segments.

    Parameters
    ----------
    pairs : DataFrame with the columns 'Year', 'Total passengers', 'City_A_Name',
        'City_B_Name', 'Latitude_A', 'Longitude_A', 'Latitude_B' and 'Longitude_B'.

    Returns
    -------
    DataFrame with the columns 'Route', 'Point', 'Year', 'City', 'Latitude',
    'Longitude' and 'Total passengers', sorted by year, route and point order.
    """
    # Fresh 0..n-1 index: the incoming index labels may repeat across years
    routes = pairs.reset_index(drop=True)

    def endpoint_rows(side, position):
        return pd.DataFrame({
            'Route': routes.index,
            'Point': position,
            'Year': routes['Year'],
            'City': routes[f'City_{side}_Name'],
            'Latitude': pd.to_numeric(routes[f'Latitude_{side}']),
            'Longitude': pd.to_numeric(routes[f'Longitude_{side}']),
            'Total passengers': routes['Total passengers'],
        })

    start_rows = endpoint_rows('A', 0)
    end_rows = endpoint_rows('B', 1)
    separator_rows = start_rows.assign(Point=2)

    points = pd.concat([start_rows, end_rows, separator_rows], ignore_index=True)

    # Blank out the separator rows after concatenation (avoids concatenating all-NA frames)
    is_separator = points['Point'].eq(2)
    points.loc[is_separator, ['Latitude', 'Longitude']] = float('nan')
    points.loc[is_separator, 'City'] = None

    # Year first so animation frames appear in chronological order; within a year each
    # route stays grouped as A, B, separator
    return points.sort_values(['Year', 'Route', 'Point']).reset_index(drop=True)


route_points = build_route_points(top_city_pairs)

# One trace per year: markers on the cities, a line segment for every A-B connection
fig_network_map = px.line_geo(
    route_points,
    lat='Latitude',
    lon='Longitude',
    title='European Air Passenger Connections',
    projection='natural earth',
    scope='europe',
    hover_name='City',
    hover_data=['Total passengers'],
    animation_frame='Year',
    markers=True,
)

# Customize marker and line styles
route_style = dict(marker=dict(size=4, color='orchid'), line=dict(width=2, color='blue'))
fig_network_map.update_traces(**route_style)

# update_traces only touches the initially displayed traces; apply the same style to the
# animation frames so it does not revert when the year slider moves
for frame in fig_network_map.frames:
    for trace in frame.data:
        trace.update(**route_style)

# Update layout for better visualization
fig_network_map.update_layout(
    width=1200,
    height=800,
    geo=dict(
        resolution=50,
        showland=True,
        landcolor="lightgray",
        showcoastlines=True,
        coastlinecolor="black",
        showocean=True,
        oceancolor="lightblue",
        showlakes=True,
        lakecolor="lightblue",
    ),
)

# Show the plot
fig_network_map.show()
