In [1]:
import pandas as pd
import plotly_express as px
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [2]:
# Load the airport / city / NUTS population workbook.
cities_filepath = '../Data/AirportCodes_Cities_NUTS_Population.xlsx'
cities = pd.read_excel(cities_filepath)

# Years shown in the chart. Their columns are coerced to numbers so that any
# non-numeric cell becomes NaN instead of breaking the ranking below.
plot_years = ['2016', '2017', '2018', '2019']
for year in plot_years:
    cities[year] = pd.to_numeric(cities[year], errors='coerce')

# Keep only the ten cities with the largest 2019 population.
top_cities = cities.nlargest(10, '2019')

# Reshape wide -> long: one row per (city, year) pair, as plotly expects.
cities_long = pd.melt(
    top_cities,
    id_vars=['NUTS_3_code', 'City'],
    value_vars=plot_years,
    var_name='Year',
    value_name='Population',
)

# One line per city, population on the y-axis, year on the x-axis.
fig = px.line(
    cities_long,
    x='Year',
    y='Population',
    color='City',
    labels={'Population': 'Population', 'Year': 'Year'},
    title='Population Development Over Time',
)

fig.show()

In [3]:
# Collect the ten most populous cities of each year. The per-year frames are
# gathered in a list and concatenated once at the end, instead of growing a
# DataFrame inside the loop (which re-copies all earlier rows on every pass).
yearly_top_frames = []
for year in ['2016', '2017', '2018', '2019']:
    top_cities_year = cities.nlargest(10, year).copy()  # top 10 rows for this year
    top_cities_year['Year'] = year  # tag every row with the year it was ranked in
    top_cities_year['Population'] = top_cities_year[year]  # common column name across years
    yearly_top_frames.append(top_cities_year[['NUTS_3_code', 'City', 'Year', 'Population']])

top_cities_all_years = pd.concat(yearly_top_frames)

# Grouped bar chart: one group per city, one bar per year.
fig = px.bar(top_cities_all_years,
             x='City',
             y='Population',
             color='Year',
             barmode='group',
             title='Top 10 Largest Cities by Population for Each Year',
             labels={'Population': 'Population', 'City': 'City', 'Year': 'Year'})

# 'total descending' orders the cities on the x-axis by their population summed
# over all plotted years (largest first); it does not sort within a single year.
fig.update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

In [5]:
# Nominatim client, identified to the service by this user agent string.
geolocator = Nominatim(user_agent="yannickelsten")

# Throttled wrapper around the lookup: consecutive calls are spaced at least
# one second apart so the service's rate limits are not exceeded.
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
)

def get_lat_lon(city_name, country_name, geocoder=None):
    """Look up the coordinates of a city.

    Parameters
    ----------
    city_name, country_name
        Combined into the query string ``"<city_name>, <country_name>"``.
    geocoder : callable, optional
        Callable that takes the query string and returns an object exposing
        ``latitude`` and ``longitude`` (or a falsy value when nothing is
        found). Defaults to the notebook-level rate-limited ``geocode``.

    Returns
    -------
    pandas.Series
        Two elements, ``[latitude, longitude]``. Both are ``None`` when the
        lookup finds nothing or fails.

    Notes
    -----
    This is a best-effort lookup: a failure never raises. Instead of being
    swallowed silently, it is reported with ``warnings.warn`` so missing
    coordinates can be traced back to the query that caused them.
    """
    import warnings  # local import keeps the cell self-contained

    lookup = geocode if geocoder is None else geocoder
    query = f"{city_name}, {country_name}"
    try:
        location = lookup(query)
        if location:
            return pd.Series([location.latitude, location.longitude])
        return pd.Series([None, None])
    except Exception as exc:
        # Keep the row (with empty coordinates) but make the failure visible.
        warnings.warn(f"Geocoding failed for {query!r}: {exc!r}", stacklevel=2)
        return pd.Series([None, None])

# Geocode every row from its 'City' and 'Country' values.
# Work on a copy: a plain `cities_lat_lon = cities` would only alias the frame,
# so the new Latitude/Longitude columns would be written into `cities` as well.
cities_lat_lon = cities.copy()
cities_lat_lon[['Latitude', 'Longitude']] = cities_lat_lon.apply(
    lambda row: get_lat_lon(row['City'], row['Country']),
    axis=1,
)

cities_lat_lon

Unnamed: 0.1,Unnamed: 0,Airport Code,NUTS_3_code,City,Country,1990,1991,1992,1993,1994,...,2016,2017,2018,2019,2020,2021,2022,2023,Latitude,Longitude
0,0,EBAW,BE211,Antwerp,Belgium,:,:,:,:,:,...,1036031,1041811,1047030,1053033,1059946,1062427,1067117,1081771,51.221110,4.399708
1,1,EBBR,BE100,Brussels,Belgium,:,:,:,:,:,...,1201285,1199095,1205492,1215289,1223364,1226329,1228655,1253178,50.846557,4.351697
2,2,EBCI,BE322,Charleroi,Belgium,:,:,:,:,:,...,431693,431043,431398,431879,432660,:,:,:,50.411623,4.444528
3,3,EBLG,BE332,Liege,Belgium,:,:,:,:,:,...,623887,625131,626222,626627,628363,627304,628067,630743,50.470816,5.773546
4,4,EBOS,BE255,Ostend,Belgium,:,:,:,:,:,...,155256,155982,156638,157293,157925,158320,158339,159568,51.225856,2.919496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,99,LFRS,FRG01,Nantes,France,1050539,1058966,1068983,1079601,1087854,...,1380852,1394909,1412502,1429272,1445171,1457806,1473637,1488876,47.218637,-1.554136
100,100,LFSB,FRF12,Basel/Mulhouse,France/Switzerland,670652,675110,679085,684357,688363,...,762743,764030,764981,767086,767842,767083,767909,768372,,
101,101,LFST,FRF11,Strasbourg,France,952158,960320,968879,978610,987370,...,1121407,1125559,1133552,1140057,1148073,1152662,1158915,1164485,48.584614,7.750713
102,102,LFTH,FRL05,Toulon,France,815714,825813,837166,846896,856303,...,1055821,1058740,1067697,1076711,1085189,1095337,1103664,1112421,43.125731,5.930492


In [6]:
# Only rows that have both coordinates and a 2019 population can be drawn.
cities_with_data = cities_lat_lon.dropna(subset=['Latitude', 'Longitude', '2019'])

# Bubble map restricted to Europe: one marker per city, sized by 2019 population.
fig = px.scatter_geo(
    cities_with_data,
    lat='Latitude',
    lon='Longitude',
    size='2019',
    hover_name='City',
    labels={'2019': 'Population'},
    title='Population of European Cities in 2019',
    scope='europe',
    projection='natural earth',
)

# Marker sizing: area-proportional bubbles, scaled relative to the largest 2019
# population, and never drawn smaller than 4 px.
# NOTE(review): the 100 looks like the target maximum marker size in Plotly's
# usual sizeref formula (2 * max / size_max**2) - confirm.
population_2019 = cities_with_data['2019']
bubble_sizeref = 2. * max(population_2019) / (100. ** 2)
bubble_marker = dict(sizemode='area', sizeref=bubble_sizeref, sizemin=4)
fig.update_traces(marker=bubble_marker)

# Base-map styling: land, coastlines, ocean and lakes.
geo_style = dict(
    resolution=50,
    showland=True,
    landcolor="lightgray",
    showcoastlines=True,
    coastlinecolor="black",
    showocean=True,
    oceancolor="lightblue",
    showlakes=True,
    lakecolor="blue",
)

fig.update_layout(width=1200, height=800, geo=geo_style)
fig.show()