In [1]:
import warnings

import pandas as pd

In [2]:
# Read the population workbook once. The row indices in `no_data_rows`
# (0-8, 10 and 2093-2102) are skipped because they carry no population data.
pop_filename = '../Data/NUTS-3_Population.xlsx'
no_data_rows = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 2093, 2094, 2095,
                 2096, 2097, 2098, 2099, 2100, 2101, 2102]
pop_raw = pd.read_excel(pop_filename, skiprows=no_data_rows)

# Preprocess the population table.
# Columns that pandas auto-named 'Unnamed: N' hold no population data, so they
# are filtered out in memory instead of parsing the workbook a second time.
named_columns = [col for col in pop_raw.columns if not str(col).startswith('Unnamed')]

# The two 'TIME' header columns hold the NUTS code and its description;
# rename() returns a new frame, so `pop_raw` stays untouched for inspection.
pop = pop_raw[named_columns].rename(
    columns={'TIME': 'NUTS Code', 'TIME.1': 'NUTS Desc'}
)
pop

Unnamed: 0,NUTS Code,NUTS Desc,1990,1991,1992,1993,1994,1995,1996,1997,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,EU27_2020,European Union - 27 countries (from 2020),418030739,419504036,420413536,421912578,423104986,423960534,424641798,425273491,...,442883888,443666812,444802830,445534430,446208557,446446444,447319916,447073916,446735291,448753823
1,EU28,European Union - 28 countries (2013-2020),475187711,476842235,477925130,479561788,480893003,481904006,482736385,483512803,...,507235091,508520205,510181874,511378572,512372000,513093556,:,:,:,:
2,EU27_2007,European Union - 27 countries (2007-2013),470415155,472060056,473329265,475006017,476247848,477245113,478155218,478979775,...,502988282,504294889,505991205,507224359,508266507,509017310,:,:,:,:
3,BE,Belgium,9947782,9986975,10021997,10068319,10100631,10130574,10143047,10170226,...,11180840,11237274,11311117,11351727,11398589,11455519,11522440,11554767,11617623,11742796
4,BE1,Région de Bruxelles-Capitale/Brussels Hoofdste...,:,:,:,:,:,:,:,:,...,1172751,1184101,1201285,1199095,1205492,1215289,1223364,1226329,1228655,1253178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2077,TRC3,"Mardin, Batman, Şırnak, Siirt",:,:,:,:,:,:,:,:,...,2116727,2153921,2173759,2179588,2222601,2284158,2307332,2343826,2367645,2393781
2078,TRC31,Mardin,:,:,:,:,:,:,:,:,...,779738,788996,796591,796237,809719,829195,838778,854716,862757,870374
2079,TRC32,Batman,:,:,:,:,:,:,:,:,...,547581,557593,566633,576899,585252,599103,608659,620278,626319,634491
2080,TRC33,Şırnak,:,:,:,:,:,:,:,:,...,475255,488966,490184,483788,503236,524190,529615,537762,546589,557605


In [3]:
# Load the lookup table that links each airport code to its city, country
# and NUTS-3 code
c_filename = '../Data/AirportCodes_Cities_NUTS.xlsx'
cities = pd.read_excel(io=c_filename)
cities

Unnamed: 0,Airport Code,NUTS_3_code,City,Country
0,EBAW,BE211,Antwerp,Belgium
1,EBBR,BE100,Brussels,Belgium
2,EBCI,BE322,Charleroi,Belgium
3,EBLG,BE332,Liege,Belgium
4,EBOS,BE255,Ostend,Belgium
...,...,...,...,...
99,LFRS,FRG01,Nantes,France
100,LFSB,FRF12,Basel/Mulhouse,France/Switzerland
101,LFST,FRF11,Strasbourg,France
102,LFTH,FRL05,Toulon,France


In [4]:
# Attach the population time series to every airport/city row through its
# NUTS-3 code. Rows without a matching code are kept (left join).
#
# This cell overwrites `cities`, so population columns left over from a
# previous run are dropped first; otherwise a second run would produce
# suffixed duplicates such as '2019_x' / '2019_y'.
pop_key_columns = ['NUTS Code', 'NUTS Desc']
pop_value_columns = [col for col in pop.columns if col not in pop_key_columns]

cities = (
    cities
    .drop(columns=pop_value_columns, errors='ignore')
    .merge(pop, left_on='NUTS_3_code', right_on='NUTS Code', how='left')
    .drop(columns=pop_key_columns)  # join helpers are redundant after the merge
)

# Persist the combined table (the DataFrame index is written as the first column)
output_filepath = '../Data/AirportCodes_Cities_NUTS_Population.xlsx'
cities.to_excel(output_filepath)

In [5]:
import plotly_express as px

# Years shown on the trend chart
plot_years = ['2016', '2017', '2018', '2019']

# Convert the plotted year columns of `cities` to numbers in place; entries that
# cannot be parsed (such as the ':' markers visible in the tables) become NaN.
for year in plot_years:
    cities[year] = pd.to_numeric(cities[year], errors='coerce')

# Keep the ten rows with the largest 2019 population
top_cities = cities.nlargest(n=10, columns='2019')

# Reshape to long format: one row per (city, year) pair
cities_long = top_cities.melt(
    id_vars=['NUTS_3_code', 'City'],
    value_vars=plot_years,
    var_name='Year',
    value_name='Population',
)

# One line per city across the selected years
fig = px.line(
    cities_long,
    x='Year',
    y='Population',
    color='City',
    title='Population Development Over Time',
    labels={'Population': 'Population', 'Year': 'Year'},
)

fig.show()

In [7]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Nominatim client used for every look-up in this notebook.
# NOTE(review): the user_agent looks like a generic tutorial value; consider an
# application-specific one, as Nominatim asks clients to identify themselves.
geolocator = Nominatim(user_agent="geoapiExercises")

# Wrap the look-up so that consecutive calls are spaced at least one second
# apart, keeping the notebook within the service's rate limits
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
)

def get_lat_lon(city_name, country_name):
    """Geocode a city and return its coordinates.

    The query ``"<city_name>, <country_name>"`` is sent through the
    module-level rate-limited ``geocode`` callable.

    Parameters
    ----------
    city_name : str
        Name of the city to look up.
    country_name : str
        Name of the country used to qualify the city.

    Returns
    -------
    pandas.Series
        Two elements, ``[latitude, longitude]``. Both are ``None`` when either
        input is missing, when the geocoder finds no match, or when the
        look-up raises (in which case a warning is emitted).
    """
    no_result = pd.Series([None, None])

    # A missing value would otherwise be formatted into the query as "nan"/"None"
    if pd.isna(city_name) or pd.isna(country_name):
        return no_result

    query = f"{city_name}, {country_name}"
    try:
        location = geocode(query)
    except Exception as exc:
        # Stay best-effort (one failing row must not abort the whole run),
        # but report the failure instead of hiding it.
        warnings.warn(f"Geocoding failed for {query!r}: {exc}")
        return no_result

    if location:
        return pd.Series([location.latitude, location.longitude])
    return no_result

# Geocode every row from its 'City' and 'Country' values.
# Work on a copy: a plain assignment would only alias `cities`, so adding the
# coordinate columns would silently modify `cities` as well.
cities_lat_lon = cities.copy()
cities_lat_lon[['Latitude', 'Longitude']] = cities_lat_lon.apply(
    lambda row: get_lat_lon(row['City'], row['Country']),
    axis=1,
)

cities_lat_lon


Unnamed: 0,Airport Code,NUTS_3_code,City,Country,1990,1991,1992,1993,1994,1995,...,2016,2017,2018,2019,2020,2021,2022,2023,Latitude,Longitude
0,EBAW,BE211,Antwerp,Belgium,:,:,:,:,:,:,...,1036031,1041811,1047030,1053033,1059946,1062427,1067117,1081771,51.221110,4.399708
1,EBBR,BE100,Brussels,Belgium,:,:,:,:,:,:,...,1201285,1199095,1205492,1215289,1223364,1226329,1228655,1253178,50.846557,4.351697
2,EBCI,BE322,Charleroi,Belgium,:,:,:,:,:,:,...,431693,431043,431398,431879,432660,:,:,:,50.411623,4.444528
3,EBLG,BE332,Liege,Belgium,:,:,:,:,:,:,...,623887,625131,626222,626627,628363,627304,628067,630743,50.470816,5.773546
4,EBOS,BE255,Ostend,Belgium,:,:,:,:,:,:,...,155256,155982,156638,157293,157925,158320,158339,159568,51.225856,2.919496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,LFRS,FRG01,Nantes,France,1050539,1058966,1068983,1079601,1087854,1096033,...,1380852,1394909,1412502,1429272,1445171,1457806,1473637,1488876,47.218637,-1.554136
100,LFSB,FRF12,Basel/Mulhouse,France/Switzerland,670652,675110,679085,684357,688363,692543,...,762743,764030,764981,767086,767842,767083,767909,768372,,
101,LFST,FRF11,Strasbourg,France,952158,960320,968879,978610,987370,994932,...,1121407,1125559,1133552,1140057,1148073,1152662,1158915,1164485,48.584614,7.750713
102,LFTH,FRL05,Toulon,France,815714,825813,837166,846896,856303,864290,...,1055821,1058740,1067697,1076711,1085189,1095337,1103664,1112421,43.125731,5.930492


In [8]:
# Coerce the 2019 column to numbers here rather than relying on an earlier
# cell having done so: non-numeric entries (such as ':') become NaN and are
# dropped below together with rows that could not be geocoded.
population_2019 = pd.to_numeric(cities_lat_lon['2019'], errors='coerce')
cities_with_data = (
    cities_lat_lon
    .assign(**{'2019': population_2019})
    .dropna(subset=['Latitude', 'Longitude', '2019'])
)

# Create a scatter map for the 2019 population
fig = px.scatter_geo(
    cities_with_data,
    lat='Latitude',
    lon='Longitude',
    hover_name='City',
    size='2019',  # Use the 2019 population for bubble size
    projection='natural earth',
    title='Population of European Cities in 2019',
    labels={'2019': 'Population'},
    scope='europe',  # Limit the map to Europe
)

# Adjust marker settings for better visualization.
# With sizemode='area', sizeref = 2 * max(size) / max_marker_size**2 scales the
# largest bubble to roughly `max_marker_size`; sizemin keeps small cities visible.
max_marker_size = 100.
largest_population = cities_with_data['2019'].max()
fig.update_traces(
    marker=dict(
        sizemode='area',
        sizeref=2. * largest_population / (max_marker_size ** 2),
        sizemin=4,
    )
)

fig.update_layout(
    width=1200,
    height=800,
    geo=dict(
        resolution=50,
        showland=True,
        landcolor="lightgray",
        showcoastlines=True,
        coastlinecolor="black",
        showocean=True,
        oceancolor="lightblue",
        showlakes=True,
        lakecolor="blue"
    )
)
fig.show()