Scatter Plot Distance-Population

In [1]:
import pandas as pd
import plotly_express as px

In [2]:
# Input workbooks
cities_filepath = '../Data/AirportCodes_Cities_NUTS_Population.xlsx'
distance_filepath = '../Data/CITIES_FINAL.xlsx'

# Cities table: whatever the first column is called in the workbook, it is
# renamed to 'City_Index' and then promoted to the frame's index.
cities = (
    pd.read_excel(cities_filepath)
    .pipe(lambda frame: frame.rename(columns={frame.columns[0]: 'City_Index'}))
    .set_index('City_Index')
)

# Only the '2016' sheet of the distance workbook is loaded here.
distance2016 = pd.read_excel(distance_filepath, sheet_name='2016')


In [3]:
# Prepare columns
# Copy the selected columns up front so every column added below lands on an
# independent frame rather than on a selection of `distance2016`.
df = distance2016[['City_A_Name', 'City_B_Name', 'NUTS_3_code_A', 'NUTS_3_code_B', 'Distance']].copy()

# Pair label "<City A> - <City B>", inserted directly after the two name
# columns (position 2) instead of creating a None placeholder first.
df.insert(loc=2, column='City_Pair_Name', value=df['City_A_Name'] + ' - ' + df['City_B_Name'])

# Not referenced in this cell; kept because other cells may read it.
years = [2016, 2017, 2018, 2019]

# Distinct (NUTS_3_code, '2016') pairs only: a code that is repeated in
# `cities` would otherwise multiply the rows of both left merges below.
population_2016 = cities[['NUTS_3_code', '2016']].drop_duplicates()

# Attach the '2016' value for each side of the pair. The lookup columns are
# renamed to their final names before merging, so the join key is shared and
# no suffixed helper column has to be dropped afterwards.
for side in ('A', 'B'):
    df = df.merge(
        population_2016.rename(
            columns={
                'NUTS_3_code': f'NUTS_3_code_{side}',
                '2016': f'2016 Population_{side}',
            }
        ),
        on=f'NUTS_3_code_{side}',
        how='left',  # keep pairs whose code has no match in `cities` (value is NaN)
    )

# Remove rows that are identical across all columns.
df = df.drop_duplicates()

# Sum of both sides, placed right after 'Distance' (the position the former
# None placeholder column occupied) so the column order is unchanged.
df.insert(
    loc=df.columns.get_loc('Distance') + 1,
    column='Combined_Pop_2016',
    value=df['2016 Population_A'] + df['2016 Population_B'],
)

In [6]:
# Derived columns used by the plot:
#   Combined_Pop_2016 - sum of the two '2016 Population_*' columns
#   Distance_km       - 'Distance' divided by 1000
#                       (assumes 'Distance' is stored in metres - TODO confirm)
df['Combined_Pop_2016'] = df['2016 Population_A'].add(df['2016 Population_B'])
df['Distance_km'] = df['Distance'].div(1000)

# One point per row; both the hover label and the colour are taken from
# City_Pair_Name.
fig = px.scatter(
    data_frame=df,
    x='Distance_km',
    y='Combined_Pop_2016',
    color='City_Pair_Name',
    hover_name='City_Pair_Name',
    labels=dict(
        Distance_km='Distance (km)',
        Combined_Pop_2016='Combined Population 2016',
    ),
    title='Scatter Plot of Distance vs Combined Population (2016)',
)

# Render the figure
fig.show()
