In [1]:
import pandas as pd
import plotly.express as px

# Load the cleaned emission records and the yearly traffic totals.
df_emission = pd.read_csv("../notebooks/cache/Schiphol_Emissie_Cleaned.csv")
df_yearly = pd.read_csv("../notebooks/cache/Yearly_Traffic_Data_Cleaned.csv")

# Make sure 'EMISSIE (kg)' is float.
# When the CSV stores the values with a decimal comma, pandas reads the column
# as text and the comma has to be swapped for a dot before casting. When the
# column was already parsed as numeric, the `.str` accessor would raise an
# AttributeError, so the replacement is only applied to non-numeric columns.
emission_kg = df_emission['EMISSIE (kg)']
if not pd.api.types.is_numeric_dtype(emission_kg):
    emission_kg = emission_kg.str.replace(',', '.', regex=False)
df_emission['EMISSIE (kg)'] = emission_kg.astype(float)

# Keep only the rows whose substance name is "Koolstofdioxide".
df_koolstofdioxide = df_emission[df_emission['STOFNAAM'] == 'Koolstofdioxide']

# Sum the emissions per emission year; the result has the columns
# 'EMISSIEJAAR' and 'EMISSIE (kg)'.
df_aggregated = (
    df_koolstofdioxide
    .groupby('EMISSIEJAAR')['EMISSIE (kg)']
    .sum()
    .reset_index()
)

# Inner-join the traffic data with the yearly emission totals, so only years
# present in both frames survive. 'EMISSIEJAAR' duplicates 'Year' after the
# join and is dropped. Remaining missing values are replaced with 0 in every
# column (without mutating the merge result in place).
# NOTE(review): filling with 0 also affects the traffic columns; confirm that
# a 0 is the intended stand-in for a missing count.
df_combined = (
    pd.merge(df_yearly, df_aggregated,
             left_on='Year', right_on='EMISSIEJAAR', how='inner')
    .drop(columns=['EMISSIEJAAR'])
    .fillna(0)
)

# Human-readable names shown on the axes, the colour bar and in the hover box.
axis_labels = {
    'Air_Transport_Movements_Total': 'Air Transport Movements',
    'Passengers_Total': 'Total Passengers',
    'EMISSIE (kg)': 'Koolstofdioxide Emissions (kg)'
}

# Fields listed in the hover box. 'Year' is already used as the hover title
# (hover_name), so it is switched off here to avoid showing it twice.
hover_fields = {
    'Air_Transport_Movements_Total': True,
    'Passengers_Total': True,
    'EMISSIE (kg)': True,
    'Year': False
}

# Bubble chart: one bubble per row of df_combined, positioned by movements (x)
# and passengers (y); the emission total drives both bubble size and colour.
fig = px.scatter(
    df_combined,
    x='Air_Transport_Movements_Total',
    y='Passengers_Total',
    size='EMISSIE (kg)',
    color='EMISSIE (kg)',
    hover_name='Year',
    hover_data=hover_fields,
    size_max=60,
    labels=axis_labels,
)

# Figure title, explicit axis titles and a fixed canvas size.
layout_options = dict(
    title="Bubble Chart of Air Transport Movements, Passengers, and Koolstofdioxide Emissions",
    xaxis_title="Air Transport Movements",
    yaxis_title="Total Passengers",
    height=600,
    width=1000,
)
fig.update_layout(**layout_options)

# Render the figure in the notebook output.
fig.show()
