In [116]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

In [117]:
#graph for the evolution of air passengers for France, 2002-2024


file_path = 'Data/Eurostat_national_air_passengers_in_France_2002-2024.csv'
data = pd.read_csv(file_path)

fig = px.line(data_frame = data, 
            x = 'TIME_PERIOD',
            y = 'OBS_VALUE', 
            hover_data = ('TIME_PERIOD'),
            markers = False,
            labels = {
                'TIME_PERIOD' : 'Time',
                'OBS_VALUE': 'Passengers' 
            },
            

            title= 'Total inland flight air passengers in France, 2002-2024'
)

fig.update_traces(marker = {'symbol': 'square'})
fig.add_vline(x='2023-05', line_dash="dot", line_color="red") 
fig.show() 


fig.show


<bound method BaseFigure.show of Figure({
    'data': [{'hovertemplate': 'Time=%{x}<br>Passengers=%{y}<extra></extra>',
              'legendgroup': '',
              'line': {'color': '#636efa', 'dash': 'solid'},
              'marker': {'symbol': 'square'},
              'mode': 'lines',
              'name': '',
              'orientation': 'v',
              'showlegend': False,
              'type': 'scatter',
              'x': array(['2002-01', '2002-02', '2002-03', ..., '2024-04', '2024-05', '2024-06'],
                         dtype=object),
              'xaxis': 'x',
              'y': array([2057266, 2018903, 2291839, ..., 2117657, 2267371, 2264263]),
              'yaxis': 'y'}],
    'layout': {'legend': {'tracegroupgap': 0},
               'shapes': [{'line': {'color': 'red', 'dash': 'dot'},
                           'type': 'line',
                           'x0': '2023-05',
                           'x1': '2023-05',
                           'xref': 'x',
            

In [118]:
#importing data for national airports in France
eurostat_french_airports = pd.read_csv('Data/Eurostat_national_air_passengers_by_French_airports_2002-2024.csv')
french_airports = eurostat_french_airports.iloc[:, -4:-1]
                            
#replacing eurostat format of airport codes into IATA format
french_airports_mapping = {
    'FR_LFBD': 'BOD',
    'FR_LFLL': 'LYS',
    'FR_LFPG': 'CDG',
    'FR_LFPO': 'ORY',
    'FR_LFRS': 'NTE'
}

french_airports['rep_airp'] = french_airports['rep_airp'].replace(french_airports_mapping)

#sorting data pre, post Covid and after regulation
french_airports_preCovid = french_airports[french_airports['TIME_PERIOD'] <= '2020-01']
french_airports_postCovid = french_airports[(french_airports['TIME_PERIOD'] >= '2021-05') & (french_airports['TIME_PERIOD'] <= '2023-04')]
french_airports_after_regulation = french_airports[french_airports['TIME_PERIOD'] >= '2023-05']

#define a function that will print trendline equations
def print_trendline_equations(period_name, data, time_column, value_column):
    print(f"\nTrendline equations for {period_name}:")
    for airport in data['rep_airp'].unique():
        airport_data = data[data['rep_airp'] == airport]
        time_numeric = pd.to_datetime(airport_data[time_column]).map(pd.Timestamp.toordinal)
        trend = np.polyfit(time_numeric, airport_data[value_column], 1)
        slope, intercept = trend
        print(f"{airport}: y = {slope:.2f}x + {intercept:.2f}")

#prints trendline equations
print_trendline_equations("pre-Covid", french_airports_preCovid, 'TIME_PERIOD', 'OBS_VALUE')
print_trendline_equations("post-Covid", french_airports_postCovid, 'TIME_PERIOD', 'OBS_VALUE')
print_trendline_equations("after regulation", french_airports_after_regulation, 'TIME_PERIOD', 'OBS_VALUE')

#graphing data for national airports in France
fig2 = px.line(french_airports, 
              x='TIME_PERIOD', 
              y='OBS_VALUE', 
              color='rep_airp',
              labels={'TIME_PERIOD': 'time', 'OBS_VALUE': 'number of passengers', 'rep_airp': 'Airport'},
              title='French National Aviation Passengers per Airport',
              )

#plotting trendlines pre and post Covid
for airport in french_airports_preCovid['rep_airp'].unique():
    airport_data = french_airports_preCovid[french_airports_preCovid['rep_airp'] == airport]
    time_numeric = pd.to_datetime(airport_data['TIME_PERIOD']).map(pd.Timestamp.toordinal)
    trend = np.polyfit(time_numeric, airport_data['OBS_VALUE'], 1)
    trendline = trend[0] * time_numeric + trend[1]
    fig2.add_scatter(
        x=airport_data['TIME_PERIOD'], 
        y=trendline, 
        mode='lines', 
        name=f'{airport} Trendline',
        showlegend= False
    )

for airport in french_airports_postCovid['rep_airp'].unique():
    airport_data = french_airports_postCovid[french_airports_postCovid['rep_airp'] == airport]
    time_numeric = pd.to_datetime(airport_data['TIME_PERIOD']).map(pd.Timestamp.toordinal)
    trend = np.polyfit(time_numeric, airport_data['OBS_VALUE'], 1)
    trendline = trend[0] * time_numeric + trend[1]
    fig2.add_scatter(
        x=airport_data['TIME_PERIOD'], 
        y=trendline, 
        mode='lines', 
        name=f'{airport} Trendline',
        showlegend= False
    )

for airport in french_airports_after_regulation['rep_airp'].unique():
    airport_data = french_airports_after_regulation[french_airports_after_regulation['rep_airp'] == airport]
    time_numeric = pd.to_datetime(airport_data['TIME_PERIOD']).map(pd.Timestamp.toordinal)
    trend = np.polyfit(time_numeric, airport_data['OBS_VALUE'], 1)
    trendline = trend[0] * time_numeric + trend[1]
    fig2.add_scatter(
        x=airport_data['TIME_PERIOD'], 
        y=trendline, 
        mode='lines', 
        name=f'{airport} Trendline',
        showlegend= False
    )

fig2.add_vline(x='2023-05', line_width=2, line_dash='dot', line_color='red')

fig2.update_layout(height = 550, 
    font = dict(size = 16))

fig2.show()



Trendline equations for pre-Covid:
BOD: y = 14.29x + -10279319.96
LYS: y = 13.86x + -9922955.78
CDG: y = 22.62x + -16141895.34
ORY: y = -27.43x + 21351631.46
NTE: y = 22.46x + -16350510.28

Trendline equations for post-Covid:
BOD: y = 13.98x + -10153357.56
LYS: y = 59.79x + -43913220.02
CDG: y = 286.23x + -210780297.63
ORY: y = 276.55x + -203378086.88
NTE: y = -34.88x + 25941838.74

Trendline equations for after regulation:
BOD: y = -86.04x + 63751675.00
LYS: y = -132.28x + 97951654.17
CDG: y = -265.70x + 196913786.49
ORY: y = -172.63x + 128318300.51
NTE: y = -102.56x + 75970304.28


In [119]:
#graph for the evolution of air passengers for Germany, 2002-2024


file_path = 'data/Eurostat_national_air_passengers_in_Germany_2002-2024.csv'
data = pd.read_csv(file_path)

fig4 = px.line(data_frame = data, 
            x = 'TIME_PERIOD',
            y = 'OBS_VALUE', 
            hover_data = ('TIME_PERIOD'),
            markers = False,
            labels = {
                'TIME_PERIOD' : 'Time',
                'OBS_VALUE': 'Passengers' 
            },
            

            title= 'Total inland flight air passengers in Germany, 2002-2024'
)

fig4.update_traces(marker = {'symbol': 'square'})
fig4.add_vline(x='2023-05', line_dash="dot", line_color="red") 
fig4.show() 


fig4.show

<bound method BaseFigure.show of Figure({
    'data': [{'hovertemplate': 'Time=%{x}<br>Passengers=%{y}<extra></extra>',
              'legendgroup': '',
              'line': {'color': '#636efa', 'dash': 'solid'},
              'marker': {'symbol': 'square'},
              'mode': 'lines',
              'name': '',
              'orientation': 'v',
              'showlegend': False,
              'type': 'scatter',
              'x': array(['2002-01', '2002-02', '2002-03', ..., '2024-04', '2024-05', '2024-06'],
                         dtype=object),
              'xaxis': 'x',
              'y': array([1499169, 1559125, 1721969, ..., 1037852, 1051557, 1101762]),
              'yaxis': 'y'}],
    'layout': {'legend': {'tracegroupgap': 0},
               'shapes': [{'line': {'color': 'red', 'dash': 'dot'},
                           'type': 'line',
                           'x0': '2023-05',
                           'x1': '2023-05',
                           'xref': 'x',
            

In [120]:
#graph emissions
file_path = 'Data/Eurostat_national_air_passengers_in_France_2002-2024.csv'
data = pd.read_csv(file_path)

national_aviation_emissions_2019 = 5.4*0.56*(10**(9)) #in kg; from FEUILLE DE ROUTE de décarbonation de l’aérien
national_passengers_2019 = data[ 
    (data['TIME_PERIOD'].str.match('^2019-0[1-9]$')) | 
    (data['TIME_PERIOD'].str.match('^2019-1[0-2]$'))
]['OBS_VALUE'].sum()



#plugging in the emission data
file_path_emissions = 'Data/CO2_emissions_from_national_domestic_flight_fr.csv'
emission_data = pd.read_csv(file_path_emissions, index_col=0)

#yearly passenger data
monthly_passengers = data['OBS_VALUE']
yearly_passengers = monthly_passengers.groupby(monthly_passengers.index // 12).sum()

#setting the emissions per passenger per year by taking the yearly emissions and dividing them by the passenger numbers
emissions_per_passenger = []
for i in range(22):
    emissions_per_passenger.append((emission_data.iloc[i,0] * 10**9) / yearly_passengers[i])



#use 2022 data of emissions per passenger for 2024 since that data is missing 
emissions_per_passenger.append(emissions_per_passenger[21])
emissions_per_passenger = pd.DataFrame(emissions_per_passenger)



#monthly emission data. split up the yearly emissions of the 22 and a half years per passenger onto the 270 months 
monthly_emissions_per_passenger = emissions_per_passenger.loc[emissions_per_passenger.index.repeat(12)] 
monthly_emissions_per_passenger = monthly_emissions_per_passenger.iloc[:-6]


#Monthly emissions for France's domestic aviation for 2002-2024
monthly_emissions_france = []
for i in range(len(monthly_passengers)):
    monthly_emissions_france.append (monthly_emissions_per_passenger.iloc[i]*monthly_passengers[i] / (10**9))


#changing Index of the two dataframes for the merge
monthly_emissions_france = pd.DataFrame(monthly_emissions_france)
monthly_emissions_france.index = [data['TIME_PERIOD']]
data.index = [data['TIME_PERIOD']]



#add monthly emissions as a column to the dataframe "data"
data = pd.concat([data, monthly_emissions_france], axis= 1)
data.rename(columns = {0:'TOT_MONTH_EMISS'}, inplace= True)





#sorting data pre, post Covid and after regulation
french_emissions_preCovid = data[data['TIME_PERIOD'] <= '2020-01']
french_emissions_postCovid = data[(data['TIME_PERIOD'] >= '2021-05') & (data['TIME_PERIOD'] <= '2023-04')]
french_emissions_after_regulation = data[data['TIME_PERIOD'] >= '2023-05']


#defining function to add and print trendline (equations)
def add_trendline_and_equation(fig, df, label, color):
    x = np.arange(len(df))
    y = df['TOT_MONTH_EMISS'].values

    slope, intercept = np.polyfit(x, y, 1)
    trendline = slope * x + intercept

    fig.add_scatter(x=df['TIME_PERIOD'], y=trendline, mode='lines', name=f'Trendline {label}', line=dict(color=color))

    print(f'Equation of trendline for {label}: y = {slope:.2f}x + {intercept:.2f}')

fig5 = px.line(data_frame = data, 
            x = 'TIME_PERIOD',
            y = 'TOT_MONTH_EMISS', 
            hover_data = ('TIME_PERIOD'),
            markers = False,
            labels = {
                'TIME_PERIOD' : 'Time',
                'TOT_MONTH_EMISS': 'Total emissions per month (MtCO2)' 
            },
            

            title= 'Total national aviation emissions in France per month, 2002-2024'
)

fig5.update_traces(marker = {'symbol': 'square'})
fig5.add_vline(x='2023-05', line_dash="dot", line_color="red") 


#adding trendlines and their equations
add_trendline_and_equation(fig5, french_emissions_preCovid, 'pre-Covid', 'blue')
add_trendline_and_equation(fig5, french_emissions_postCovid, 'post-Covid', 'green')
add_trendline_and_equation(fig5, french_emissions_after_regulation, 'after regulation', 'purple')


fig5.show() 



Equation of trendline for pre-Covid: y = -0.00x + 0.39
Equation of trendline for post-Covid: y = -0.00x + 0.39
Equation of trendline for after regulation: y = -0.01x + 0.41


In [121]:
#mapping the flights concerned by the French Climate and Resilience Law


france_airports = pd.read_csv('Data/Mapping/aip_aeroports2_metropole_p.csv')

fig = px.scatter_geo(france_airports, 
                     lon = france_airports.x,
                     lat = france_airports.y,
                     text = 'nom___name',
                     hover_name = 'nom___name', 
                     hover_data={'x':False,'y':False, 'nom___name': False}
                     )



#airports
fig.update_traces(textposition= ['bottom center', 'bottom center', 'bottom center', 'top center', 'middle right', 'bottom center', 'bottom center'], 
                  textfont=dict(
                         family='arial',
                         size=13.5,
                         color = 'black'),
                  marker = dict(symbol = 'circle-open-dot', size = 7, color = 'black'))

fig.update_layout(width = 900, height=600,
                  geo = dict(
                        scope = 'europe',
                        resolution = 50,
                        lonaxis_range= [-4, 8.5],
                        lataxis_range= [42, 51],
                        landcolor = 'rgb(200, 200, 200)',
                        bgcolor = 'rgb(127,205,255)'
        )
    )

#flights concerned by the regulation

for i in range(2):
    fig.add_trace(
        go.Scattergeo(
            lon = [france_airports['x'][4], france_airports['x'][i]],
            lat = [france_airports['y'][4], france_airports['y'][i]],
            mode = 'lines',
            line = dict(width = 2, color = 'green'),
            showlegend = False
            
            
                
            )
        )
fig.add_trace(
    go.Scattergeo(
        lon = [france_airports['x'][4], france_airports['x'][2]],
        lat = [france_airports['y'][4], france_airports['y'][2]],
        mode = 'lines',
        line = dict(width = 2, color = 'green'), 
        name = 'Flights banned by the Climate and Resilience law',
        showlegend = True
        )
)

#flights that should have been banned by the regulations but are still allowed due to train scheduling 

#from CDG
for i in [0, 1, 2, 5]:
    fig.add_trace(
        go.Scattergeo(
            lon = [france_airports['x'][3], france_airports['x'][i]],
            lat = [france_airports['y'][3], france_airports['y'][i]],
            mode = 'lines',
            line = dict(width = 2, color = 'red'),
            showlegend = False,
            name = france_airports['nom___name'][3]
                
            )
        )

#Lyon - Marseille
fig.add_trace(
        go.Scattergeo(
            lon = [france_airports['x'][1], france_airports['x'][6]],
            lat = [france_airports['y'][1], france_airports['y'][6]],
            mode = 'lines',
            line = dict(width = 2, color = 'red'),
            showlegend = True,
            name = 'Flights avoiding the ban due to train scheduling'
                
            )
        )


fig.show()




In [122]:
#mapping Germany 


 
germany_airports = pd.read_csv('Data/Mapping/de-airports.csv')
germany_trains = pd.read_csv('Data/train_times_germany.csv')



fig2 = px.scatter_geo(germany_airports, 
                     lon = germany_airports.longitude_deg,
                     lat = germany_airports.latitude_deg,
                     text = 'name',
                     hover_name = 'name', 
                     hover_data={'longitude_deg':False,'latitude_deg':False, 'name': False}
                     )

fig2.update_traces(textposition= ['middle right','top center', 'top center', 'bottom left', 'bottom center', 'top center', 'bottom left', 'bottom right'], 
                  textfont=dict(
                         family='arial',
                         size=14,
                         color = 'black'),
                  marker = dict(symbol = 'circle-open-dot', size = 8, color = 'rgb(0, 0, 0)')
                  )

fig2.update_layout(width = 900, height=600,
                  geo = dict(
                        scope = 'europe',
                        resolution = 50,
                        lonaxis_range= [5.4, 15.6],
                        lataxis_range= [47.4, 55],
                        landcolor = 'rgb(200, 200, 200)',
                        bgcolor = 'rgb(127,205,255)'
                             )
                    )


#air connections with <2h30 train connections

#from Frankfurt
for i in [2, 6, 7]:
    fig2.add_trace(
        go.Scattergeo(
            lon = [germany_airports['longitude_deg'][i], germany_airports['longitude_deg'][3]],
            lat = [germany_airports['latitude_deg'][i], germany_airports['latitude_deg'][3]],
            mode = 'lines',
            line = dict(width = 1.8, color = 'green'),
            showlegend = False
                )
            )
    
    fig2.add_trace(
        go.Scattergeo(
            lon = [(germany_airports['longitude_deg'][i] + germany_airports['longitude_deg'][3])/2],
            lat = [(germany_airports['latitude_deg'][i] + germany_airports['latitude_deg'][3])/2], 
            mode = 'text',
            text = germany_trains.iloc[i,4],
            showlegend = False,
            textfont=dict(
                    family='arial',
                    size=12,
                    color = 'black')
                

            )
        )

#Stuttgart - Munich
fig2.add_trace(
        go.Scattergeo(
            lon = [germany_airports['longitude_deg'][4], germany_airports['longitude_deg'][6]],
            lat = [germany_airports['latitude_deg'][4], germany_airports['latitude_deg'][6]],
            mode = 'lines',
            line = dict(width = 1.8, color = 'green'),
            name = 'Flight connections with <2h30 rail connections',
            showlegend = True
                )
            )
fig2.add_trace(
    go.Scattergeo(
        lon = [(germany_airports['longitude_deg'][4] + germany_airports['longitude_deg'][6])/2],
        lat = [(germany_airports['latitude_deg'][4] + germany_airports['latitude_deg'][6])/2], 
        mode = 'text',
        text = germany_trains.iloc[i,4],
        showlegend = False,
        textfont=dict(
                family='arial',
                size=12,
                color = 'black')
                

            )
        )

       
       


#air connections with 2h30-4h train connections 

#from Hamburg
for i in [2, 3, 5]:
    fig2.add_trace(
        go.Scattergeo(
            lon = [germany_airports['longitude_deg'][i], germany_airports['longitude_deg'][0]],
            lat = [germany_airports['latitude_deg'][i], germany_airports['latitude_deg'][0]],
            mode = 'lines',
            line = dict(width = 1.8, color = 'orange'),
            showlegend = False
                )
            )
    fig2.add_trace(
        go.Scattergeo(
            lon = [(germany_airports['longitude_deg'][i] + germany_airports['longitude_deg'][0])/2],
            lat = [(germany_airports['latitude_deg'][i] + germany_airports['latitude_deg'][0])/2], 
            mode = 'text',
            text = germany_trains.iloc[i,1],
            showlegend = False,
            textfont=dict(
                    family='arial',
                    size=12,
                    color = 'black')
                

            )
        )


#Munich - Frankfurt 

fig2.add_trace(
        go.Scattergeo(
            lon = [germany_airports['longitude_deg'][3], germany_airports['longitude_deg'][4]],
            lat = [germany_airports['latitude_deg'][3], germany_airports['latitude_deg'][4]],
            mode = 'lines',
            line = dict(width = 1.8, color = 'orange'),
            name = 'Flight connections with 2h30-4h rail connections',
            showlegend = True
                )
            )

fig2.add_trace(
    go.Scattergeo(
        lon = [(germany_airports['longitude_deg'][3] + germany_airports['longitude_deg'][4])/2],
        lat = [(germany_airports['latitude_deg'][3] + germany_airports['latitude_deg'][4])/2], 
        mode = 'text',
        text = germany_trains.iloc[4, 4],
        showlegend = False,
        textfont=dict(
                family='arial',
                size=12,
                color = 'black')
                

            )
        )



fig2.show()

In [123]:
# Create the data manually based on the provided structure
data = {
    "Journey": ["CGN - DUS", "NUE - MUC", "FRA - CGN", "STG - NUE", "FRA - STG", "FRA - DUS", "FRA - NUE", 
                "MUC - STG", "HAM - BER", "CGN - STG", "FRA - MUC", "HAM - DUS", "DUS - STG", "CGN - NUE", 
                "DUS - NUE", "HAM - CGN", "BER - NUE", "FRA - HAM", "FRA - BER", "MUC - CGN", "MUC - BER", 
                "NUE - HAM", "BER - CGN", "BER - DUS", "MUC - DUS", "BER - STG", "HAM - STG", "MUC - HAM"],
    "Direct Train Time": ["00:20", "01:00", "01:10", "02:15", "01:45", "01:30", "02:00", 
                          "02:00", "02:30", "02:45", "03:05", "03:35", "03:30", "03:15", 
                          "03:30", "04:00", "03:30", "03:30", "04:00", "04:30", "04:00",
                          "04:30", "04:45", "04:20",  "04:45", "06:00", "05:45", "05:35"],
    "Flight Journey Time": ["n/a", "n/a", "n/a", "n/a", "03:45", "03:45", "03:45",
                            "03:45", "n/a", "n/a", "03:55", "03:55", "n/a", "n/a", 
                            "n/a", "04:00", "n/a", "04:05", "04:10", "04:10", "04:10", 
                            "04:05", "04:10", "04:10", "04:10", "04:10", "04:15", "04:20"],
    "Distance": ["35", "150", "150", "155", "158", "188", "190", "193", 
                 "274", "288", "300", "300", "321", "336", "363", "364", "375", "410", "430", "438", "462", "467", "471", "478", "485", "510", "553", "600"]
}
print(len(data["Journey"]))
# Convert to DataFrame
df = pd.DataFrame(data)

# Add " km" unit to Distance values
df["Distance"] = df["Distance"].astype(str) + " km"

# Convert time format from "hr:min" to minutes for plotting
def time_to_minutes(time_str):
    if time_str == "n/a":
        return None
    hours, minutes = map(int, time_str.split(":"))
    return hours * 60 + minutes

# Apply the conversion
df["Direct Train Time"] = df["Direct Train Time"].apply(time_to_minutes)
df["Flight Journey Time"] = df["Flight Journey Time"].apply(time_to_minutes)

# Reshape data to long format for Plotly Express
df_long = df.melt(id_vars=["Journey", "Distance"],
                  value_vars=["Direct Train Time", "Flight Journey Time"],
                  var_name="Travel_Type", value_name="Journey_Time_Minutes")

# Add labels only for train time
df_long["Distance_Label"] = df_long.apply(
    lambda row: row["Distance"] if row["Travel_Type"] == "Direct Train Time" else None,
    axis=1
)

# Plot the graph with selective distance label
fig = px.bar(
    df_long,
    y="Journey",
    x="Journey_Time_Minutes",
    color="Travel_Type",
    text="Distance_Label",  # Display distance label only for Flight Journey Time bars
    title="Journey Times Between German Cities by Train and Plane",
    labels={
        "Journey": "Journey",
        "Journey_Time_Minutes": "Journey Time (Minutes)",
        "Distance_Label": "Distance"
    },
    barmode="group",
    height = 900
)

# Update layout to place the text outside the bars
fig.update_traces(textposition="outside")

# Show the plot
fig.show()

28


In [124]:
#compare 

data = pd.read_csv('Data/2020_no_short_haul_flights_from germany.csv')


# Reshape data to long format for Plotly Express
data_long = data.melt(id_vars='Distance_Range', value_vars=['Domestic', 'International'],
                      var_name='Flight_Type', value_name='Number_of_Flights')

# Create the bar chart
fig = px.bar(data_long, x='Distance_Range', y='Number_of_Flights', color='Flight_Type',
             barmode='group', title='Number of Domestic and International Flights by Distance Range (2020)',
             labels={'Distance_Range': 'Distance Range (Kilometers)', 'Number_of_Flights': 'Number of Flights'},
             text='Number_of_Flights')

# Display the plot
fig.show()


In [125]:
# Load data
file_path = 'Data/Eurostat_national_air_passengers_in_Germany_2002-2024.csv'
data = pd.read_csv(file_path)

# Yearly passenger data
monthly_passengers = data['OBS_VALUE']

# Calculate emissions per passenger
emissions_per_passenger = pd.Series([115 / 1000 * 313]) #115 gCO2 per passenger per km * average km of a German domestic flight


# Monthly emissions per passenger
monthly_emissions_per_passenger = emissions_per_passenger.loc[emissions_per_passenger.index.repeat(270)].iloc[:len(monthly_passengers)]

# Monthly emissions for Germany's domestic aviation for 2002-2024
monthly_emissions_germany = [(monthly_emissions_per_passenger.iloc[i] * monthly_passengers[i]) / (10**6) for i in range(len(monthly_passengers))]
monthly_emissions_germany = pd.DataFrame(monthly_emissions_germany, index=data['TIME_PERIOD'], columns=['TOT_MONTH_EMISS'])


#changing Index of the two dataframes for the merge
monthly_emissions_germany.index = [data['TIME_PERIOD']]
data.index = [data['TIME_PERIOD']]

#add monthly emissions as a column to the dataframe "data"
data = pd.concat([data, monthly_emissions_germany], axis= 1)
data.rename(columns = {0:'TOT_MONTH_EMISS'}, inplace= True)






print(data)

# Filter data for pre, post Covid periods
german_emissions_preCovid = data[data['TIME_PERIOD']  <= '2020-01']
german_emissions_postCovid = data[(data['TIME_PERIOD'] >= '2021-05') & (data['TIME_PERIOD'] <= '2023-04')]
german_emissions_after_regulation = data[data['TIME_PERIOD'] >= '2023-05']

# Define function to add and print trendline (equations)
def add_trendline_and_equation(fig, df, label, color):
    x = np.arange(len(df))
    y = df['TOT_MONTH_EMISS'].values

    slope, intercept = np.polyfit(x, y, 1)
    trendline = slope * x + intercept

    fig.add_scatter(x=df['TIME_PERIOD'], y=trendline, mode='lines', name=f'Trendline {label}', line=dict(color=color))
    print(f'Equation of trendline for {label}: y = {slope:.2f}x + {intercept:.2f}')

print(german_emissions_preCovid)
print(german_airports_postCovid.isna().sum())
print(german_emissions_after_regulation.isna().sum())

# Plotting
fig5 = px.line(data_frame = data,
            x = 'TIME_PERIOD',
            y = 'TOT_MONTH_EMISS',
            hover_data = ('TIME_PERIOD'),
            markers = False,
            labels = {
                'TIME_PERIOD' : 'Time',
                'TOT_MONTH_EMISS': 'Total emissions per month (MtCO2)'
            },


            title= 'Total national aviation emissions in Germany per month, 2002-2024'
)

fig5.update_traces(marker={'symbol': 'square'})
fig5.add_vline(x='2023-05', line_dash="dot", line_color="red")

# Add trendlines
add_trendline_and_equation(fig5, german_emissions_preCovid, 'pre-Covid', 'blue')
add_trendline_and_equation(fig5, german_emissions_postCovid, 'post-Covid', 'green')
add_trendline_and_equation(fig5, german_emissions_after_regulation, 'post-Covid', 'purple')

fig5.show()




                         DATAFLOW        LAST UPDATE freq unit tra_meas geo  \
TIME_PERIOD                                                                   
2002-01      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
2002-02      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
2002-03      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
2002-04      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
2002-05      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
...                           ...                ...  ...  ...      ...  ..   
2024-02      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
2024-03      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
2024-04      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
2024-05      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00    M  PAS  PAS_CRD  DE   
2024-06      ESTAT:AVIA_PANC(1.0)  10/10/24 23:00:00