In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
#nbformat

In [3]:
base_dir = os.path.join(os.getcwd(), 'data')

def load_csv(folder_name):
    folder_path = os.path.join(base_dir, folder_name)
    file_name = f"{folder_name}.csv"
    file_path = os.path.join(folder_path, file_name)
    if os.path.exists(file_path):
        try:
            return pd.read_csv(file_path, low_memory=False)
        except Exception as e:
            print(f"Błąd podczas wczytywania pliku {file_name}: {e}")
            return None
    else:
        print(f"Plik {file_name} nie istnieje w folderze {folder_name}")
        return None

airport_traffic_df = load_csv('airport_traffic')
taxi_in_df = load_csv('taxi_in_additional_time')
taxi_out_df = load_csv('taxi_out_additional_time')
horizontal_flight_efficiency_df = load_csv('horizontal_flight_efficiency')
co2_emmissions_df = load_csv('co2_emmissions_by_state')

In [4]:
airport_traffic_df.tail(5)

Unnamed: 0,YEAR,MONTH_NUM,MONTH_MON,FLT_DATE,APT_ICAO,APT_NAME,STATE_NAME,FLT_DEP_1,FLT_ARR_1,FLT_TOT_1,FLT_DEP_IFR_2,FLT_ARR_IFR_2,FLT_TOT_IFR_2
3064299,2025,4,APR,2025-04-30,EGNX,East Midlands,United Kingdom,77,84,161,,,
3064300,2025,4,APR,2025-04-30,EGPD,Aberdeen,United Kingdom,61,61,122,,,
3064301,2025,4,APR,2025-04-30,EGPF,Glasgow,United Kingdom,101,102,203,,,
3064302,2025,4,APR,2025-04-30,EGPH,Edinburgh,United Kingdom,179,176,355,178.0,174.0,352.0
3064303,2025,4,APR,2025-04-30,EGSS,London - Stansted,United Kingdom,272,273,545,270.0,260.0,530.0


In [5]:
taxi_in_df.sample(5)

Unnamed: 0,YEAR,MONTH_NUM,MONTH_MON,APT_ICAO,APT_NAME,STATE_NAME,TF,VALID_FL,NO_REF,TOTAL_REF_NB_FL,TOTAL_REF_TIME_MIN,TOTAL_ADD_TIME_MIN,COMMENT
14018,2023,6,JUN,LOWW,Vienna,Austria,11097.0,11081.0,160.0,10921.0,48535.9,15236.1,
22770,2024,3,MAR,EBCI,Charleroi,Belgium,3260.0,3258.0,7.0,3251.0,12551.0,1644.0,
19530,2021,4,APR,ENZV,Stavanger,Norway,,,,,,,NO DATA RECEIVED
11542,2021,3,MAR,GCLP,Gran Canaria,Spain,2053.0,2053.0,180.0,1873.0,2211.6099999999988,1615.94,
871,2018,10,OCT,EDDN,Nuremberg,Germany,3195.0,2748.0,82.0,2666.0,6730.528333333333,2384.8050000000003,


In [6]:
taxi_out_df.head(5)

Unnamed: 0,YEAR,MONTH_NUM,MONTH_MON,APT_ICAO,APT_NAME,STATE_NAME,TF,VALID_FL,NO_REF,TOTAL_REF_NB_FL,TOTAL_REF_TIME_MIN,TOTAL_ADD_TIME_MIN,COMMENT
0,2018,1,JAN,LOWW,Vienna,Austria,8546,7841,366,7475,48963.5,18452.866666666698,
1,2018,1,JAN,EBBR,Brussels,Belgium,8449,8410,360,8050,61529.5,22937.5,
2,2018,1,JAN,EBCI,Charleroi,Belgium,2193,2184,36,2148,15858.4,4540.6,
3,2018,1,JAN,LBSF,Sofia,Bulgaria,2223,1751,140,1611,14961.0,4211.0,
4,2018,1,JAN,LDZA,Zagreb,Croatia,1518,1515,125,1390,9623.3,3335.7000000000003,


In [7]:
horizontal_flight_efficiency_df.head(5)

Unnamed: 0,YEAR,MONTH_NUM,MONTH_MON,ENTRY_DATE,ENTITY_NAME,ENTITY_TYPE,TYPE_MODEL,DIST_FLOWN_KM,DIST_DIRECT_KM,DIST_ACHIEVED_KM
0,2015,1,JAN,2015-01-01,Albania,State (FIR),CPF,54860,54818.31,54243.46
1,2015,1,JAN,2015-01-01,Albania,State (FIR),FTFM,57351,57203.44,56046.98
2,2015,1,JAN,2015-01-01,Armenia,State (FIR),FTFM,12569,12480.85,12203.53
3,2015,1,JAN,2015-01-01,Austria,State (FIR),CPF,276427,275679.17,271400.0
4,2015,1,JAN,2015-01-01,Austria,State (FIR),FTFM,293622,289908.71,281492.61


In [8]:
co2_emmissions_df.sample(5)

Unnamed: 0,YEAR,MONTH,STATE_NAME,STATE_CODE,CO2_QTY_TONNES,TF,NOTE,Unnamed: 7
13253,2021,2,KOSOVO,BK,5895.25965,445,False,
5042,2020,1,BELGIUM,EB,368320.597,12580,False,
17189,2013,7,PORTUGAL,LP,350361.47000000003,16194,True,
11849,2018,6,ARMENIA,UD,20914.59,1031,False,
11170,2017,2,DENMARK,EK,203729.25,12918,True,


In [40]:
airports_location_path = os.path.join(base_dir, 'airports.csv')  # Plik z lokalizacjami lotnisk
airports_location_df = pd.read_csv(airports_location_path)

# Połączenie danych `airport_traffic` z lokalizacjami na podstawie kodu ICAO
merged_df = pd.merge(
    airport_traffic_df,
    airports_location_df,
    left_on='APT_ICAO',
    right_on='ident',  # Kolumna z kodem ICAO w danych lokalizacji
    how='inner'
)

selected_date = '2022-03-04'
filtered_df = merged_df[merged_df['FLT_DATE'] == selected_date].copy()

filtered_df['FLT_TOT_1'] = pd.to_numeric(filtered_df['FLT_TOT_1'], errors='coerce')
filtered_df = filtered_df.dropna(subset=['FLT_TOT_1'])

selected_date_eu = pd.to_datetime(selected_date).strftime('%d.%m.%Y')

# Tworzenie wykresu mapy z bańkami
fig = px.scatter_geo(
    filtered_df,
    lat='latitude_deg', 
    lon='longitude_deg', 
    size='FLT_TOT_1',
    color='FLT_TOT_1',  # Kolor bańki na podstawie liczby lotów
    color_continuous_scale=px.colors.sequential.Bluered,
    range_color=[0, 1200],
    hover_name='APT_NAME',  # Nazwa lotniska w tooltipie
    hover_data={'FLT_TOT_1': True, 'STATE_NAME': True},
    #text='APT_NAME',
    title=f"RUCH LOTNICZY NA LOTNISKACH W DNIU {selected_date_eu} r.",
    projection="natural earth",
    labels={'FLT_TOT_1': 'LICZBA LOTÓW', 'STATE_NAME': 'KRAJ', 'latitude_deg': 'SZER. GEOGRAFICZNA', 'longitude_deg': 'DŁ. GEOGRAFICZNA', 'APT_NAME': 'NAZWA LOTNISKA'},
    #scope='europe'  # Zakres mapy na Europę
)

#fig.update_traces(textposition='top center')

# Dostosowanie wyglądu mapy
fig.update_layout(
    geo=dict(
        showland=True,
        landcolor="rgb(217, 217, 217)",
        showcountries=True,
        showocean=True,
        showlakes=False,
        oceancolor="#B7D3E0",
        countrycolor="rgb(100, 100, 100)",
        center=dict(lat=50, lon=10),
        lataxis=dict(range=[10, 60]),
        lonaxis=dict(range=[-30, 50]),
    ),
    width=1280,   # szerokość w pikselach
    height=720
)

# Wyświetlenie wykresu
fig.write_html(selected_date+".html")#renderer="browser")
fig.show()  # Zapisanie wykresu jako plik PNG

In [30]:
airport_traffic_df['FLT_DATE'] = pd.to_datetime(airport_traffic_df['FLT_DATE'], errors='coerce')

# Filtrowanie danych dla EPWA i roku 2021
import plotly.express as px

# Przygotowanie danych jak poprzednio
epwa_2021 = airport_traffic_df[
    (airport_traffic_df['APT_ICAO'] == 'EPWA') &
    (airport_traffic_df['FLT_DATE'].dt.year == 2019)
].copy()
epwa_2021['FLT_TOT_1'] = pd.to_numeric(epwa_2021['FLT_TOT_1'], errors='coerce')
epwa_2021['month'] = epwa_2021['FLT_DATE'].dt.month

polish_months = {
    1: 'Styczeń', 2: 'Luty', 3: 'Marzec', 4: 'Kwiecień',
    5: 'Maj', 6: 'Czerwiec', 7: 'Lipiec', 8: 'Sierpień',
    9: 'Wrzesień', 10: 'Październik', 11: 'Listopad', 12: 'Grudzień'
}
monthly_avg = epwa_2021.groupby('month')['FLT_TOT_1'].mean().reset_index()
monthly_avg['month_name'] = monthly_avg['month'].map(polish_months)

# Wykres liniowy w Plotly
fig = px.line(
    monthly_avg,
    x='month_name',
    y='FLT_TOT_1',
    markers=True,
    title='Średnia liczba lotów na Lotnisku Chopina w Warszawie (2019 rok)',
    labels={'month_name': 'Miesiąc', 'FLT_TOT_1': 'Średnia liczba lotów'}
)

fig.update_traces(
    line=dict(color='#0072B5', width=4),
    marker=dict(size=12, color='#FF9900', line=dict(width=2, color='white'))
)

fig.update_layout(
    xaxis=dict(
        tickangle=-35,
        showgrid=False,
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333')
    ),
    yaxis=dict(
        range=[-10, 610],
        showgrid=True,
        gridcolor='rgba(200,200,200,0.3)',
        zeroline=False,
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333')
    ),
    plot_bgcolor='white',
    font=dict(size=16, family='Arial'),
    title=dict(x=0.5, font=dict(size=22, family='Arial', color='#222')),
    margin=dict(l=60, r=30, t=70, b=60),
    width=1000,
    height=550
)

fig.show()

In [29]:
airport_traffic_df['FLT_DATE'] = pd.to_datetime(airport_traffic_df['FLT_DATE'], errors='coerce')

# Filtrowanie danych dla EPWA i roku 2021
import plotly.express as px

# Przygotowanie danych jak poprzednio
epwa_2021 = airport_traffic_df[
    (airport_traffic_df['APT_ICAO'] == 'EPWA') &
    (airport_traffic_df['FLT_DATE'].dt.year == 2020)
].copy()
epwa_2021['FLT_TOT_1'] = pd.to_numeric(epwa_2021['FLT_TOT_1'], errors='coerce')
epwa_2021['month'] = epwa_2021['FLT_DATE'].dt.month

polish_months = {
    1: 'Styczeń', 2: 'Luty', 3: 'Marzec', 4: 'Kwiecień',
    5: 'Maj', 6: 'Czerwiec', 7: 'Lipiec', 8: 'Sierpień',
    9: 'Wrzesień', 10: 'Październik', 11: 'Listopad', 12: 'Grudzień'
}
monthly_avg = epwa_2021.groupby('month')['FLT_TOT_1'].mean().reset_index()
monthly_avg['month_name'] = monthly_avg['month'].map(polish_months)

# Wykres liniowy w Plotly
fig = px.line(
    monthly_avg,
    x='month_name',
    y='FLT_TOT_1',
    markers=True,
    title='Średnia liczba lotów na Lotnisku Chopina w Warszawie (2020 rok)',
    labels={'month_name': 'Miesiąc', 'FLT_TOT_1': 'Średnia liczba lotów'}
)

fig.update_traces(
    line=dict(color='#0072B5', width=4),
    marker=dict(size=12, color='#FF9900', line=dict(width=2, color='white'))
)

fig.update_layout(
    xaxis=dict(
        tickangle=-35,
        showgrid=False,
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333')
    ),
    yaxis=dict(
        range=[-10, 610],
        showgrid=True,
        gridcolor='rgba(200,200,200,0.3)',
        zeroline=False,
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333')
    ),
    plot_bgcolor='white',
    font=dict(size=16, family='Arial'),
    title=dict(x=0.5, font=dict(size=22, family='Arial', color='#222')),
    margin=dict(l=60, r=30, t=70, b=60),
    width=1000,
    height=550
)

fig.show()

In [34]:
airport_traffic_df['FLT_DATE'] = pd.to_datetime(airport_traffic_df['FLT_DATE'], errors='coerce')

# Filtrowanie danych dla EPWA i roku 2021
import plotly.express as px

# Przygotowanie danych jak poprzednio
epwa_2021 = airport_traffic_df[
    (airport_traffic_df['APT_ICAO'] == 'EPWA') &
    (airport_traffic_df['FLT_DATE'].dt.year == 2021)
].copy()
epwa_2021['FLT_TOT_1'] = pd.to_numeric(epwa_2021['FLT_TOT_1'], errors='coerce')
epwa_2021['month'] = epwa_2021['FLT_DATE'].dt.month

polish_months = {
    1: 'Styczeń', 2: 'Luty', 3: 'Marzec', 4: 'Kwiecień',
    5: 'Maj', 6: 'Czerwiec', 7: 'Lipiec', 8: 'Sierpień',
    9: 'Wrzesień', 10: 'Październik', 11: 'Listopad', 12: 'Grudzień'
}
monthly_avg = epwa_2021.groupby('month')['FLT_TOT_1'].mean().reset_index()
monthly_avg['month_name'] = monthly_avg['month'].map(polish_months)

# Wykres liniowy w Plotly
fig = px.line(
    monthly_avg,
    x='month_name',
    y='FLT_TOT_1',
    markers=True,
    title='Średnia liczba lotów na Lotnisku Chopina w Warszawie (2021 rok)',
    labels={'month_name': 'Miesiąc', 'FLT_TOT_1': 'Średnia liczba lotów'}
)

fig.update_traces(
    line=dict(color='#0072B5', width=4),
    marker=dict(size=12, color='#FF9900', line=dict(width=2, color='white'))
)

fig.update_layout(
    xaxis=dict(
        tickangle=-35,
        showgrid=False,
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333')
    ),
    yaxis=dict(
        range=[-10, 610],
        showgrid=True,
        gridcolor='rgba(200,200,200,0.3)',
        zeroline=False,
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333')
    ),
    plot_bgcolor='white',
    font=dict(size=16, family='Arial'),
    title=dict(x=0.5, font=dict(size=22, family='Arial', color='#222')),
    margin=dict(l=60, r=30, t=70, b=60),
    width=1000,
    height=550
)

fig.show()

In [56]:
import plotly.express as px

# Przygotowanie danych
airports = {
    'Warszawa': 'EPWA',
    'Londyn': 'EGLL',
    'Frankfurt': 'EDDF'
}
years = [2019, 2024]

data = []
for city, icao in airports.items():
    for year in years:
        avg = airport_traffic_df[
            (airport_traffic_df['APT_ICAO'] == icao) &
            (pd.to_datetime(airport_traffic_df['FLT_DATE'], errors='coerce').dt.year == year)
        ]['FLT_TOT_1'].astype(float).mean()
        data.append({'Lotnisko': city, 'Rok': str(year), 'Średnia liczba lotów': int(round(avg))})

df_bar = pd.DataFrame(data)

# Wykres słupkowy z ładnym formatowaniem, każda para obok siebie
fig = px.bar(
    df_bar,
    x='Lotnisko',
    y='Średnia liczba lotów',
    color='Rok',
    barmode='group',
    text='Średnia liczba lotów',
    color_discrete_map={'2019': 'indianred', '2024': 'lightsalmon'},
    category_orders={'Rok': ['2019', '2024']}
)

fig.update_traces(
    texttemplate='%{text}', 
    textposition='outside',
    marker_line_width=1,
    marker_line_color='white',
    width=0.3
)

fig.update_layout(
    title=dict(
        text='Średnia liczba lotów dziennie na wybranych lotniskach',
        x=0.5,
        font=dict(size=22, family='Arial', color='#222')
    ),
    xaxis=dict(
        title='Lotnisko',
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333')
    ),
    yaxis=dict(
        title='Średnia liczba lotów dziennie',
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333'),
        showgrid=True,
        gridcolor='rgba(200,200,200,0.3)'
    ),
    legend=dict(
        title='Rok',
        orientation='v',
        yanchor='top',
        y=0.98,
        xanchor='left',
        x=1.02,
        font=dict(size=16, family='Arial'),
        bgcolor='rgba(255,255,255,0.7)',
        bordercolor='#ccc',
        borderwidth=1
    ),
    plot_bgcolor='white',
    font=dict(size=16, family='Arial'),
    margin=dict(l=60, r=60, t=90, b=60),
    width=950,
    height=550
)

fig.show()

In [83]:
import plotly.graph_objects as go
from matplotlib import cm
import matplotlib.colors as mcolors
import numpy as np

airports = {
    'Warszawa': 'EPWA',
    'Londyn': 'EGLL',
    'Frankfurt': 'EDDF'
}
years = list(range(2019, 2025))

# Przygotuj gradienty dla każdego lotniska
def get_gradient(cmap_name, n):
    cmap = cm.get_cmap(cmap_name, n+2)
    return [mcolors.to_hex(cmap(i+2)[:3]) for i in range(n)]

warszawa_colors = get_gradient('Blues', len(years))      # niebieski
londyn_colors   = get_gradient('Greens', len(years))     # zielony
frankfurt_colors= get_gradient('Oranges', len(years))    # pomarańczowy

airport_colors = {
    'Warszawa': warszawa_colors,
    'Londyn': londyn_colors,
    'Frankfurt': frankfurt_colors
}
airport_legend_label = {
    'Warszawa': 'Warszawa',
    'Londyn': 'Londyn',
    'Frankfurt': 'Frankfurt'
}

data = []
for city, icao in airports.items():
    for year in years:
        avg = airport_traffic_df[
            (airport_traffic_df['APT_ICAO'] == icao) &
            (pd.to_datetime(airport_traffic_df['FLT_DATE'], errors='coerce').dt.year == year)
        ]['FLT_TOT_1'].astype(float).mean()
        data.append({'Lotnisko': city, 'Rok': str(year), 'Średnia liczba lotów': int(round(avg))})

df_bar = pd.DataFrame(data)

fig = go.Figure()

# Przygotuj x_labels z odstępami
x_labels = []
x_ticks = []
for city in airports.keys():
    for year in years:
        x_labels.append(f"{year}")
        x_ticks.append(f"{city}_{year}")
    x_labels.append("")  # odstęp
    x_ticks.append(f"spacer_{city}")

# Dodaj słupki: każdy trace to jedno miasto, 6 słupków (lata), odstęp po każdym mieście
for city in airports.keys():
    colors = airport_colors[city]
    y_vals = []
    text_vals = []
    x_vals = []
    for i, year in enumerate(years):
        val = df_bar[(df_bar['Lotnisko'] == city) & (df_bar['Rok'] == str(year))]['Średnia liczba lotów'].values
        y_vals.append(val[0] if len(val) > 0 else None)
        text_vals.append(val[0] if len(val) > 0 else "")
        x_vals.append(f"{city}_{year}")
    # Dodaj puste miejsce na koniec serii (dla odstępu)
    y_vals.append(None)
    text_vals.append("")
    x_vals.append(f"spacer_{city}")
    # Słupki z gradientem (bez legendy)
    fig.add_trace(go.Bar(
        x=x_vals,
        y=y_vals,
        name=None,
        marker_color=colors + ['rgba(0,0,0,0)'],
        text=text_vals,
        textposition='outside',
        showlegend=False
    ))
    # Dummy trace tylko do legendy, z kolorem środkowym
    fig.add_trace(go.Bar(
        x=[None],
        y=[None],
        name=airport_legend_label[city],
        marker_color=colors[len(colors)//2],
        showlegend=True,
        hoverinfo='skip'
    ))

fig.update_layout(
    barmode='group',
    title=dict(
        text='Ruch na wybranych lotniskach w latach 2019-2024',
        x=0.5,
        font=dict(size=22, family='Arial', color='#222')
    ),
    xaxis=dict(
        title='Rok',
        tickvals=x_ticks,
        ticktext=x_labels,
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333'),
    ),
    yaxis=dict(
        range=[-190, 1590],
        title='Średnia liczba lotów rocznie',
        tickfont=dict(size=16, family='Arial'),
        title_font=dict(size=18, family='Arial', color='#333'),
        showgrid=True,
        gridcolor='rgba(200,200,200,0.3)'
    ),
    legend=dict(
        title='Miasto',
        orientation='h',
        yanchor='top',
        y=1.15,           # Wyżej niż tytuł, nad wykresem
        xanchor='center',
        x=0.5,
        font=dict(size=16, family='Arial'),
        bgcolor='rgba(255,255,255,0.7)',
        bordercolor='#ccc',
        borderwidth=1
    ),
    bargap=0.35,
    plot_bgcolor='white',
    font=dict(size=16, family='Arial'),
    margin=dict(l=60, r=60, t=150, b=60),  # większy górny margines na legendę
    width=1400,
    height=550
)

fig.show()


The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.

