In [10]:
import pandas as pd

# Collection-date stamps (MMDD) of the Athens snapshots. Each stamp names both
# the sub-folder and the CSV file, so the paths are generated from this single
# list instead of being typed out and then parsed back apart.
dates = ['0320', '0327', '0403', '0410', '0417', '0423', '0501']

filenames = [f'dep&ret_flights/dep&ret_flights_{date}/athens_{date}.csv' for date in dates]

dataframes = [pd.read_csv(filename) for filename in filenames]

# Combine all data frames, tagging every row with its full collection date.
# The stamps carry no year, so 2023 is prepended here as a 'YYYYMMDD' string.
athens = pd.concat(
    [df.assign(collection_date=f'2023{date}') for df, date in zip(dataframes, dates)]
)

# One vectorised parse: no row-wise apply and no intermediate year-1900 dates.
athens['collection_date'] = pd.to_datetime(athens['collection_date'], format='%Y%m%d')


In [21]:
def plot_city(city, color, kde_color, collection_dates=None):
    """Show an interactive price-density figure for one city.

    For each collection date the city's CSV is read, a Gaussian KDE of
    ``price_amount`` is drawn, and the inter-quartile range (shaded band)
    and the median (vertical line) are overlaid. A slider switches the
    figure between collection dates. The figure is displayed with
    ``fig.show()``; nothing is returned.

    Parameters
    ----------
    city : str
        City name. Lower-cased to build the CSV file name and capitalised
        for the figure title.
    color : str
        Colour of the IQR band, the median line and the legend marker.
    kde_color : str
        Colour of the KDE curve.
    collection_dates : sequence of str, optional
        'MMDD' stamps naming the snapshot folders/files. Defaults to the
        notebook-level ``dates`` list.

    Notes
    -----
    Errors from ``pd.read_csv`` (e.g. a missing file) propagate to the caller.
    Dates with fewer than two distinct prices are skipped, because
    ``gaussian_kde`` cannot be fitted to them.
    """
    if collection_dates is None:
        collection_dates = dates  # notebook-level list of 'MMDD' stamps

    # The Athens files visible in this notebook are lower-case
    # ('athens_0320.csv') while callers pass capitalised names.
    # NOTE(review): assumes every city's files follow that lower-case naming - confirm.
    file_stem = city.lower()

    # Load city data; the stamps carry no year, so 2023 is prepended.
    frames = [
        pd.read_csv(f'dep&ret_flights/dep&ret_flights_{stamp}/{file_stem}_{stamp}.csv')
          .assign(collection_date=f'2023{stamp}')
        for stamp in collection_dates
    ]
    df = pd.concat(frames)
    df['collection_date'] = pd.to_datetime(df['collection_date'], format='%Y%m%d')

    # Calculate quartiles: one row per date, columns 0.25 / 0.5 / 0.75
    quartiles = df.groupby('collection_date')['price_amount'].quantile([0.25, 0.5, 0.75]).unstack(level=1)

    fig = go.Figure()

    # Every plotted date contributes this many traces (KDE curve + legend entry).
    traces_per_date = 2
    # (slider label, layout shapes) for each date that was actually plotted.
    plotted = []

    # Convert numpy datetime64 to pandas Timestamp for date() method
    for date in pd.to_datetime(df['collection_date'].unique()):
        # quantile() ignores NaN but gaussian_kde does not, so drop them here.
        prices = df.loc[df['collection_date'] == date, 'price_amount'].dropna()
        if prices.nunique() < 2:
            # gaussian_kde needs at least two distinct values.
            continue

        # Plain floats so the values serialise cleanly inside the slider args.
        q1, median, q3 = (float(value) for value in quartiles.loc[date])
        label = str(date.date())
        show_initially = not plotted  # only the first plotted date starts visible

        # Calculate KDE
        kernel = stats.gaussian_kde(prices)
        x_kde = np.linspace(prices.min(), prices.max(), 1000)

        # Add KDE trace for current date
        fig.add_trace(go.Scatter(x=x_kde, y=kernel(x_kde),
                                 mode='lines',
                                 line_color=kde_color,
                                 opacity=0.6,
                                 fill='tozeroy',
                                 name=label,
                                 visible=show_initially))

        # Add legend entry for quartiles and median (no data, legend text only)
        fig.add_trace(go.Scatter(x=[None], y=[None],
                                 mode='markers',
                                 marker=dict(size=0, color=color),
                                 name=f'Q1: {q1:.2f}, Median: {median:.2f}, Q3: {q3:.2f}',
                                 visible=show_initially))

        # Shapes live in the layout, not in traces, so they are collected per
        # date and swapped in by the slider rather than added with a
        # 'visible' flag that a trace restyle can never change.
        shapes = [
            # IQR band
            dict(type='rect',
                 x0=q1, x1=q3, y0=0, y1=1, yref='paper',
                 fillcolor=color, opacity=0.5,
                 line=dict(width=0),
                 layer='below'),
            # Median line
            dict(type='line',
                 x0=median, x1=median, y0=0, y1=1, yref='paper',
                 line=dict(color=color, width=2),
                 layer='below'),
        ]
        plotted.append((label, shapes))

    # One slider step per plotted date. 'update' applies a trace restyle and a
    # layout relayout together: the visibility list has one entry per trace
    # (two per date), and the layout's shapes are replaced by that date's pair.
    n_traces = traces_per_date * len(plotted)
    steps = [
        dict(method='update',
             label=label,
             args=[{'visible': [k // traces_per_date == i for k in range(n_traces)]},
                   {'shapes': shapes}])
        for i, (label, shapes) in enumerate(plotted)
    ]

    # Update layout
    fig.update_layout(title_text=f'Price Amount Distribution for {city.capitalize()}',
                      xaxis_title='Price Amount', yaxis_title='Density',
                      shapes=plotted[0][1] if plotted else [],
                      sliders=[dict(steps=steps)])

    fig.show()

# Accent colour used for each city's quartile overlay; one figure is drawn per entry.
city_color = {
    'Paris': 'blue',
    'Berlin': 'green',
    'Madrid': 'red',
    'Lisbon': 'purple',
    'Rome': 'orange',
    'Budapest': 'brown',
    'Athens': 'CadetBlue',
}

# Every density curve shares the same neutral colour.
kde_color = "gray"

# Render the figure for each city in turn
for city, color in city_color.items():
    plot_city(city, color, kde_color)
