In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime
import calendar
#Styles
sns.set(style="whitegrid")
plt.style.use('ggplot')
pd.set_option('display.float_format', '{:.2f}'.format)
import plotly.io as pio
pio.templates.default = "plotly_white"

# Load dataset
df = pd.read_csv('clean.csv')

# Preprocess data
df['Date'] = pd.to_datetime(df['Date'])
df['Day_of_Week'] = df['Date'].dt.day_name()
df['Week'] = df['Date'].dt.isocalendar().week
df['Month'] = df['Date'].dt.month_name()
df['Route'] = df['From'] + '-' + df['To']

# Map airport codes to countries for ranking section
df['Route'] = df['From'] + '-' + df['To']
country_map = {
    'BKK': 'Thailand', 'CAI': 'Egypt', 'FRA': 'Germany', 'HKG': 'Hong Kong',
    'ICN': 'South Korea', 'KUL': 'Malaysia', 'LAX': 'USA', 'TPE': 'Taiwan', 'NRT' : 'Japan', 'SGN' : 'Vietnam', 'SYD' : 'Australia'
}
df['Country'] = df.apply(lambda x: country_map[x['From']] if x['To'] == 'TPE' else country_map[x['To']], axis=1)
df['Country'] = df.apply(
        lambda x: country_map[x['From']] if x['To'] == 'TPE' else country_map[x['To']], 
        axis=1
    )
    
    # Create direction column (to/from Taipei)
df['Direction'] = df.apply(
        lambda x: 'To Taipei' if x['To'] == 'TPE' else 'From Taipei', 
        axis=1
    )

# Display first few rows
print("Dataset Preview:")
print(df.head())

Dataset Preview:
  From   To       Date  Flight Duration  Stops  Price  co2 emissions  \
0  BKK  TPE 2025-06-01              230      0   2927            210   
1  BKK  TPE 2025-06-02              220      0   4850            213   
2  BKK  TPE 2025-06-03              230      0   3531            208   
3  BKK  TPE 2025-06-04              230      0   3230            208   
4  BKK  TPE 2025-06-05              230      0   2965            223   

  Day_of_Week  Week Month    Route   Country  Direction  
0      Sunday    22  June  BKK-TPE  Thailand  To Taipei  
1      Monday    23  June  BKK-TPE  Thailand  To Taipei  
2     Tuesday    23  June  BKK-TPE  Thailand  To Taipei  
3   Wednesday    23  June  BKK-TPE  Thailand  To Taipei  
4    Thursday    23  June  BKK-TPE  Thailand  To Taipei  


Artifact 2: Descriptive Statistics

In [8]:
from IPython.display import display, Markdown
import warnings
warnings.filterwarnings('ignore')

# Create Stops_Category if not exists
if 'Stops_Category' not in df.columns:
    df['Stops_Category'] = df['Stops'].map({0: 'Direct', 1: '1 Stop', 2: '2+ Stops'})

# Define color palette
country_palette = px.colors.qualitative.Plotly

# 1. Core Metrics Distribution by Country (Box Plots)
fig = make_subplots(
    rows=3, cols=1,
    subplot_titles=('Flight Duration by Country (minutes)', 
                    'Price by Country (USD)', 
                    'CO₂ Emissions by Country (kg)'),
    vertical_spacing=0.1
)

# Get top 10 countries by sample size
top_countries = df['Country'].value_counts().index[:10]

for i, metric in enumerate(['Flight Duration', 'Price', 'co2 emissions'], 1):
    for country in top_countries:
        fig.add_trace(go.Box(
            y=df[df['Country'] == country][metric],
            name=country,
            marker_color=country_palette[list(top_countries).index(country) % len(country_palette)],
            showlegend=(i==1)  # Only show legend for first subplot
        ), row=i, col=1)

fig.update_layout(
    height=900,
    title_text="<b>Core Metrics Distribution by Country</b>",
    title_x=0.5,
    margin=dict(t=100, b=100)
)
fig.show()
# 2. Correlation Matrix
corr_matrix = df[['Price', 'Flight Duration', 'co2 emissions']].corr()

corr_fig = go.Figure(data=go.Heatmap(
    z=corr_matrix.values,
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    colorscale='RdBu',
    zmin=-1,
    zmax=1,
    text=np.round(corr_matrix.values, 2),
    texttemplate="%{text}",
    hoverinfo="x+y+z"
))

corr_fig.update_layout(
    title='<b>Correlation Matrix Between Key Metrics</b>',
    xaxis_title="Metric",
    yaxis_title="Metric",
    height=400,
    width=500,
    margin=dict(t=100)
)
corr_fig.show()

def create_ranking_figure(stats_df, title, value_suffix=''):
    fig = go.Figure(data=[go.Table(
        header=dict(
            values=['<b>Country</b>', '<b>Average</b>', '<b>Minimum</b>', '<b>Maximum</b>'],
            fill_color='paleturquoise',
            align='center',
            font=dict(size=14)
        ),
        cells=dict(
            values=[stats_df.index, 
                   stats_df.iloc[:, 0].round(1).astype(str) + value_suffix,
                   stats_df.iloc[:, 1].round(1).astype(str) + value_suffix,
                   stats_df.iloc[:, 2].round(1).astype(str) + value_suffix],
            fill_color='lavender',
            align='center'
        )
    )])
    
    fig.update_layout(
        title=f'<b>{title}</b>',
        margin=dict(t=80, l=10, r=10, b=10),
        height=400 + len(stats_df)*20
    )
    return fig

# 1. Price Ranking Figure
price_stats = df.groupby('Country')['Price'].agg(['mean', 'min', 'max'])\
               .sort_values('mean', ascending=False)
price_fig = create_ranking_figure(price_stats, '💵 Flight Price Ranking (USD)', ' $')
price_fig.show()

# 2. Duration Ranking Figure
duration_stats = df.groupby('Country')['Flight Duration'].agg(['mean', 'min', 'max'])\
                  .sort_values('mean', ascending=False)
duration_fig = create_ranking_figure(duration_stats, '⏱️ Flight Duration Ranking (minutes)', ' min')
duration_fig.show()

# 3. Emissions Ranking Figure
emission_stats = df.groupby('Country')['co2 emissions'].agg(['mean', 'min', 'max'])\
                   .sort_values('mean', ascending=False)
emission_fig = create_ranking_figure(emission_stats, '🌱 CO₂ Emissions Ranking (kg)', ' kg')
emission_fig.show()

Artifact 3: Trend and Temporal Analysis

In [None]:
# %% [markdown]
### 🗓️ Interactive Daily Flight Price Trends

# %%
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import widgets, Layout
from IPython.display import display, HTML

# Custom CSS styling
display(HTML("""
<style>
    .widget-label {
        font-weight: bold !important;
        color: #2c3e50 !important;
    }
    .widget-dropdown, .widget-radio {
        background: #f8f9fa !important;
        border-radius: 8px !important;
        padding: 8px !important;
        border: 1px solid #dee2e6 !important;
    }
</style>
"""))

# Create control widgets with better styling
country_dropdown = widgets.Dropdown(
    options=['All Countries'] + sorted(df['Country'].unique()),
    value='All Countries',
    description='🌍 Country:',
    layout=Layout(width='400px'),
    style={'description_width': '100px'}
)

direction_radio = widgets.RadioButtons(
    options=['Both Directions', 'From Taipei', 'To Taipei'],
    value='Both Directions',
    description='✈️ Direction:',
    layout=Layout(width='400px'),
    style={'description_width': '100px'}
)

# Create the visualization function with enhanced styling
def plot_daily_prices(country, direction):
    # Filter data based on selections
    filtered_df = df.copy()
    
    if country != 'All Countries':
        filtered_df = filtered_df[filtered_df['Country'] == country]
    
    if direction == 'From Taipei':
        filtered_df = filtered_df[filtered_df['From'] == 'TPE']
    elif direction == 'To Taipei':
        filtered_df = filtered_df[filtered_df['To'] == 'TPE']
    
    # Create the figure with enhanced styling
    fig = go.Figure()
    
    # Add each route as a separate trace for better control
    for route in filtered_df['Route'].unique():
        route_df = filtered_df[filtered_df['Route'] == route]
        fig.add_trace(go.Scatter(
            x=route_df['Date'],
            y=route_df['Price'],
            name=route,
            mode='lines',
            line_shape='spline',
            line=dict(width=2.5),
            hovertemplate=(
                "<b>%{x|%b %d}</b><br>" +
                "Route: %{fullData.name}<br>" +
                "Price: $%{y:,.0f}<extra></extra>"
            )
        ))
    
    # Update layout for professional look
    fig.update_layout(
        title=dict(
            text="<b>✈️ Daily Flight Price Trends</b>",
            x=0.05,
            xanchor='left',
            font=dict(size=22, color='#2c3e50')
        ),
        plot_bgcolor='rgba(248,249,250,1)',
        paper_bgcolor='rgba(248,249,250,1)',
        hovermode='x unified',
        xaxis=dict(
            showgrid=True,
    gridcolor='rgba(222,226,230,0.7)',
    tickformat='%b %d',
    rangeslider=dict(
        visible=True,
        thickness=0.08,
        bgcolor='rgba(222,226,230,0.4)',
        bordercolor='#dee2e6',
        borderwidth=1
    ),
    rangeselector=dict(
        bgcolor='rgba(255,255,255,0.8)',
        bordercolor='#dee2e6',
        borderwidth=1,
        font=dict(color='#495057'),
        buttons=list([
            dict(
                count=7,
                label='1 Week',
                step='day',
                stepmode='backward'
            ),
            dict(
                count=14,
                label='2 Weeks',
                step='day',
                stepmode='backward'
            ),
            dict(
                count=1,
                label='1 Month',
                step='month',
                stepmode='backward'
            ),
            dict(
                label='Full Range',
                step='all'
            )
        ])
    )
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='rgba(222,226,230,0.7)',
            tickprefix='$',
            title='Price (USD)'
        ),
        font=dict(
            family='Arial',
            size=12,
            color='#495057'
        ),
        margin=dict(l=80, r=40, t=100, b=80),
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=-0.3,
            xanchor='center',
            x=0.5
        ),
        height=650
    )
    
    # Add watermark
    fig.add_annotation(
        x=1, y=-0.25,
        xref='paper', yref='paper',
        text="✈️ Flight Data Analysis | Source: Clean.csv",
        showarrow=False,
        font=dict(size=10, color='#adb5bd')
    )
    
    # Add interactive features
    fig.update_traces(
        hoverlabel=dict(
            bgcolor='white',
            font_size=12,
            font_family="Arial"
        )
    )
    
    fig.show()

# Create interactive widget with better layout
controls = widgets.VBox([
    widgets.HBox([country_dropdown, direction_radio]),
], layout=Layout(
    padding='20px',
    border='1px solid #dee2e6',
    border_radius='12px',
    margin='0 0 20px 0'
))

# Display the controls and visualization
display(controls)
widgets.interactive_output(
    plot_daily_prices,
    {'country': country_dropdown, 'direction': direction_radio}
)

VBox(children=(HBox(children=(Dropdown(description='🌍 Country:', layout=Layout(width='400px'), options=('All C…

Output()

Artifact 4: Correlation and Relationship Insights

In [19]:
# %% [markdown]
### 🌍 Geographic Route Comparison with Metrics

# %%
import plotly.graph_objects as go
import pandas as pd
from ipywidgets import widgets, Layout, Output
from IPython.display import display, clear_output

# Prepare data - calculate averages
route_stats = df.groupby(['From', 'To', 'Country']).agg({
    'Price': 'mean',
    'Flight Duration': 'mean',
    'co2 emissions': 'mean'
}).reset_index()

# Get coordinates for each city (replace with your actual coordinates)
city_coords = {
    'TPE': {'lat': 25.0330, 'lon': 121.5654, 'name': 'Taipei'},
    'BKK': {'lat': 13.7563, 'lon': 100.5018, 'name': 'Bangkok'},
    'CAI': {'lat': 30.0444, 'lon': 31.2357, 'name': 'Cairo'},
    'FRA': {'lat': 50.1109, 'lon': 8.6821, 'name': 'Frankfurt'},
    'HKG': {'lat': 22.3193, 'lon': 114.1694, 'name': 'Hong Kong'},
    'ICN': {'lat': 37.4602, 'lon': 126.4407, 'name': 'Seoul'},
    'KUL': {'lat': 3.1390, 'lon': 101.6869, 'name': 'Kuala Lumpur'},
    'LAX': {'lat': 34.0522, 'lon': -118.2437, 'name': 'Los Angeles'},
    'NRT': {'lat': 35.7647, 'lon': 140.3865, 'name': 'Tokyo'},
    'SGN': {'lat': 10.8231, 'lon': 106.6297, 'name': 'Ho Chi Minh City'},
    'SYD': {'lat': -33.8688, 'lon': 151.2093, 'name': 'Sydney'}
}

# Create widgets
country_dropdown = widgets.Dropdown(
    options=['All Countries'] + sorted(df['Country'].unique()),
    value='All Countries',
    description='🌍 Select Country:',
    layout=Layout(width='400px'),
    style={'description_width': '120px'}
)

# Create output area for metrics
metrics_output = Output()

# Create visualization function
def plot_geo_routes(country):
    with metrics_output:
        clear_output(wait=True)
        
        fig = go.Figure()
        
        # Filter data based on selection
        if country == 'All Countries':
            filtered_df = route_stats[route_stats['To'] == 'TPE']  # All flights to Taipei
            display(HTML("<h3 style='color:#2C3E50'>Global Routes to Taipei</h3>"))
        else:
            filtered_df = route_stats[
            ((route_stats['From'] == 'TPE') | (route_stats['To'] == 'TPE')) &

            (route_stats['Country'] == country)
            ]
            
            # Calculate and display metrics
            to_tpe = filtered_df[filtered_df['To'] == 'TPE']
            from_tpe = filtered_df[filtered_df['From'] == 'TPE']
            
            if not to_tpe.empty:
                avg_price_to = to_tpe['Price'].mean()
                avg_duration_to = to_tpe['Flight Duration'].mean()
                avg_co2_to = to_tpe['co2 emissions'].mean()
            else:
                avg_price_to = avg_duration_to = avg_co2_to = 0
                
            if not from_tpe.empty:
                avg_price_from = from_tpe['Price'].mean()
                avg_duration_from = from_tpe['Flight Duration'].mean()
                avg_co2_from = from_tpe['co2 emissions'].mean()
            else:
                avg_price_from = avg_duration_from = avg_co2_from = 0
            
            # Create metrics display
            metrics_html = f"""
            <div style="background:#f8f9fa; padding:15px; border-radius:8px; margin-bottom:15px">
                <h3 style='color:#2C3E50; margin-top:0'>Metrics for {country}</h3>
                <div style="display:flex; justify-content:space-between">
                    <div style="width:48%; background:white; padding:10px; border-radius:6px; box-shadow:0 2px 4px rgba(0,0,0,0.1)">
                        <h4 style='color:#FF6B6B; margin-top:0'>To Taipei</h4>
                        <p>✈️ Avg Price: <b>${avg_price_to:,.0f}</b></p>
                        <p>⏱️ Avg Duration: <b>{avg_duration_to:,.0f} min</b></p>
                        <p>🌱 Avg CO₂: <b>{avg_co2_to:,.0f} kg</b></p>
                    </div>
                    <div style="width:48%; background:white; padding:10px; border-radius:6px; box-shadow:0 2px 4px rgba(0,0,0,0.1)">
                        <h4 style='color:#4ECDC4; margin-top:0'>From Taipei</h4>
                        <p>✈️ Avg Price: <b>${avg_price_from:,.0f}</b></p>
                        <p>⏱️ Avg Duration: <b>{avg_duration_from:,.0f} min</b></p>
                        <p>🌱 Avg CO₂: <b>{avg_co2_from:,.0f} kg</b></p>
                    </div>
                </div>
            </div>
            """
            display(HTML(metrics_html))
        
        # Add routes to the figure
        for _, row in filtered_df.iterrows():
            from_city = row['From']
            to_city = row['To']
            
            # Get coordinates
            start_lon = city_coords[from_city]['lon']
            start_lat = city_coords[from_city]['lat']
            end_lon = city_coords[to_city]['lon']
            end_lat = city_coords[to_city]['lat']
            
            # Color based on direction
            line_color = '#FF6B6B' if to_city == 'TPE' else '#4ECDC4'
            line_width = 3 if country != 'All Countries' else 2
            
            # Add route line
            fig.add_trace(go.Scattergeo(
                lon = [start_lon, end_lon],
                lat = [start_lat, end_lat],
                mode = 'lines',
                line = dict(width=line_width, color=line_color),
                name = f"{city_coords[from_city]['name']} ↔ {city_coords[to_city]['name']}",
                hoverinfo = 'text',
                hovertext = (
                    f"<b>Route:</b> {from_city} → {to_city}<br>"
                    f"<b>Avg Price:</b> ${row['Price']:,.0f}<br>"
                    f"<b>Avg Duration:</b> {row['Flight Duration']:,.0f} min<br>"
                    f"<b>Avg CO₂:</b> {row['co2 emissions']:,.0f} kg"
                )
            ))
        
        # Add city markers
        for city, coords in city_coords.items():
            fig.add_trace(go.Scattergeo(
                lon = [coords['lon']],
                lat = [coords['lat']],
                mode = 'markers+text',
                marker = dict(size=10, color='#2C3E50'),
                text = coords['name'],
                textposition = 'top center',
                hoverinfo = 'none',
                showlegend = False
            ))
        
        # Update layout for globe view
        fig.update_layout(
            geo = dict(
                projection_type = 'orthographic',
                showland = True,
                landcolor = 'rgb(243, 243, 243)',
                countrycolor = 'rgb(204, 204, 204)',
                showocean = True,
                oceancolor = 'rgb(222, 243, 246)',
                showcountries = True,
                showframe = False,
                center = dict(lon=121, lat=25),  # Focus on Taiwan
                lataxis_range = [-60, 80],  # Limit polar regions
                lonaxis_range = [-180, 180]
            ),
            height = 600,
            margin = dict(l=0, r=0, t=0, b=0),
            plot_bgcolor = 'rgba(248,249,250,1)',
            paper_bgcolor = 'rgba(248,249,250,1)',
            legend = dict(
                orientation='h',
                yanchor='bottom',
                y=-0.1,
                xanchor='center',
                x=0.5
            )
        )
        
        fig.show()

# Create interactive widget
controls = widgets.VBox([
    widgets.HBox([country_dropdown]),
    metrics_output
], layout=Layout(
    padding='20px',
    border='1px solid #dee2e6',
    border_radius='12px',
    margin='0 0 20px 0'
))

# Display the controls and visualization
display(controls)
widgets.interactive_output(plot_geo_routes, {'country': country_dropdown})

VBox(children=(HBox(children=(Dropdown(description='🌍 Select Country:', layout=Layout(width='400px'), options=…

Output()

Artifact 5: Route-Based Comparisons

In [None]:
# %% [markdown]
### 📅 Price & Emissions Heatmaps by Travel Date

# %%
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import widgets, Layout
from IPython.display import display, HTML
import numpy as np

metric_selector = widgets.RadioButtons(
    options=['Price', 'CO₂ Emissions'],
    value='Price',
    description='Metric:',
    layout=Layout(width='200px'),
    style={'description_width': '100px'}
)

direction_selector = widgets.RadioButtons(
    options=['From Taipei', 'To Taipei'],
    value='From Taipei',
    description='Direction:',
    layout=Layout(width='200px'),
    style={'description_width': '100px'}
)

route_selector = widgets.Dropdown(
    options=sorted(df['Route'].unique()),
    value='BKK-TPE',
    description='Route:',
    layout=Layout(width='300px'),
    style={'description_width': '100px'}
)

# Create visualization function
def plot_heatmap(metric, direction, route):
    # Filter data
    if direction == 'From Taipei':
        filtered_df = df[(df['From'] == 'TPE') & (df['Route'] == route)]
    else:
        filtered_df = df[(df['To'] == 'TPE') & (df['Route'] == route)]
    
    # Create pivot table (days x months)
    pivot_df = filtered_df.pivot_table(
        values='Price' if metric == 'Price' else 'co2 emissions',
        index=filtered_df['Date'].dt.day,
        columns=filtered_df['Date'].dt.month,
        aggfunc='mean'
    )
    
    # Create custom hover text
    hover_text = []
    for day in range(1, 32):
        hover_row = []
        for month in range(6, 9):  # June-August
            val = pivot_df.loc[day, month] if day in pivot_df.index and month in pivot_df.columns else np.nan
            if not np.isnan(val):
                hover_row.append(
                    f"<b>{month}/{day}</b><br>"
                    f"{'Price' if metric == 'Price' else 'CO₂'}: "
                    f"{'$' if metric == 'Price' else ''}{val:.0f}{'' if metric == 'Price' else 'kg'}"
                )
            else:
                hover_row.append("No data")
        hover_text.append(hover_row)
    
    # Create heatmap
    fig = go.Figure(data=go.Heatmap(
        z=pivot_df.values,
        x=pivot_df.columns.map(lambda x: f'2025-{x:02d}'),  # Format as month names
        y=pivot_df.index,
        colorscale='Viridis' if metric == 'Price' else 'Greens',
        colorbar=dict(
            title='Price (USD)' if metric == 'Price' else 'CO₂ (kg)',
            titleside='right'
        ),
        hoverinfo='text',
        text=hover_text,
        texttemplate="%{text}",
        zmin=filtered_df['Price'].min() if metric == 'Price' else filtered_df['co2 emissions'].min(),
        zmax=filtered_df['Price'].max() if metric == 'Price' else filtered_df['co2 emissions'].max()
    ))
    
    # Update layout
    fig.update_layout(
        title=f"<b>{metric} Heatmap ({direction}) - {route}</b><br><sub>Hover to see exact values</sub>",
        xaxis_title="Month",
        yaxis_title="Day of Month",
        height=600,
        plot_bgcolor='white',
        paper_bgcolor='white',
        font=dict(family='Arial', size=12),
        margin=dict(l=50, r=50, t=100, b=50),
        xaxis=dict(
            tickmode='array',
            tickvals=pivot_df.columns,
            ticktext=['Jun', 'Jul', 'Aug']
        ),
        yaxis=dict(
            autorange='reversed',
            tickmode='linear',
            dtick=1
        )
    )
    
    # Add optimal day annotations
    if metric == 'Price':
        optimal_val = filtered_df['Price'].min()
    else:
        optimal_val = filtered_df['co2 emissions'].min()
    
    optimal_days = filtered_df[
        (filtered_df['Price' if metric == 'Price' else 'co2 emissions'] == optimal_val)
    ]['Date']
    
    for day in optimal_days:
        fig.add_annotation(
            x=day.month,
            y=day.day,
            text="⭐",
            showarrow=False,
            font=dict(size=15, color='gold')
        )
    
    fig.show()

# Create control panel
controls = widgets.VBox([
    widgets.HBox([metric_selector, direction_selector, route_selector]),
], layout=Layout(
    padding='20px',
    border='1px solid #dee2e6',
    border_radius='12px',
    margin='0 0 20px 0'
))

# Display the controls and visualization
display(controls)
widgets.interactive_output(
    plot_heatmap,
    {'metric': metric_selector, 'direction': direction_selector, 'route': route_selector}
)

# Create Duration vs Stops visualization
def plot_duration_stops():
    fig = px.box(
        df,
        x='Stops',
        y='Flight Duration',
        color='Stops_Category',
        points='all',
        title='<b>Flight Duration by Number of Stops</b>',
        labels={'Flight Duration': 'Duration (minutes)', 'Stops': 'Number of Stops'},
        height=500
    )
    
    fig.update_layout(
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(tickvals=sorted(df['Stops'].unique())),
        showlegend=False
    )
    
    # Add trend line annotation
    fig.add_annotation(
        x=0.5, y=0.9,
        xref='paper', yref='paper',
        text="<i>Trend: More stops generally increase total duration</i>",
        showarrow=False,
        font=dict(size=12, color='gray')
    )
    
    fig.show()

# Create interactive widgets
controls = widgets.VBox([
    widgets.HBox([direction_selector, metric_selector]),
], layout=Layout(
    padding='20px',
    border='1px solid #dee2e6',
    border_radius='12px',
    margin='0 0 20px 0'
))

# Display the controls and visualizations
display(controls)
widgets.interactive_output(plot_heatmaps, {'direction': direction_selector, 'metric': metric_selector})

# Display the Duration vs Stops visualization
plot_duration_stops()

VBox(children=(HBox(children=(RadioButtons(description='Metric:', layout=Layout(width='200px'), options=('Pric…

VBox(children=(HBox(children=(RadioButtons(description='Direction:', layout=Layout(width='200px'), options=('F…

Artifact 6: Anomaly Detection

In [None]:
# Markdown: Anomaly Detection
"""
## Anomaly Detection
This section identifies outliers in price, duration, and emissions using the IQR method, visualized with box plots.
"""

# Outlier detection function
def detect_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
    return outliers

# Detect outliers
price_outliers = detect_outliers(df, 'Price')
duration_outliers = detect_outliers(df, 'Flight Duration')
emissions_outliers = detect_outliers(df, 'co2 emissions')
print("Price Outliers:", len(price_outliers))
print("Duration Outliers:", len(duration_outliers))
print("Emissions Outliers:", len(emissions_outliers))

# Box plots with outliers
fig = make_subplots(rows=1, cols=3, subplot_titles=['Price (USD)', 'Flight Duration (minutes)', 'CO2 Emissions (kg)'])
fig.add_trace(go.Box(x=df['Route'], y=df['Price'], name='Price', marker_color='#1f77b4'), row=1, col=1)
fig.add_trace(go.Box(x=df['Route'], y=df['Flight Duration'], name='Duration', marker_color='#ff7f0e'), row=1, col=2)
fig.add_trace(go.Box(x=df['Route'], y=df['co2 emissions'], name='CO2 Emissions', marker_color='#2ca02c'), row=1, col=3)
fig.update_layout(title_text='Outlier Detection by Route', showlegend=False, height=500)
fig.update_xaxes(tickangle=45)
fig.show()

Artifact 7: Conclusion