# In [2]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output, dash_table
import dash_bootstrap_components as dbc

# Load the labeled review data and derive the columns the dashboard relies on.

file_path = "../datasets/labeled_data/ethiopian_airlines_overall_sentiment_final.csv"
df = pd.read_csv(file_path)

# Years are stored as strings so they can be compared directly with the
# values offered by the year dropdown (built from this same column below).
df['year'] = df['year'].astype(str)

# Month abbreviations become an ordered categorical (Jan < Feb < ... < Dec).
_month_order = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
df['month'] = pd.Categorical(df['month'], categories=_month_order, ordered=True)

# "YYYY-Mon" label plus a real timestamp parsed from that same label.
df['year_month'] = df['year'] + '-' + df['month'].astype(str)
df['year_month_dt'] = pd.to_datetime(df['year_month'], format='%Y-%b')

# Rating columns that can be averaged and filtered on.
rating_cols = [
    "seat_comfort",
    "food_and_beverages",
    "inflight_entertainment",
    "value_for_money",
    "customer_service",
]


def _with_all_option(all_label, specific_options):
    """Return dropdown options with an 'ALL' catch-all entry placed first."""
    return [{'label': all_label, 'value': 'ALL'}, *specific_options]


year_options = _with_all_option(
    'All Years',
    ({'label': year, 'value': year} for year in sorted(df['year'].unique())),
)
source_options = _with_all_option(
    'All Sources',
    ({'label': source, 'value': source} for source in sorted(df['source'].unique())),
)
category_options = _with_all_option(
    'All Categories',
    ({'label': column.replace('_', ' ').title(), 'value': column} for column in rating_cols),
)

def make_subtitle(year_label, source_label, category_label=None):
    """Build the chart subtitle describing the active filters.

    Args:
        year_label: Text shown after ``Year:``.
        source_label: Text shown after ``Source:``.
        category_label: Optional text shown after ``Category:``; the
            category segment is omitted when this is falsy.

    Returns:
        The segments joined with ``" | "``.
    """
    segments = [f"Year: {year_label}", f"Source: {source_label}"]
    if category_label:
        segments.append(f"Category: {category_label}")
    return " | ".join(segments)

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _filter_column(label_text, dropdown):
    """Wrap a labelled dropdown in a quarter-width (3/12) column."""
    return dbc.Col([html.Label(label_text), dropdown], width=3)


def _graph_row(graph_ids, col_width):
    """Build a row holding one equally sized graph column per id."""
    return dbc.Row([
        dbc.Col(dcc.Graph(id=graph_id), width=col_width)
        for graph_id in graph_ids
    ])


# Page heading and introductory text.
_page_title = html.H2(
    "Ethiopian Airlines Sentiment Dashboard",
    style={'textAlign': 'center'},
)
_page_intro = html.P(
    "Analyze Ethiopian Airlines reviews by year, source, category, and rating. "
    "See sentiment trends, average ratings, and the top cities for positive and negative feedback. "
    "Filter results and view real passenger experiences in detail.",
    style={'textAlign': 'center'},
)

# The four filter controls that drive the dashboard callback.
_filter_row = dbc.Row(
    [
        _filter_column(
            "Select Year",
            dcc.Dropdown(id='year-dropdown', options=year_options, value='ALL', clearable=False),
        ),
        _filter_column(
            "Select Source",
            dcc.Dropdown(id='source-dropdown', options=source_options, value='ALL', clearable=False),
        ),
        _filter_column(
            "Select Category",
            dcc.Dropdown(id='category-dropdown', options=category_options, value='ALL', clearable=False),
        ),
        _filter_column(
            "Category Rating (optional)",
            dcc.Dropdown(
                id='category-rating-dropdown',
                options=[{'label': str(rating), 'value': rating} for rating in range(1, 6)],
                value=None,
                clearable=True,
                placeholder="All Ratings",
            ),
        ),
    ],
    className='mb-4',
)

# Paginated table listing the reviews that match the current filters.
_table_columns = [
    'year', 'month', 'departure_city', 'arrival_city', 'review_title', 'overall_sentiment'
] + rating_cols
_reviews_table = dash_table.DataTable(
    id='reviews-table',
    columns=[{'name': column, 'id': column} for column in _table_columns],
    page_size=10,
    style_table={'overflowX': 'auto'},
    style_cell={'textAlign': 'left'},
)

app.layout = dbc.Container(
    [
        _page_title,
        _page_intro,
        _filter_row,
        # Sentiment distribution, sentiment counts and average ratings.
        _graph_row(['sentiment-pie', 'sentiment-bar', 'avg-rating-bar'], 4),
        # Sentiment trend line chart.
        _graph_row(['trend-line'], 12),
        # Rating trend line chart.
        _graph_row(['rating-trend-line'], 12),
        # Top departure cities, positive vs. negative.
        _graph_row(['top10-departure-positive', 'top10-departure-negative'], 6),
        # Top arrival cities, positive vs. negative.
        _graph_row(['top10-arrival-positive', 'top10-arrival-negative'], 6),
        html.H4("Filtered Reviews Table", className='mt-4'),
        _reviews_table,
    ],
    fluid=True,
)

def _add_subtitle(fig, text):
    """Add *text* as a centered grey subtitle above the plot area of *fig*.

    Every chart uses the same paper-relative position so that subtitles are
    placed consistently across the dashboard. Returns *fig* for chaining.
    """
    fig.add_annotation(
        text=text, xref="paper", yref="paper", x=0.5, y=1.12,
        showarrow=False, font=dict(size=13, color="#555"),
        xanchor='center', yanchor='top'
    )
    return fig


def _apply_month_ticks(fig, month_values, selected_year):
    """Format the x-axis of a monthly time-series figure.

    Ticks are formatted as ``%Y-%b`` and rotated 45 degrees. When all years
    are selected only January and July are ticked; for a single year every
    month present in *month_values* gets a tick. Returns *fig*.
    """
    months = (
        pd.to_datetime(month_values)
        .dropna()
        .drop_duplicates()
        .sort_values()
        .reset_index(drop=True)
    )
    axis_settings = dict(tickformat='%Y-%b', tickangle=45)
    # With no data there is nothing to tick; leave tick placement untouched.
    if not months.empty:
        ticks = months
        if selected_year == 'ALL':
            jan_jul = months[months.dt.month.isin([1, 7])]
            # Fall back to every month when the data has no Jan/Jul entry,
            # otherwise the axis would end up with an empty tick list.
            if not jan_jul.empty:
                ticks = jan_jul
        axis_settings['tickvals'] = ticks
    fig.update_xaxes(**axis_settings)
    return fig


def _filter_reviews(selected_year, selected_source, selected_category, selected_rating):
    """Return the rows of ``df`` matching the dropdown selections.

    ``'ALL'`` disables the corresponding filter. The rating filter is only
    applied when a specific category is selected, and a selected category
    always drops rows whose rating for that category is missing.
    """
    keep = pd.Series(True, index=df.index)
    if selected_year != 'ALL':
        keep &= df['year'] == selected_year
    if selected_source != 'ALL':
        keep &= df['source'] == selected_source
    if selected_category != 'ALL':
        keep &= df[selected_category].notnull()
        if selected_rating is not None:
            keep &= df[selected_category] == selected_rating
    # Boolean indexing yields a new frame, so the module-level data is
    # never handed out directly and no explicit copy is needed.
    return df[keep]


def _sentiment_pie_figure(reviews):
    """Pie chart of the overall sentiment distribution."""
    return px.pie(
        reviews,
        names="overall_sentiment",
        title="Sentiment Distribution",
        color="overall_sentiment",
        color_discrete_map={
            "Positive": "#1f77b4",
            "Negative": "#d62728",
            "Neutral": "#2ca02c"
        }
    )


def _sentiment_count_figure(reviews):
    """Bar chart of review counts per sentiment, with values above the bars."""
    sentiment_counts = reviews['overall_sentiment'].value_counts().reset_index()
    sentiment_counts.columns = ['Sentiment', 'Count']
    fig = px.bar(
        sentiment_counts, x='Sentiment', y='Count',
        title='Overall Sentiment Count',
        text_auto=True
    )
    fig.update_traces(textposition='outside')
    return fig


def _average_rating_figure(reviews, selected_category, category_label):
    """Bar chart of average ratings: all categories, or only the selected one."""
    if selected_category == 'ALL':
        avg_ratings = reviews[rating_cols].mean().reset_index()
        avg_ratings.columns = ['Category', 'Average Rating']
        fig = px.bar(
            avg_ratings, x='Category', y='Average Rating',
            title='Average Ratings per Category',
            text_auto='.2f'
        )
    else:
        # Show 0 rather than NaN when nothing matches the filters.
        avg_value = reviews[selected_category].mean() if not reviews.empty else 0
        fig = px.bar(
            x=[category_label],
            y=[avg_value],
            labels={'x': 'Category', 'y': 'Average Rating'},
            title=f'Average {category_label}',
            text_auto='.2f'
        )
    fig.update_traces(textposition='outside')
    fig.update_yaxes(range=[0, 5])
    return fig


def _sentiment_trend_figure(reviews, selected_year):
    """Line chart of monthly review counts per sentiment."""
    # Group on the parsed month directly so the x column exists even when
    # *reviews* is empty (a post-hoc merge would have to be skipped then).
    trend = (
        reviews.groupby(['year_month_dt', 'overall_sentiment'])
        .size()
        .reset_index(name='count')
    )
    fig = px.line(
        trend, x='year_month_dt', y='count', color='overall_sentiment',
        title='Sentiment Trend Over Months',
        labels={'year_month_dt': 'Year-Month', 'count': 'Review Count'}
    )
    return _apply_month_ticks(fig, trend['year_month_dt'], selected_year)


def _rating_trend_figure(reviews, selected_year, selected_category, category_label):
    """Line chart of monthly average ratings for all or one rating category."""
    if selected_category == 'ALL':
        rating_trend = (
            reviews.groupby('year_month_dt')[rating_cols]
            .mean()
            .reset_index()
        )
        fig = px.line(
            rating_trend,
            x='year_month_dt',
            y=rating_cols,
            title="Average Ratings Trend Over Time (All Categories)",
            labels={'value': 'Average Rating', 'year_month_dt': 'Year-Month', 'variable': 'Category'}
        )
        fig.update_layout(legend_title_text='Category')
    else:
        rating_trend = (
            reviews.groupby('year_month_dt')[selected_category]
            .mean()
            .reset_index()
        )
        fig = px.line(
            rating_trend,
            x='year_month_dt',
            y=selected_category,
            title=f"{category_label} Rating Trend Over Time",
            labels={'year_month_dt': 'Year-Month', selected_category: 'Average Rating'}
        )
    _apply_month_ticks(fig, rating_trend['year_month_dt'], selected_year)
    fig.update_yaxes(range=[0, 5])
    return fig


def _top_cities_figure(reviews, direction, sentiment, bar_color):
    """Bar chart of the 10 most frequent cities for one sentiment.

    *direction* is ``'departure'`` or ``'arrival'`` and selects the
    ``<direction>_city`` column; *sentiment* is matched against
    ``overall_sentiment``.
    """
    city_col = f'{direction}_city'
    top_cities = (
        reviews[reviews['overall_sentiment'] == sentiment]
        .groupby(city_col)
        .size()
        .reset_index(name='count')
        .sort_values('count', ascending=False)
        .head(10)
    )
    return px.bar(
        top_cities, x=city_col, y='count',
        title=f'Top 10 {direction.title()} Cities ({sentiment} Sentiment)',
        color_discrete_sequence=[bar_color]
    )


@app.callback(
    [
        Output('sentiment-pie', 'figure'),
        Output('sentiment-bar', 'figure'),
        Output('avg-rating-bar', 'figure'),
        Output('trend-line', 'figure'),
        Output('top10-departure-positive', 'figure'),
        Output('top10-departure-negative', 'figure'),
        Output('top10-arrival-positive', 'figure'),
        Output('top10-arrival-negative', 'figure'),
        Output('reviews-table', 'data'),
        Output('rating-trend-line', 'figure'),
    ],
    [
        Input('year-dropdown', 'value'),
        Input('source-dropdown', 'value'),
        Input('category-dropdown', 'value'),
        Input('category-rating-dropdown', 'value'),
    ]
)
def update_dashboard(selected_year, selected_source, selected_category, selected_rating):
    """Rebuild every chart and the reviews table for the current filters.

    Args:
        selected_year: Year as a string, or ``'ALL'``.
        selected_source: Review source, or ``'ALL'``.
        selected_category: One of ``rating_cols``, or ``'ALL'``.
        selected_rating: Rating value to match within the selected category,
            or ``None`` for any rating. Ignored when the category is ``'ALL'``.

    Returns:
        A tuple in the order of the callback outputs: sentiment pie,
        sentiment bar, average rating bar, sentiment trend line, top-10
        departure (positive, negative), top-10 arrival (positive, negative),
        table records, rating trend line.
    """
    reviews = _filter_reviews(selected_year, selected_source, selected_category, selected_rating)

    year_label = selected_year if selected_year != 'ALL' else "Overall"
    source_label = selected_source if selected_source != 'ALL' else "All Sources"
    category_label = (
        selected_category.replace('_', ' ').title()
        if selected_category != 'ALL' else "All Categories"
    )
    subtitle = make_subtitle(year_label, source_label)
    subtitle_with_cat = make_subtitle(year_label, source_label, category_label)

    # Sentiment overview charts.
    pie_fig = _add_subtitle(_sentiment_pie_figure(reviews), subtitle)
    bar_fig = _add_subtitle(_sentiment_count_figure(reviews), subtitle)

    # Rating charts carry the category in their subtitle.
    avg_rating_fig = _add_subtitle(
        _average_rating_figure(reviews, selected_category, category_label),
        subtitle_with_cat,
    )
    rating_trend_fig = _add_subtitle(
        _rating_trend_figure(reviews, selected_year, selected_category, category_label),
        subtitle_with_cat,
    )

    trend_fig = _add_subtitle(_sentiment_trend_figure(reviews, selected_year), subtitle)

    # Top-10 city charts: green for positive, red for negative.
    dep_pos_fig = _add_subtitle(
        _top_cities_figure(reviews, 'departure', "Positive", '#2ECC40'), subtitle)
    dep_neg_fig = _add_subtitle(
        _top_cities_figure(reviews, 'departure', "Negative", '#FF4136'), subtitle)
    arr_pos_fig = _add_subtitle(
        _top_cities_figure(reviews, 'arrival', "Positive", '#2ECC40'), subtitle)
    arr_neg_fig = _add_subtitle(
        _top_cities_figure(reviews, 'arrival', "Negative", '#FF4136'), subtitle)

    # Table data
    review_cols = [
        'year', 'month', 'departure_city', 'arrival_city', 'review_title', 'overall_sentiment'
    ] + rating_cols
    table_data = reviews[review_cols].to_dict('records')

    return (
        pie_fig, bar_fig, avg_rating_fig, trend_fig,
        dep_pos_fig, dep_neg_fig, arr_pos_fig, arr_neg_fig, table_data,
        rating_trend_fig
    )

# Launch the Dash server in debug mode on port 8058 when run as a script.
if __name__ == "__main__":
    app.run(port=8058, debug=True)
