# Task 2: Interactive Dashboard for Retail EDA
# Author: Syed Huzaifa Bin Khamis | Nexus AI Digital Internship

# 1. Import Libraries

# In [1]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

# 2. Cleaning dataset

# In [2]:
# Read the raw export; the encoding is passed explicitly
# (presumably the file is not valid UTF-8 -- confirm against the data).
df = pd.read_csv("OnlineRetail.csv", encoding="unicode_escape")

# Same cleaning as Task 1: keep only rows that have a CustomerID, drop
# fully duplicated rows, then fix the column types in one step.
df = df.dropna(subset=['CustomerID']).drop_duplicates()
df = df.assign(
    InvoiceDate=pd.to_datetime(df['InvoiceDate']),
    # Safe to cast to int because rows with a missing CustomerID are gone.
    CustomerID=df['CustomerID'].astype(int),
)

# Per-row sales value: Quantity multiplied by UnitPrice
df['TotalSales'] = df['Quantity'] * df['UnitPrice']

# Write the cleaned frame to disk for reuse (optional)
df.to_csv("OnlineRetail_Clean.csv", index=False)


# 3. Load cleaned dataset

# In [3]:
# Reload the cleaned file. read_csv is called without date parsing, so
# InvoiceDate is converted to datetime here; TotalSales is (re)computed
# from Quantity and UnitPrice.
df = pd.read_csv("OnlineRetail_Clean.csv")
df = df.assign(
    InvoiceDate=pd.to_datetime(df['InvoiceDate']),
    TotalSales=df['Quantity'] * df['UnitPrice'],
)

# 4. Extract month-year for trend analysis

# In [4]:
# Collapse each invoice timestamp to its calendar month and store it as
# text, so it can be used as a grouping key and axis label.
df['YearMonth'] = (
    df['InvoiceDate']
    .dt.to_period("M")
    .astype(str)
)

# 5. Create Dash App

# In [5]:
# Create the Dash application; the title is what the browser tab shows.
app = Dash(__name__, title="Retail Sales Dashboard")

# 6. Dropdown options for country

# In [6]:
# One dropdown entry per distinct country, in the order the countries
# first appear in the data; label and value are the same string.
country_options = [
    dict(label=country, value=country)
    for country in df['Country'].unique()
]

# 7. App Layout

# In [7]:
# Page structure: heading, country selector, then two rows of two graphs.
# The graphs start empty; their figures are supplied by the callback that
# targets the ids declared here.
app.layout = html.Div(children=[
    html.H1(
        children="Retail Sales Dashboard",
        style=dict(textAlign='center'),
    ),

    # Country selector: a synthetic 'All' entry is placed ahead of the
    # data-derived options and is the initial selection. The selection
    # cannot be cleared.
    html.Div(
        children=[
            html.Label(children="Filter by Country:"),
            dcc.Dropdown(
                id='country_filter',
                options=[dict(label='All', value='All'), *country_options],
                value='All',
                clearable=False,
            ),
        ],
        style=dict(width='40%', margin='auto'),
    ),

    html.Br(),

    # First row: monthly sales trend next to the top products
    html.Div(
        children=[
            dcc.Graph(id='sales_trend'),
            dcc.Graph(id='top_products'),
        ],
        style=dict(display='flex', flexWrap='wrap'),
    ),

    # Second row: sales by country next to the customer spend histogram
    html.Div(
        children=[
            dcc.Graph(id='country_sales'),
            dcc.Graph(id='basket_distribution'),
        ],
        style=dict(display='flex', flexWrap='wrap'),
    ),
])


# 8. Callbacks

# In [8]:
def _top10_sales_bar(data, category, title):
    """Return a horizontal bar chart of the 10 largest TotalSales sums per *category*."""
    ranked = data.groupby(category)['TotalSales'].sum().nlargest(10).reset_index()
    fig = px.bar(ranked, x='TotalSales', y=category,
                 orientation='h', title=title)
    # Plotly lays categories out bottom-to-top in data order, which would put
    # the best seller at the bottom. Ordering the axis by total places the
    # largest bar at the top, where a "top 10" ranking is expected to start.
    fig.update_yaxes(categoryorder='total ascending')
    return fig


@app.callback(
    [Output('sales_trend', 'figure'),
     Output('top_products', 'figure'),
     Output('country_sales', 'figure'),
     Output('basket_distribution', 'figure')],
    [Input('country_filter', 'value')]
)
def update_dashboard(selected_country):
    """Rebuild all four dashboard figures for the chosen country.

    Args:
        selected_country: Current value of the 'country_filter' dropdown.
            'All' (or None, should the dropdown ever deliver no value)
            means no filtering; any other value keeps only the rows whose
            'Country' equals it.

    Returns:
        A tuple of four Plotly figures, in the same order as the declared
        Outputs: monthly sales trend, top 10 products, top countries,
        and the histogram of total spend per customer.
    """
    # The selected frame is only read below, never modified, so the full
    # data set can be used directly instead of being copied on every call.
    if selected_country is None or selected_country == 'All':
        dff = df
    else:
        dff = df[df['Country'] == selected_country]

    # Sales trend: total sales per calendar month
    monthly_sales = dff.groupby('YearMonth')['TotalSales'].sum().reset_index()
    fig_trend = px.line(monthly_sales, x='YearMonth', y='TotalSales',
                        title="Monthly Sales Trend", markers=True)

    # Top products and top countries share the same ranking logic
    fig_products = _top10_sales_bar(dff, 'Description',
                                    "Top 10 Products by Sales")
    fig_country = _top10_sales_bar(dff, 'Country',
                                   "Top Countries by Sales")

    # Distribution of total spend per customer (one value per CustomerID)
    customer_spend = dff.groupby('CustomerID')['TotalSales'].sum().reset_index()
    fig_basket = px.histogram(customer_spend, x='TotalSales', nbins=50,
                              title="Customer Spend Distribution")

    return fig_trend, fig_products, fig_country, fig_basket

# 9. Run Server

# In [9]:
# Start the server only when this file is the entry point (script or
# notebook cell), not when it is imported. debug=True turns on Dash's
# development tooling, so this is meant for local use.
if __name__ == "__main__":
    app.run(debug=True)