In [None]:
import pandas as pd
import plotly.express as px
from pathlib import Path

# Define paths
# ROOT is the parent of the current working directory, so these paths only
# resolve correctly when the notebook is run from a direct subfolder of the project.
ROOT = Path("..").resolve()
DATA_DIR = ROOT / "data" / "processed"
ANALYTICS_PATH = DATA_DIR / "analytics_table.parquet"

# Columns read by the analysis cells in this notebook. Checked right after
# loading so a schema change fails here with a clear message rather than as a
# bare KeyError in a later plotting cell.
REQUIRED_COLUMNS = {"country", "amount", "status_clean"}

# Load data
df = pd.read_parquet(ANALYTICS_PATH)
missing_columns = REQUIRED_COLUMNS - set(df.columns)
if missing_columns:
    raise KeyError(
        f"{ANALYTICS_PATH} is missing required columns: {sorted(missing_columns)}"
    )
print(f"Loaded {len(df)} rows from {ANALYTICS_PATH}")
df.head()

## Data Overview
Inspect each column's data type and non-null count to spot missing values.

In [None]:
# Print a concise summary of the loaded frame: its columns, their dtypes and
# non-null counts.
df.info()

## Analysis

### 1. Revenue by Country

In [None]:
# Sum order amounts per country, then flatten the result back into a
# two-column frame ("country", "amount") for plotting.
# NOTE(review): groupby skips rows whose country is missing by default —
# confirm that excluding them from the totals is intended.
country_totals = df.groupby("country")["amount"].sum()
revenue_by_country = country_totals.reset_index()

fig = px.bar(
    data_frame=revenue_by_country,
    x="country",
    y="amount",
    title="Total Revenue by Country",
)
fig.show()

# Save figure: keep an HTML copy of the chart, creating the output folder
# (and any missing parents) if it does not exist yet.
FIGURES_DIR = ROOT.joinpath("reports", "figures")
FIGURES_DIR.mkdir(exist_ok=True, parents=True)
revenue_chart_path = FIGURES_DIR / "revenue_by_country.html"
fig.write_html(revenue_chart_path)

### 2. Order Status Distribution

In [None]:
# Tally rows per cleaned status and expose the tally as a frame with
# "status" and "count" columns, one row per distinct status.
status_counts = (
    df["status_clean"]
    .value_counts()
    .rename_axis("status")
    .reset_index(name="count")
)

fig = px.pie(
    data_frame=status_counts,
    names="status",
    values="count",
    title="Order Status Distribution",
)
fig.show()

### 3. Amount Distribution (Outliers)

In [None]:
# Histogram of order amounts. The box plot in the top margin marks extreme
# values individually, so outliers stay visible even when their histogram
# bars are too short to notice.
fig = px.histogram(
    df,
    x="amount",
    title="Distribution of Order Amounts",
    nbins=20,
    marginal="box",
)
fig.show()