In [None]:

import pandas as pd
import plotly.express as px

# Read the mock e-commerce CSV. The three timestamp columns are parsed into
# datetimes at load time so they can be used with the `.dt` accessor and
# subtracted from one another.
date_columns = [
    "order_purchase_timestamp",
    "order_delivered_customer_date",
    "order_estimated_delivery_date",
]
df = pd.read_csv("mock_brazilian_ecommerce_data.csv", parse_dates=date_columns)


### Monthly Revenue Trends

In [None]:

# Label every row with its purchase month as a 'YYYY-MM' string.
# A missing purchase timestamp would otherwise become the literal string 'NaT'
# after astype(str) and show up as its own bar; masking it back to NaN lets
# groupby (which drops NaN keys by default) leave those rows out of the chart.
purchase_month = df['order_purchase_timestamp'].dt.to_period('M')
df['order_month'] = purchase_month.astype(str).where(purchase_month.notna())

# Total payment value per purchase month. groupby sorts its keys, and
# 'YYYY-MM' strings sort in chronological order.
monthly_revenue = df.groupby('order_month')['payment_value'].sum().reset_index()

fig = px.bar(monthly_revenue, x='order_month', y='payment_value',
             labels={'order_month': 'Month', 'payment_value': 'Revenue'},
             title='Monthly Revenue Trends')
fig.show()


### Customer Review Score Distribution

In [None]:

# Count how many rows carry each review score, ordered by score value.
# value_counts skips missing scores by default.
score_counts = df['review_score'].value_counts().sort_index()
review_distribution = (
    score_counts
    .rename_axis('review_score')
    .reset_index(name='count')
)

fig = px.bar(
    review_distribution,
    x='review_score',
    y='count',
    title='Customer Review Score Distribution',
    labels={'review_score': 'Review Score', 'count': 'Number of Reviews'},
)
fig.show()


### Delivery Performance: Delay Distribution

In [None]:

# Delay = actual delivery date minus estimated delivery date, in whole days.
# Positive values are deliveries after the estimate, negative values before it.
# `.dt.days` keeps only the day component of each timedelta.
delivered_on = df['order_delivered_customer_date']
estimated_for = df['order_estimated_delivery_date']
df['delivery_delay'] = (delivered_on - estimated_for).dt.days

fig = px.histogram(
    df,
    x='delivery_delay',
    nbins=20,
    title='Delivery Performance: Delay Distribution',
    labels={'delivery_delay': 'Delivery Delay (days)'},
)
fig.show()


### Customer Segmentation by State

In [None]:

# Number of rows per customer state, most frequent state first
# (value_counts orders by descending count).
# NOTE(review): this counts rows of df; it equals the number of orders only
# if df holds one row per order - confirm against the dataset.
state_orders = (
    df['customer_state']
    .value_counts()
    .rename_axis('customer_state')
    .reset_index(name='order_count')
)

fig = px.bar(
    state_orders,
    x='customer_state',
    y='order_count',
    title='Customer Segmentation by State',
    labels={'customer_state': 'Customer State', 'order_count': 'Number of Orders'},
)
fig.show()
