# Data

In [1]:
import pandas as pd

data = {
    "order_id": [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010],
    "customer_id": ["C001", "C002", "C001", "C003", "C004", "C005", "C003", "C001", "C006", "C007"],
    "product_id": ["P100", "P101", "P100", "P102", "P103", "P104", "P102", "P100", "P105", "P101"],
    "order_date": pd.to_datetime([
        "2024-12-01", "2024-12-01", "2024-12-05", "2024-12-08", "2024-12-10",
        "2024-12-11", "2024-12-12", "2024-12-15", "2024-12-20", "2024-12-22"
    ]),
    "quantity": [2, 1, 1, 3, 2, 1, 1, 1, 5, 2],
    "price_per_unit": [19.99, 299.99, 19.99, 9.99, 15.00, 45.00, 9.99, 19.99, 7.50, 299.99],
    "category": ["Electronics", "Home", "Electronics", "Books", "Books", "Electronics", "Books", "Electronics", "Home", "Home"],
    "returned": [False, False, True, False, False, False, True, False, False, True]
}

amazon_orders = pd.DataFrame(data)
amazon_orders


Unnamed: 0,order_id,customer_id,product_id,order_date,quantity,price_per_unit,category,returned
0,1001,C001,P100,2024-12-01,2,19.99,Electronics,False
1,1002,C002,P101,2024-12-01,1,299.99,Home,False
2,1003,C001,P100,2024-12-05,1,19.99,Electronics,True
3,1004,C003,P102,2024-12-08,3,9.99,Books,False
4,1005,C004,P103,2024-12-10,2,15.0,Books,False
5,1006,C005,P104,2024-12-11,1,45.0,Electronics,False
6,1007,C003,P102,2024-12-12,1,9.99,Books,True
7,1008,C001,P100,2024-12-15,1,19.99,Electronics,False
8,1009,C006,P105,2024-12-20,5,7.5,Home,False
9,1010,C007,P101,2024-12-22,2,299.99,Home,True


# Question1:
Write code to calculate the total revenue for each product after adjusting for returned orders.A returned order should contribute 0 revenue.Include product_id, total_units_sold, adjusted_revenue, and sort by adjusted_revenue descending.

In [10]:
amazon_orders['net_revenue'] = amazon_orders['quantity']*amazon_orders['price_per_unit']*(1-amazon_orders['returned'])

ans1 = amazon_orders.groupby(['product_id']).agg(total_units_sold = ('quantity', 'sum'),
                                                 adjusted_revenue = ('net_revenue', 'sum'),
                                                 ).reset_index().sort_values(by = 'adjusted_revenue', ascending=False)

ans1

Unnamed: 0,product_id,total_units_sold,adjusted_revenue
1,P101,3,299.99
0,P100,4,59.97
4,P104,1,45.0
5,P105,5,37.5
3,P103,2,30.0
2,P102,4,29.97


# Question 2: 
Top Revenue-Generating Product Per Week (Excluding Returns)
Write code to find, for each calendar week, the product that generated the highest total adjusted revenue (excluding returned orders).

Your output should include:

week_start_date (Monday of that week)

product_id

weekly_revenue

In [31]:
amazon_orders['net_revenue'] = amazon_orders['quantity']*amazon_orders['price_per_unit']*(1-amazon_orders['returned'])
amazon_orders['day_of_week'] = amazon_orders['order_date'].dt.weekday

def get_monday(row):
    return row['order_date'] - pd.Timedelta(days = row['day_of_week'])

amazon_orders['week_start_date'] = amazon_orders.apply(get_monday, axis = 1)

ans2 = amazon_orders.groupby(['week_start_date', 'product_id']).agg(weekly_revenue = ('net_revenue', 'sum')).reset_index()

ans2['rank'] = ans2.groupby(['week_start_date'])['weekly_revenue'].rank(method='dense', ascending=False)

ans2.query('rank == 1')

Unnamed: 0,week_start_date,product_id,weekly_revenue,rank
1,2024-11-25,P101,299.99,1.0
3,2024-12-02,P102,29.97,1.0
7,2024-12-09,P104,45.0,1.0
9,2024-12-16,P105,37.5,1.0


# Question3
Identify customers who were inactive for at least 7 consecutive days, and then placed an order again (i.e., got reactivated).

Return a DataFrame with:

customer_id

reactivation_date (date they came back after inactivity)

days_since_last_order

In [51]:
amazon_orders = amazon_orders.sort_values(by = ['customer_id', 'order_date'])
amazon_orders['last_order_date'] = amazon_orders.groupby('customer_id')['order_date'].shift(1)
amazon_orders['days_since_last_order'] = amazon_orders['order_date'] - amazon_orders['last_order_date']
amazon_orders

Unnamed: 0,order_id,customer_id,product_id,order_date,quantity,price_per_unit,category,returned,net_revenue,day_of_week,week_start_date,last_order_date,days_since_last_order
0,1001,C001,P100,2024-12-01,2,19.99,Electronics,False,39.98,6,2024-11-25,NaT,NaT
2,1003,C001,P100,2024-12-05,1,19.99,Electronics,True,0.0,3,2024-12-02,2024-12-01,4 days
7,1008,C001,P100,2024-12-15,1,19.99,Electronics,False,19.99,6,2024-12-09,2024-12-05,10 days
1,1002,C002,P101,2024-12-01,1,299.99,Home,False,299.99,6,2024-11-25,NaT,NaT
3,1004,C003,P102,2024-12-08,3,9.99,Books,False,29.97,6,2024-12-02,NaT,NaT
6,1007,C003,P102,2024-12-12,1,9.99,Books,True,0.0,3,2024-12-09,2024-12-08,4 days
4,1005,C004,P103,2024-12-10,2,15.0,Books,False,30.0,1,2024-12-09,NaT,NaT
5,1006,C005,P104,2024-12-11,1,45.0,Electronics,False,45.0,2,2024-12-09,NaT,NaT
8,1009,C006,P105,2024-12-20,5,7.5,Home,False,37.5,4,2024-12-16,NaT,NaT
9,1010,C007,P101,2024-12-22,2,299.99,Home,True,0.0,6,2024-12-16,NaT,NaT


In [53]:
cols_to_keep = ['customer_id', 'order_date', 'days_since_last_order']
ans3 = amazon_orders[amazon_orders['days_since_last_order'].dt.days > 7][cols_to_keep]
ans3 = ans3.rename(columns={'order_date':'reactivation_date'})

ans3.sort_values(by='reactivation_date').drop_duplicates('customer_id')

ans3

Unnamed: 0,customer_id,reactivation_date,days_since_last_order
7,C001,2024-12-15,10 days


# Question 4

For each category and each order date, compute the rolling 30-day return rate, defined as:

$$\text{return rate} = \frac{\text{Number of returned orders in last 30 days}}{\text{Total number of orders in last 30 days}}$$

Return a DataFrame with:
- category
- order_date
- rolling_return_rate (rounded to 3 decimals)

In [59]:
amazon_orders2 = amazon_orders[['order_id', 'category', 'order_date', 'returned']]
amazon_orders2

Unnamed: 0,order_id,category,order_date,returned
0,1001,Electronics,2024-12-01,False
2,1003,Electronics,2024-12-05,True
7,1008,Electronics,2024-12-15,False
1,1002,Home,2024-12-01,False
3,1004,Books,2024-12-08,False
6,1007,Books,2024-12-12,True
4,1005,Books,2024-12-10,False
5,1006,Electronics,2024-12-11,False
8,1009,Home,2024-12-20,False
9,1010,Home,2024-12-22,True


In [60]:
# Step 1: Daily aggregates
daily_stats = amazon_orders.groupby(['category', 'order_date']).agg(
    total_orders=('order_id', 'count'),
    returned_orders=('returned', 'sum')
).reset_index()

# Step 2: Set multi-index for time-aware rolling
daily_stats = daily_stats.sort_values(['category', 'order_date'])
daily_stats.set_index('order_date', inplace=True)

daily_stats

Unnamed: 0_level_0,category,total_orders,returned_orders
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-12-08,Books,1,0
2024-12-10,Books,1,0
2024-12-12,Books,1,1
2024-12-01,Electronics,1,0
2024-12-05,Electronics,1,1
2024-12-11,Electronics,1,0
2024-12-15,Electronics,1,0
2024-12-01,Home,1,0
2024-12-20,Home,1,0
2024-12-22,Home,1,1


In [61]:
# Step 3: Apply 30-day rolling logic within each category
rolling_stats = daily_stats.groupby('category').rolling('30D').sum().reset_index()
rolling_stats

Unnamed: 0,category,order_date,total_orders,returned_orders
0,Books,2024-12-08,1.0,0.0
1,Books,2024-12-10,2.0,0.0
2,Books,2024-12-12,3.0,1.0
3,Electronics,2024-12-01,1.0,0.0
4,Electronics,2024-12-05,2.0,1.0
5,Electronics,2024-12-11,3.0,1.0
6,Electronics,2024-12-15,4.0,1.0
7,Home,2024-12-01,1.0,0.0
8,Home,2024-12-20,2.0,0.0
9,Home,2024-12-22,3.0,1.0


In [63]:
# Step 4: Compute return rate
rolling_stats['rolling_return_rate'] = rolling_stats['returned_orders'] / rolling_stats['total_orders']

# Final columns
result = rolling_stats[['category', 'order_date', 'rolling_return_rate']]
result

Unnamed: 0,category,order_date,rolling_return_rate
0,Books,2024-12-08,0.0
1,Books,2024-12-10,0.0
2,Books,2024-12-12,0.333333
3,Electronics,2024-12-01,0.0
4,Electronics,2024-12-05,0.5
5,Electronics,2024-12-11,0.333333
6,Electronics,2024-12-15,0.25
7,Home,2024-12-01,0.0
8,Home,2024-12-20,0.0
9,Home,2024-12-22,0.333333
