In [None]:
# Bring the load_csv_database helper into scope.
# The helper is supplied as context, so it does not need to be included when the notebook is parsed.
import sys
import os

# Directory two levels above the current working directory, as an absolute path.
_two_levels_up = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
sys.path.append(_two_levels_up)
from spider2_utils import load_csv_database


-setup-

In [None]:
import pandas as pd
# Load the "sqlite-sakila" database through the project helper.
# NOTE(review): rows_limit=-1 presumably means "no row limit" — confirm against load_csv_database.
_database = load_csv_database("sqlite-sakila", rows_limit=-1)
# Pull out the payment table; the following cells treat it as a pandas DataFrame.
payment = _database["payment"]

### Question

Among our top 10 paying customers, can you identify the largest change in payment amounts from one month to the immediately following month? Specifically, please determine for which customer and during which month this maximum month-over-month difference occurred, and provide the difference rounded to two decimal places.

### Step 1: Extract month from the payment date
**User intent:** Add a column for the month of the payment as a two-digit string

In [None]:
# Parse the raw timestamps and derive a zero-padded month key ('01'..'12').
# `assign` returns a new frame, so the table object stored in `_database` is
# left untouched and this cell can be re-run safely.
# NOTE(review): '%m' drops the year, so the same calendar month from different
# years shares one key, and keys order by month number rather than
# chronologically — confirm this is acceptable for the date range in the data.
payment = payment.assign(payment_date=pd.to_datetime(payment['payment_date']))
payment = payment.assign(pay_mon=payment['payment_date'].dt.strftime('%m'))

### Step 2: Aggregate monthly payment count and amount per customer
**User intent:** Group by customer and month to calculate payment count and sum

In [None]:
# One row per (month, customer): how many payments were made and their summed amount.
monthly_groups = payment.groupby(['pay_mon', 'customer_id'])
monthly_stats = monthly_groups.agg(
    pay_countpermon=pd.NamedAgg(column='amount', aggfunc='count'),
    pay_amount=pd.NamedAgg(column='amount', aggfunc='sum'),
)
# Turn the (pay_mon, customer_id) group keys back into ordinary columns.
result_table = monthly_stats.reset_index()

### Step 3: Calculate total payments per customer
**User intent:** Sum all monthly payments for each customer

In [None]:
# Collapse the monthly rows into a single overall total per customer.
total_payments = (
    result_table
    .groupby('customer_id', as_index=False)
    .agg(total_payments=('pay_amount', 'sum'))
)

### Step 4: Identify top 10 customers by total payments
**User intent:** Select top 10 customers with highest total payment

In [None]:
# Rank customers by total spend, highest first, then keep the first ten rows.
ranked_customers = total_payments.sort_values(by='total_payments', ascending=False)
top10_customer = ranked_customers.iloc[:10]

### Step 5: Filter result_table for only top 10 customers
**User intent:** Keep only rows belonging to the top 10 customers

In [None]:
# Boolean mask: True for monthly rows whose customer is among the top ten.
is_top10_customer = result_table['customer_id'].isin(top10_customer['customer_id'])
filtered_result = result_table.loc[is_top10_customer]

### Step 6: Sort filtered results so the lag (previous-row shift) follows month order within each customer
**User intent:** Sort by customer and month to prepare for lag calculation

In [None]:
# Order rows customer by customer, with month keys ascending inside each customer,
# so that a positional shift within a customer lands on the preceding month-key row.
sort_keys = ['customer_id', 'pay_mon']
filtered_result = filtered_result.sort_values(by=sort_keys, ascending=True)

### Step 7: Calculate difference in pay_amount from previous month
**User intent:** Compute absolute month-over-month difference in payment amount

In [None]:
import numpy as np

# Amount on the previous row of the same customer (NaN on each customer's first row).
previous_amount = filtered_result.groupby('customer_id')['pay_amount'].shift(1)
# Size of the change relative to that previous row; the sign is discarded.
filtered_result['diff'] = (filtered_result['pay_amount'] - previous_amount).abs()

### Step 8: Keep only the columns needed downstream
**User intent:** Select the columns used in later steps and rename `pay_mon` to `month` to match the final output format

In [None]:
# Keep the columns needed downstream and expose the month key as 'month'.
# `customer_id` is retained so that the row carrying the largest difference can
# still be attributed to a customer. The following steps group, join and filter
# on 'month', 'diff' and 'max_diff' only, so the extra column does not alter them.
# Selecting with a list of columns yields a new frame and `rename` returns
# another one, so `filtered_result` is not modified.
difference_per_mon = (
    filtered_result[['customer_id', 'pay_mon', 'pay_amount', 'pay_countpermon', 'diff']]
    .rename(columns={'pay_mon': 'month'})
)

### Step 9: Find the maximum difference per month
**User intent:** Compute max difference per month

In [None]:
# Largest absolute change seen in each month across the selected customers.
max_diff_per_month = (
    difference_per_mon
    .groupby('month')['diff']
    .max()
    .rename('max_diff')
    .reset_index()
)

### Step 10: Join max diff values back to original table
**User intent:** Add max_diff to each row to compare with its diff

In [None]:
# Attach each month's maximum to every row of that month (inner join on 'month').
merged = pd.merge(difference_per_mon, max_diff_per_month, how='inner', on='month')

### Step 11: Filter rows where diff equals max_diff
**User intent:** Identify customer-month pairs with max monthly difference

In [None]:
# Keep rows whose own difference equals the maximum for their month.
# Rows with a NaN diff never match, because NaN does not compare equal to anything.
hits_monthly_max = merged['diff'].eq(merged['max_diff'])
max_diff_rows = merged.loc[hits_monthly_max]

### Step 12: Select the row with the highest max_diff across all months
**User intent:** Get the row with the largest difference overall

In [None]:
# Pick the single row with the largest monthly maximum (NaN values sort last).
top_row = max_diff_rows.sort_values('max_diff', ascending=False).head(1)
# The question asks which customer the change belongs to, so report
# `customer_id` whenever the incoming frame carries that column; otherwise the
# output is just the month and the difference.
output_columns = [
    col for col in ('customer_id', 'month', 'max_diff') if col in top_row.columns
]
# `assign` builds a new frame, so the rounding is not written into a slice of `top_row`.
final_result = top_row[output_columns].assign(
    max_diff=lambda frame: frame['max_diff'].round(2)
)

### Step 13: Display the final result
**User intent:** Show the month with the highest payment difference and its value

In [None]:
# Bare last expression so the notebook renders the one-row result frame as the cell output.
final_result