In [None]:
# Make the shared helper module importable and pull in load_csv_database.
# The function is supplied as context, so it does not need to be included
# when the notebook is parsed.
import sys
import os
# Append the directory two levels above the current working directory to the
# module search path (presumably where spider2_utils lives; verify locally).
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "..")))
from spider2_utils import load_csv_database


-setup-

In [None]:
import pandas as pd

# Load the "sqlite-sakila" tables through the shared helper; rows_limit=-1 is
# presumably "no row cap" (confirm against load_csv_database).
_database = load_csv_database("sqlite-sakila", rows_limit=-1)

# Keep handles on the two tables this analysis uses.
rental, staff = _database["rental"], _database["staff"]

### Question:

Can you identify the year and month with the highest rental orders created by the store's staff for each store? Please list the store ID, the year, the month, and the total rentals for those dates.

### Step 1: Merge rental and staff tables on STAFF_ID
**User intent:** Join RENTAL and STAFF tables using staff_id

In [None]:
# Attach the matching staff row to each rental via staff_id. The join is inner
# (pandas' default), spelled out here for clarity.
# validate='many_to_one' makes pandas raise MergeError if staff_id is not
# unique in `staff`; without it, a duplicated staff row would silently
# duplicate rentals and inflate every count computed downstream.
rental_merged = rental.merge(
    staff,
    how='inner',
    on='staff_id',
    validate='many_to_one',
)

### Step 2: Extract year and month from rental_date
**User intent:** Add columns for year and rental month as strings

In [None]:
# Parse rental_date to datetime first, then derive the string columns from the
# parsed values. assign() evaluates its keyword arguments in order, so the
# later lambdas see the already-converted rental_date column.
rental_merged = rental_merged.assign(
    rental_date=lambda frame: pd.to_datetime(frame['rental_date']),
    year=lambda frame: frame['rental_date'].dt.strftime('%Y'),
    rental_month=lambda frame: frame['rental_date'].dt.strftime('%m'),
)

### Step 3: Count rentals per year, month, and store
**User intent:** Group by year, month, and store_id and count rentals

In [None]:
# Number of non-null rental_id values for every (year, month, store)
# combination, flattened back into ordinary columns with the tally in 'count'.
result_table = (
    rental_merged
    .groupby(['year', 'rental_month', 'store_id'])['rental_id']
    .count()
    .reset_index(name='count')
)

### Step 4: Aggregate total rentals per store, year, and month
**User intent:** Group by year, month, and store_id to get total rentals

In [None]:
# Sum the per-group counts into total_rentals, keyed by the same
# (year, month, store) columns.
monthly_sales = (
    result_table
    .groupby(['year', 'rental_month', 'store_id'])
    .agg(total_rentals=('count', 'sum'))
    .reset_index()
)

### Step 5: Calculate max rentals per store
**User intent:** Get the maximum monthly rental count for each store

In [None]:
# Broadcast each store's largest monthly total back onto every row of that
# store, so it can be compared row by row with total_rentals.
monthly_sales['max_rentals'] = (
    monthly_sales['total_rentals']
    .groupby(monthly_sales['store_id'])
    .transform('max')
)

### Step 6: Filter months where total rentals equal max rentals
**User intent:** Keep only rows where total_rentals equals max_rentals

In [None]:
# Keep only the rows whose monthly total matches the store's maximum; months
# tied for the maximum are all retained.
store_max_sales = monthly_sales.loc[
    monthly_sales['total_rentals'].eq(monthly_sales['max_rentals'])
]

### Step 7: Select relevant columns and sort by store_id
**User intent:** Return store_id, year, rental_month, and total_rentals ordered by store_id

In [None]:
# Report only the requested columns for each store's peak month(s).
# Sorting on store_id alone relies on pandas' default quicksort, which is not
# stable, so months tied for a store's maximum could come out in any order.
# Using year and rental_month as secondary keys makes the listing
# deterministic while still being ordered by store_id first.
final_result = (
    store_max_sales[['store_id', 'year', 'rental_month', 'total_rentals']]
    .sort_values(['store_id', 'year', 'rental_month'])
)
final_result