# Daily driver matrix and lagged returns

In [1]:
import os  # no installation needed for stdlib/project-local
import sys  # no installation needed for stdlib/project-local
from pathlib import Path  # no installation needed for stdlib/project-local

# Project root that holds the `src` package imported in the next cell.
# Set the ECOM_FORECAST_ROOT environment variable to run this notebook from
# another checkout or machine; when it is unset or empty, the original
# workstation path is used, so existing behaviour is unchanged.
# (The doubled backslashes are literal inside a raw string; pathlib on Windows
# collapses repeated separators, so the resulting path is still valid.)
ROOT = Path(os.environ.get("ECOM_FORECAST_ROOT") or r"C:\\Users\\quantbase\\Desktop\\ecom_forecast")

# Put the root at the front of sys.path only once, so re-running the cell
# does not stack duplicate entries.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))


In [2]:
import yaml  # already in env - no new install for third-party libs
import pandas as pd  # already in env - no new install for third-party libs

from src.config import ProjectPaths  # no installation needed for stdlib/project-local
from src.drivers import build_driver_matrix, load_q4_daily_audit  # no installation needed for stdlib/project-local


In [3]:
# Resolve project locations through the project-local ProjectPaths helper.
# NOTE(review): from_root() is called without arguments, so how the root is
# located is decided inside src.config — confirm it matches ROOT above.
paths = ProjectPaths.from_root()
paths.ensure_directories()
# Folder that receives this notebook's driver-matrix exports (CSV and pickle).
drivers_dir = paths.outputs_dir / 'drivers'
# parents/exist_ok make this safe to re-run and independent of whether
# outputs_dir already exists.
drivers_dir.mkdir(parents=True, exist_ok=True)
# Passed unchanged to build_driver_matrix; its structure is defined by the
# project and is not visible in this notebook.
assumptions = paths.load_assumptions()


In [4]:
# Daily audit data via the project loader, which is handed the outputs folder.
audit_daily = load_q4_daily_audit(paths.outputs_dir)

# Cleaned daily inputs stored as pickles in the clean-data folder.
# NOTE: unpickling can execute arbitrary code — only load files this project wrote.
sessions_daily, marketing_daily = (
    pd.read_pickle(paths.clean_dir / pickle_name)
    for pickle_name in ('sessions_daily.pkl', 'marketing_daily.pkl')
)

# Coerce 'Day' to datetime wherever the column is present; frames without it
# are left untouched.
for daily_frame in (sessions_daily, marketing_daily):
    if 'Day' not in daily_frame.columns:
        continue
    daily_frame['Day'] = pd.to_datetime(daily_frame['Day'])


In [5]:
# Assemble the driver matrix from the three daily inputs plus the assumptions.
driver_matrix = build_driver_matrix(
    assumptions=assumptions,
    marketing_df=marketing_daily,
    sessions_df=sessions_daily,
    audit_df=audit_daily,
)

# Invariants: exactly one output row per audit row, and no repeated 'Day'.
n_matrix_rows, n_audit_rows = len(driver_matrix), len(audit_daily)
assert n_matrix_rows == n_audit_rows
assert driver_matrix['Day'].duplicated().sum() == 0


In [6]:
# Lag features are identified purely by their column-name suffix.
lag_cols = [
    name
    for name in driver_matrix.columns
    if name.endswith(('_lag_7', '_lag_14', '_lag_21'))
]

# Missing values per lag column, converted to plain ints so the dict prints cleanly.
lag_na_counts = {
    name: int(n_missing)
    for name, n_missing in driver_matrix[lag_cols].isna().sum().items()
}

# Row count and covered date range of the matrix.
day_values = driver_matrix['Day']
rowcount = driver_matrix.shape[0]
min_day, max_day = day_values.min(), day_values.max()

print('rows', rowcount, 'min_day', min_day, 'max_day', max_day)
print('lag_na_counts', lag_na_counts)


rows 91 min_day 2025-09-17 00:00:00 max_day 2025-12-16 00:00:00
lag_na_counts {'returns_abs_lag_7': 7, 'return_rate_gross_lag_7': 7, 'net_sales_lag_7': 7, 'ad_spend_lag_7': 7, 'returns_abs_lag_14': 14, 'return_rate_gross_lag_14': 14, 'net_sales_lag_14': 14, 'ad_spend_lag_14': 14, 'returns_abs_lag_21': 21, 'return_rate_gross_lag_21': 21, 'net_sales_lag_21': 21, 'ad_spend_lag_21': 21}


In [7]:
# Persist the matrix twice: CSV without the index column, and a pickle.
# The file names are declared once so the list displayed below cannot drift
# from what is actually written.
csv_name, pkl_name = 'driver_matrix.csv', 'driver_matrix.pkl'
driver_matrix.to_csv(drivers_dir / csv_name, index=False)
driver_matrix.to_pickle(drivers_dir / pkl_name)
[csv_name, pkl_name]


['driver_matrix.csv', 'driver_matrix.pkl']

In [9]:
# Dimensions of the assembled matrix as (rows, columns).
driver_matrix.shape

(91, 81)

In [11]:
# Earliest 'Day' in the matrix (the same value is printed as min_day in the lag summary cell).
driver_matrix['Day'].min()

Timestamp('2025-09-17 00:00:00')

In [12]:
# Latest 'Day' in the matrix (the same value is printed as max_day in the lag summary cell).
driver_matrix['Day'].max()

Timestamp('2025-12-16 00:00:00')

In [13]:
# Number of repeated 'Day' values; the assert right after build_driver_matrix requires this to be 0.
driver_matrix['Day'].duplicated().sum()

0

In [None]:
# Lag spot check: count of missing values in a single lag column.
# For the complete picture across every lag column, see `lag_na_counts`,
# which is computed and printed earlier in this notebook.
driver_matrix['returns_abs_lag_7'].isna().sum()

7

In [15]:
# Headline sales, returns and marketing columns for the first ten rows.
preview_cols = [
    'Day', 'Net sales', 'Gross sales', 'returns_abs',
    'return_rate_gross', 'Ad_Spend', 'mer', 'aov_proxy',
]
driver_matrix[preview_cols].head(10)

Unnamed: 0,Day,Net sales,Gross sales,returns_abs,return_rate_gross,Ad_Spend,mer,aov_proxy
0,2025-09-17,84236.87,97074.6,12622.23,0.130026,5350,15.745209,209.441422
1,2025-09-18,31587.32,44746.99,12186.48,0.272342,4820,6.553386,177.642735
2,2025-09-19,37746.52,45629.9,7877.38,0.172636,6580,5.736553,207.344455
3,2025-09-20,53603.15,57708.4,4047.0,0.070128,5960,8.993817,226.092547
4,2025-09-21,34462.5,36737.7,2226.4,0.060603,5540,6.220668,206.952172
5,2025-09-22,27274.66,33192.81,5851.9,0.1763,5180,5.265378,197.214338
6,2025-09-23,17362.43,39063.49,21516.07,0.550797,15050,1.15365,106.061493
7,2025-09-24,39224.33,52284.05,12233.12,0.233974,6670,5.880709,182.859346
8,2025-09-25,23639.53,32033.03,8393.5,0.262026,6050,3.90736,158.029508
9,2025-09-26,29161.35,36977.78,7816.43,0.211382,7390,3.946055,210.85639


In [16]:
# First three rows across all columns; the rendered table elides the middle columns of this wide frame.
driver_matrix.head(3)

Unnamed: 0,Day,Net sales,Gross sales,Returns,Discounts,Sessions,Conversion rate,Ad_Spend,Meta_Spend,Google_Spend,...,net_sales_lag_7,ad_spend_lag_7,returns_abs_lag_14,return_rate_gross_lag_14,net_sales_lag_14,ad_spend_lag_14,returns_abs_lag_21,return_rate_gross_lag_21,net_sales_lag_21,ad_spend_lag_21
0,2025-09-17,84236.87,97074.6,-12622.23,-215.5,7418,0.03842,5350,3500,1200,...,,,,,,,,,,
1,2025-09-18,31587.32,44746.99,-12186.48,-973.19,7027,0.017931,4820,3200,1100,...,,,,,,,,,,
2,2025-09-19,37746.52,45629.9,-7877.38,-6.0,10654,0.012108,6580,4100,1500,...,,,,,,,,,,


In [17]:
# Last three rows across all columns; the rendered table elides the middle columns of this wide frame.
driver_matrix.tail(3)

Unnamed: 0,Day,Net sales,Gross sales,Returns,Discounts,Sessions,Conversion rate,Ad_Spend,Meta_Spend,Google_Spend,...,net_sales_lag_7,ad_spend_lag_7,returns_abs_lag_14,return_rate_gross_lag_14,net_sales_lag_14,ad_spend_lag_14,returns_abs_lag_21,return_rate_gross_lag_21,net_sales_lag_21,ad_spend_lag_21
88,2025-12-14,21722.79,23067.69,-1035.0,-309.9,5202,0.014225,6565,4100,1550,...,16618.7,5950.0,312.0,0.016425,18559.88,7400.0,348.0,0.016471,20743.0,5950.0
89,2025-12-15,17948.37,22199.56,-3711.86,-539.33,5330,0.015947,6360,4000,1500,...,28489.6,7400.0,378.0,0.015555,23470.58,8550.0,2004.24,0.064179,26591.82,6770.0
90,2025-12-16,19497.78,22413.28,-2711.49,-204.01,5648,0.015758,6565,4100,1550,...,19112.77,6360.0,6378.28,0.209286,23760.41,9060.0,8085.34,0.204959,23847.79,7390.0
