# Regression weights: CM and elasticity

In [1]:
import os  # no installation needed
import sys  # no installation needed
from pathlib import Path  # no installation needed

# Project root that holds the `src` package imported by the following cells.
# Set the ECOM_FORECAST_ROOT environment variable to run this notebook from another
# checkout or machine; when it is unset or empty, the original workstation path is used.
ROOT = Path(os.environ.get("ECOM_FORECAST_ROOT") or r"C:\\Users\\quantbase\\Desktop\\ecom_forecast")
# Prepend only once so re-running this cell does not stack duplicate sys.path entries.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))


In [2]:
import pandas as pd  # already in env - no new install
import numpy as np  # already in env - no new install
from sklearn.preprocessing import StandardScaler  # already in env - no new install
from sklearn.linear_model import Ridge  # already in env - no new install
import statsmodels.api as sm  # already in env - no new install

from src.config import ProjectPaths  # no installation needed
from src.models import evaluate, fit_ols, fit_ridge_standardized, prepare_features, time_split  # no installation needed


In [3]:
# Resolve the project's path configuration. ensure_directories() presumably creates the
# folders the project expects -- confirm in src.config.
paths = ProjectPaths.from_root()
paths.ensure_directories()

# Coefficient tables and the metrics file from the last cell are written to <outputs>/models.
models_dir = paths.outputs_dir / 'models'
models_dir.mkdir(parents=True, exist_ok=True)

# `epsilon` is added to denominators and log arguments in later cells; this default is
# used only when the loaded assumptions do not define an 'epsilon' entry.
DEFAULT_EPSILON = 1e-9
assumptions = paths.load_assumptions()
epsilon = float(assumptions.get('epsilon', DEFAULT_EPSILON))


In [4]:
# Load the precomputed driver matrix, order it by 'Day', and derive the return and
# discount rates relative to gross sales.
# NOTE(review): read_pickle can execute arbitrary code embedded in the file; only load
# pickles written by this project's own pipeline.
driver_matrix_path = paths.outputs_dir / 'drivers' / 'driver_matrix.pkl'
driver = (
    pd.read_pickle(driver_matrix_path)
    .sort_values('Day')
    .reset_index(drop=True)
    .assign(
        # epsilon in the denominator guards against division by zero gross sales.
        return_rate_gross=lambda frame: frame['returns_abs'] / (frame['Gross sales'] + epsilon),
        # The sign flip suggests 'Discounts' is stored as negative amounts -- confirm upstream.
        discount_rate_gross=lambda frame: (-frame['Discounts']) / (frame['Gross sales'] + epsilon),
    )
)
# Regressors for the CM models, grouped by theme. The concatenation order below is the
# order in which the names are handed to prepare_features and the fitting helpers.
funnel_cols = ['Sessions', 'Conversion rate', 'aov_proxy', 'return_rate_gross', 'discount_rate_gross']
spend_cols = ['Meta_Spend', 'Google_Spend', 'TikTok_Spend', 'Email_SMS_Cost']
cpc_cols = ['meta_cpc', 'google_cpc', 'tiktok_cpc']
# 7/14/21-step lags of the return rate first, then of absolute returns.
lag_cols = [
    f'{base}_lag_{lag}'
    for base in ('return_rate_gross', 'returns_abs')
    for lag in (7, 14, 21)
]
efficiency_cols = ['mer', 'cpa_proxy']
feature_cols = funnel_cols + spend_cols + cpc_cols + lag_cols + efficiency_cols

# Target columns: CM in dollars and CM as a percentage.
target_cm_d = 'CM$'
target_cm_pct = 'CM%'


In [5]:
# Ridge regression of CM$ on the driver features.
# The split is made once here; train_df / test_df are reused by the CM% cells below.
train_df, test_df = time_split(driver, split_frac=0.7)

# Build the (features, target) pair for the train split, then for the test split.
(X_train_d, y_train_d), (X_test_d, y_test_d) = [
    prepare_features(split, feature_cols, target_cm_d)
    for split in (train_df, test_df)
]

# The helper returns the fitted model, the scaler it used, and a coefficient table.
model_cm_d, scaler_d, coefs_cm_d = fit_ridge_standardized(
    X_train_d,
    y_train_d,
    alpha=1.0,
    feature_names=feature_cols,
)

# Score the test split after transforming it with the scaler returned by the training fit.
X_test_d_scaled = scaler_d.transform(X_test_d)
mae_d, rmse_d, r2_d = evaluate(model_cm_d, X_test_d_scaled, y_test_d)

# Keep the reporting columns and rank features by coefficient magnitude.
coef_columns = ['feature', 'coef', 'abs_coef']
coefs_cm_d = coefs_cm_d[coef_columns].sort_values(by='abs_coef', ascending=False)
coefs_cm_d.head(10)




Unnamed: 0,feature,coef,abs_coef
18,mer,2927.945513,2927.945513
2,aov_proxy,1033.901286,1033.901286
6,Google_Spend,841.065887,841.065887
8,Email_SMS_Cost,816.835924,816.835924
7,TikTok_Spend,788.68118,788.68118
11,tiktok_cpc,-742.630342,742.630342
1,Conversion rate,626.700704,626.700704
9,meta_cpc,-399.326009,399.326009
5,Meta_Spend,340.362406,340.362406
19,cpa_proxy,-292.454207,292.454207


In [6]:
# Ridge regression of CM% on the same features, using the train_df / test_df split
# created in the CM$ cell.
(X_train_p, y_train_p), (X_test_p, y_test_p) = [
    prepare_features(split, feature_cols, target_cm_pct)
    for split in (train_df, test_df)
]

# The helper returns the fitted model, the scaler it used, and a coefficient table.
model_cm_p, scaler_p, coefs_cm_p = fit_ridge_standardized(
    X_train_p,
    y_train_p,
    alpha=1.0,
    feature_names=feature_cols,
)

# Score the test split after transforming it with the scaler returned by the training fit.
X_test_p_scaled = scaler_p.transform(X_test_p)
mae_p, rmse_p, r2_p = evaluate(model_cm_p, X_test_p_scaled, y_test_p)

# Keep the reporting columns and rank features by coefficient magnitude.
coef_columns = ['feature', 'coef', 'abs_coef']
coefs_cm_p = coefs_cm_p[coef_columns].sort_values(by='abs_coef', ascending=False)
coefs_cm_p.head(10)




Unnamed: 0,feature,coef,abs_coef
18,mer,0.035507,0.035507
3,return_rate_gross,-0.031569,0.031569
2,aov_proxy,0.028403,0.028403
11,tiktok_cpc,0.028308,0.028308
7,TikTok_Spend,-0.017234,0.017234
0,Sessions,0.014556,0.014556
6,Google_Spend,-0.013869,0.013869
12,return_rate_gross_lag_7,-0.01308,0.01308
9,meta_cpc,0.012199,0.012199
15,returns_abs_lag_7,0.012086,0.012086


In [7]:
# OLS of CM% on the same features, without the scaling step used by the ridge cells;
# ols_table reports a p-value per parameter.
# NOTE(review): in the recorded output several parameters share an identical p-value
# (const, meta_cpc, tiktok_cpc), which may indicate collinear columns -- verify before
# interpreting individual OLS coefficients.
X_train_pct, y_train_pct = prepare_features(train_df, feature_cols, target_cm_pct)
X_test_pct, y_test_pct = prepare_features(test_df, feature_cols, target_cm_pct)
X_train_pct_df = pd.DataFrame(X_train_pct, columns=feature_cols)
X_test_pct_df = pd.DataFrame(X_test_pct, columns=feature_cols)
ols_model, ols_table = fit_ols(X_train_pct_df, y_train_pct)
# has_constant='add' always appends the intercept column, even if a test feature is constant.
ols_preds = ols_model.predict(sm.add_constant(X_test_pct_df, has_constant='add'))

# Score on flat NumPy arrays. Subtracting pandas objects aligns them on index labels, and
# pandas reductions skip NaN, so a label mismatch between predictions and targets would
# silently yield metrics over the wrong (or no) rows. Arrays compare position by position
# and let any NaN propagate into the metric instead of being dropped.
ols_pred_values = np.asarray(ols_preds, dtype=float).ravel()
ols_true_values = np.asarray(y_test_pct, dtype=float).ravel()
ols_residuals = ols_pred_values - ols_true_values
mae_ols = np.mean(np.abs(ols_residuals))
rmse_ols = np.sqrt(np.mean(ols_residuals ** 2))
# R^2 = 1 - SS_res / SS_tot, with SS_tot taken around the test-set mean.
r2_ols = 1 - np.sum(ols_residuals ** 2) / np.sum((ols_true_values - ols_true_values.mean()) ** 2)
ols_table.head(10)


Unnamed: 0,param,coef,pvalue,abs_coef
11,google_cpc,14.941781,0.224224,14.941781
10,meta_cpc,-9.239754,0.226463,9.239754
0,const,-5.543853,0.226463,5.543853
12,tiktok_cpc,-2.771926,0.226463,2.771926
2,Conversion rate,-2.041119,0.64767,2.041119
4,return_rate_gross,-0.297305,8e-06,0.297305
13,return_rate_gross_lag_7,-0.215168,0.002813,0.215168
5,discount_rate_gross,0.115751,0.538249,0.115751
14,return_rate_gross_lag_14,0.066666,0.328455,0.066666
15,return_rate_gross_lag_21,-0.043452,0.572545,0.043452


In [8]:
# Log-log demand model: log net sales regressed on log sessions, log ad spend and
# log absolute returns.
# NOTE(review): the fit goes through fit_ridge_standardized, so the coefficients appear to
# be per-standardized-unit effects of the log features rather than raw elasticities --
# confirm before reading them as elasticities.
log_sources = {
    'log_sessions': 'Sessions',
    'log_ad_spend': 'Ad_Spend',
    'log_returns_abs': 'returns_abs',
    'log_net_sales': 'Net sales',
}
elastic = driver.copy()
for log_name, raw_name in log_sources.items():
    # epsilon keeps log() finite where the raw value is exactly zero.
    # NOTE(review): with a tiny epsilon such rows become large negative outliers -- check
    # how many zero days exist.
    elastic[log_name] = np.log(elastic[raw_name] + epsilon)

elasticity_target = 'log_net_sales'
# Every log column except the target is a regressor, in the order defined above.
elasticity_features = [name for name in log_sources if name != elasticity_target]

train_el, test_el = time_split(elastic, split_frac=0.7)
(X_train_el, y_train_el), (X_test_el, y_test_el) = [
    prepare_features(split, elasticity_features, elasticity_target)
    for split in (train_el, test_el)
]

# The helper returns the fitted model, the scaler it used, and a coefficient table.
model_el, scaler_el, coefs_el = fit_ridge_standardized(
    X_train_el,
    y_train_el,
    alpha=1.0,
    feature_names=elasticity_features,
)

# Score the test split after transforming it with the scaler returned by the training fit.
X_test_el_scaled = scaler_el.transform(X_test_el)
mae_el, rmse_el, r2_el = evaluate(model_el, X_test_el_scaled, y_test_el)

# Keep the reporting columns and rank features by coefficient magnitude.
coef_columns = ['feature', 'coef', 'abs_coef']
coefs_el = coefs_el[coef_columns].sort_values(by='abs_coef', ascending=False)
coefs_el.head(10)




Unnamed: 0,feature,coef,abs_coef
0,log_sessions,0.099503,0.099503
1,log_ad_spend,0.091006,0.091006
2,log_returns_abs,0.055129,0.055129


In [10]:
# One row of test-split metrics per fitted model.
metric_rows = [
    ('ridge_CM$', mae_d, rmse_d, r2_d),
    ('ridge_CM%', mae_p, rmse_p, r2_p),
    ('ridge_log_demand', mae_el, rmse_el, r2_el),
    ('ols_CM%', mae_ols, rmse_ols, r2_ols),
]
metrics = pd.DataFrame([
    {'model': label, 'mae': mae, 'rmse': rmse, 'r2': r2}
    for label, mae, rmse, r2 in metric_rows
])

# Write the ridge coefficient tables and the metrics table to models_dir as CSV.
csv_exports = [
    (coefs_cm_d, 'coef_ridge_cm_dollars.csv'),
    (coefs_cm_p, 'coef_ridge_cm_pct.csv'),
    (coefs_el, 'coef_ridge_log_demand.csv'),
    (metrics, 'metrics.csv'),
]
for table, filename in csv_exports:
    table.to_csv(models_dir / filename, index=False)

# Print a plain-text summary: top ten coefficients per ridge model, then all metrics.
reports = [
    ('Top CM$ coefs', coefs_cm_d.head(10)),
    ('Top CM% coefs', coefs_cm_p.head(10)),
    ('Elasticity coefs', coefs_el.head(10)),
    ('Metrics', metrics),
]
for heading, table in reports:
    print(heading)
    print(table)


Top CM$ coefs
            feature         coef     abs_coef
18              mer  2927.945513  2927.945513
2         aov_proxy  1033.901286  1033.901286
6      Google_Spend   841.065887   841.065887
8    Email_SMS_Cost   816.835924   816.835924
7      TikTok_Spend   788.681180   788.681180
11       tiktok_cpc  -742.630342   742.630342
1   Conversion rate   626.700704   626.700704
9          meta_cpc  -399.326009   399.326009
5        Meta_Spend   340.362406   340.362406
19        cpa_proxy  -292.454207   292.454207
Top CM% coefs
                    feature      coef  abs_coef
18                      mer  0.035507  0.035507
3         return_rate_gross -0.031569  0.031569
2                 aov_proxy  0.028403  0.028403
11               tiktok_cpc  0.028308  0.028308
7              TikTok_Spend -0.017234  0.017234
0                  Sessions  0.014556  0.014556
6              Google_Spend -0.013869  0.013869
12  return_rate_gross_lag_7 -0.013080  0.013080
9                  meta_cpc  0.012