In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestRegressor

# Shared preprocessing for the two auxiliary (nuisance) models.
# Only the columns named below are used; everything else in the input
# frame (including the targets) is discarded via remainder='drop'.
feature_generator = ColumnTransformer(
    [
        # handle_unknown='ignore': the pipelines are fit on one fold and
        # asked to predict on another, so the prediction fold can contain
        # item ids / dates that were never seen during fit. With the
        # default ('error') predict() raises ValueError in that case;
        # 'ignore' encodes such rows as all-zeros for that feature.
        ('item_id', OneHotEncoder(handle_unknown='ignore'), ['item_id']),
        ('date', OneHotEncoder(handle_unknown='ignore'), ['date']),
        # Bag of 1- to 3-grams, keeping terms present in >= 5% of documents.
        # The column is given as a bare string (not a list) because
        # CountVectorizer expects a 1-D sequence of documents.
        ('item_description',
             CountVectorizer(min_df=0.05, ngram_range=(1, 3)),
             'item_description'),
        ('numeric_feats', StandardScaler(),
             ['day_of_week', 'stock_age_in_days'])
    ], remainder='drop'
)

# NOTE(review): both pipelines reference the *same* feature_generator
# object, so fitting either pipeline refits the shared transformer. That is
# harmless as long as both are always fit on the same rows before either
# predicts; clone the transformer if they are ever fit on different data.

# Auxiliary model for the outcome (fit against LnQ by the caller).
model_q = Pipeline([
    ('feat_proc', feature_generator),
    ('model_q', RandomForestRegressor())
])
# Auxiliary model for the treatment (fit against LnP by the caller).
model_p = Pipeline([
    ('feat_proc', feature_generator),
    ('model_p', RandomForestRegressor())
])

In [None]:
from sklearn.model_selection import KFold

# Work in log space so the slope reads as an elasticity. Q can be zero,
# so plain log() is not usable; log1p (= log(1 + x)) is a quick stand-in
# for demonstration purposes only -- better options exist.
df_mdl['LnP'] = np.log1p(df_mdl['P'])
df_mdl['LnQ'] = np.log1p(df_mdl['Q'])
elast_estimates = []

# Step 1: two shuffled halves; each half takes one turn as the
# auxiliary (model-fitting) sample and one as the inference sample.
fold_splitter = KFold(n_splits=2, shuffle=True)
for idx_aux, idx_inf in fold_splitter.split(df_mdl):

    df_aux = df_mdl.iloc[idx_aux]
    df_inf = df_mdl.iloc[idx_inf].copy()

    # Steps 2+3: learn both auxiliary models on the auxiliary half only.
    model_q.fit(df_aux, df_aux['LnQ'])
    model_p.fit(df_aux, df_aux['LnP'])

    # Step 4: out-of-sample residuals on the inference half.
    lnp_residual = df_inf['LnP'] - model_p.predict(df_inf)
    lnq_residual = df_inf['LnQ'] - model_q.predict(df_inf)
    df_inf = df_inf.assign(LnP_res=lnp_residual, LnQ_res=lnq_residual)

    # Step 5: DML inference. Numerator is shared by both estimators below.
    res_p = df_inf['LnP_res']
    res_cross = res_p.dot(df_inf['LnQ_res'])

    # The denominator uses the raw LnP rather than its residual; this is
    # where the estimator deviates from the standard OLS solution.
    elast = res_cross / res_p.dot(df_inf['LnP'])

    print('DML elasticity:', elast)
    elast_estimates.append(elast)

    # Residual-on-residual OLS slope, printed purely as a reference point.
    print('OLS elasticity for comparison:', res_cross / res_p.dot(res_p))

# Step 6: Average the per-fold estimates into the final figure.
print("DML efficient estimate of elasticity:", np.mean(elast_estimates))