In [6]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.model_selection import cross_val_score
from sklego.meta import ZeroInflatedRegressor

# Seed NumPy's global random generator before drawing anything, so the simulated
# data below is identical on every "Restart Kernel -> Run All".
np.random.seed(0)

# Note the final line of code in this block. We're setting y=0 for all weekend dates
# while we simulate standard regression data for all the other dates.
df = (pd.DataFrame({'dt': pd.date_range("2018-01-01", "2021-01-01")})
      # x: one standard-normal draw per row
      .assign(x=lambda d: np.random.normal(0, 1, d.shape[0]))
      # weekend: 1 when dt.weekday is 5 or 6, else 0 (stored as int16)
      .assign(weekend=lambda d: (d['dt'].dt.weekday >= 5).astype(np.int16))
      # y: exactly 0 where weekend is set; otherwise 1.5 + 0.87 * x plus N(0, 0.2) noise
      .assign(y=lambda d: np.where(d['weekend'], 0, 1.5 + 0.87 * d['x'] + np.random.normal(0, 0.2, d.shape[0]))))

In [9]:
# Show the last five rows of the simulated frame as a quick sanity check.
df.tail(n=5)

Unnamed: 0,dt,x,weekend,y
1092,2020-12-28,-0.21663,0,1.582935
1093,2020-12-29,1.600085,0,2.573219
1094,2020-12-30,0.775335,0,2.240295
1095,2020-12-31,-0.536147,0,0.83811
1096,2021-01-01,-0.681098,0,0.839873


In [7]:
# Feature matrix: the continuous signal `x` together with the weekend indicator.
X = df[['x', 'weekend']].to_numpy()
# Target vector: the simulated response `y`.
y = df['y'].to_numpy()


In [8]:
# Candidate 1: a ZeroInflatedRegressor built from a logistic-regression
# classifier and a ridge regressor.
zir = ZeroInflatedRegressor(classifier=LogisticRegression(), regressor=Ridge())

# Candidate 2: a plain ridge regression fitted on the same features, as a baseline.
lr = Ridge(random_state=0)

# Report the mean cross-validated score of each model, in the same order and
# with the same labels as before.
for label, model in (('ZIR r²:', zir), (' LR r²:', lr)):
    print(label, cross_val_score(model, X, y).mean())

ZIR r²: 0.9710031331616351
 LR r²: 0.8207276105891473
