In [1]:
import pandas as pd
import numpy as np

data = pd.read_csv(r'C:\Users\matte\OneDrive\Desktop\GitHub\data\causal\daily_restaurant_sales.csv')
data.head()

Unnamed: 0,rest_id,day,month,weekday,weekend,is_holiday,is_dec,is_nov,competitors_price,discounts,sales
0,0,2016-01-01,1,4,False,True,False,False,2.88,0,79.0
1,0,2016-01-02,1,5,True,False,False,False,2.64,0,57.0
2,0,2016-01-03,1,6,True,False,False,False,2.08,5,294.0
3,0,2016-01-04,1,0,False,False,False,False,3.37,15,676.5
4,0,2016-01-05,1,1,False,False,False,False,3.79,0,66.0


In [2]:
import statsmodels.formula.api as smf

X = ["C(month)", "C(weekday)", "is_holiday", "competitors_price"]
regr_cate = smf.ols(f"sales ~ discounts*({'+'.join(X)})",
                            data=data).fit()

In [3]:
ols_cate_pred = (
            regr_cate.predict(data.assign(discounts=data["discounts"]+1)) 
            -regr_cate.predict(data)
        )

In [4]:
train = data.query("day<'2018-01-01'")
test = data.query("day>='2018-01-01'")

In [5]:
X = ["C(month)", "C(weekday)", "is_holiday", "competitors_price"]
regr_model = smf.ols(f"sales ~ discounts*({'+'.join(X)})",
                             data=train).fit()

cate_pred = (
            regr_model.predict(test.assign(discounts=test["discounts"]+1)) 
            -regr_model.predict(test)
        )

In [6]:
from sklearn.ensemble import GradientBoostingRegressor

X = ["month", "weekday", "is_holiday", "competitors_price", "discounts"]
y = "sales"

np.random.seed(1)
ml_model = GradientBoostingRegressor(n_estimators=50).fit(train[X],
                                                                  train[y])

ml_pred = ml_model.predict(test[X])

In [7]:
test_pred = test.assign(
            ml_pred=ml_pred,
            cate_pred=cate_pred,
            rand_m_pred=np.random.uniform(-1, 1, len(test)),
        )

In [11]:
from toolz import curry

@curry
def reg_cate(data, y, t):
    formula = f'{y} ~ {t}'
    model = smf.ols(formula,
                             data=data).fit()
    return model.params[t]

reg_cate(test, 'sales', 'discounts')

32.16196368039627

In [None]:
reg_cate(test, 'sales', 'discounts')

In [15]:
def effect_by_quantile(df, pred, y, t, q=10):
             
             # makes quantile partitions
             groups = np.round(pd.IntervalIndex(pd.qcut(df[pred], q=q)).mid, 0) 
             
             return (df
                     .assign(**{f"{pred}_quantile": groups})
                     .groupby(f"{pred}_quantile")
                     # estimate the effect on each quantile
                     .apply(reg_cate(y=y, t=t))) 

         
effect_by_quantile(test_pred, "cate_pred", y="sales", t="discounts")

cate_pred_quantile
18.0    20.494153
24.0    24.782101
27.0    27.494156
29.0    28.833993
31.0    29.604257
33.0    32.216500
35.0    35.889459
37.0    36.846889
39.0    39.125449
47.0    44.272549
dtype: float64

In [20]:
def cumulative_gain_curve(df, prediction, y, t,
                                   ascending=False, normalize=False, steps=100):
             
             effect_fn = reg_cate(t=t, y=y)
             normalizer = effect_fn(df) if normalize else 0
             
             size = len(df)
             ordered_df = (df
                           .sort_values(prediction, ascending=ascending)
                           .reset_index(drop=True))
             
             steps = np.linspace(size/steps, size, steps).round(0)
             effects = [(effect_fn(ordered_df.query(f"index<={row}"))
                         -normalizer)*(row/size) 
                        for row in steps]

             return np.array([0] + effects)

         
cumulative_gain_curve(test_pred, "cate_pred", "sales", "discounts")

array([ 0.        ,  0.50387597,  0.982917  ,  1.38863289,  1.83046877,
        2.26408709,  2.71880783,  3.13779256,  3.58003148,  4.01798404,
        4.42379877,  4.83861979,  5.23242282,  5.62890903,  5.98551452,
        6.38159081,  6.74402024,  7.13257317,  7.5088165 ,  7.9068624 ,
        8.26968674,  8.63441305,  9.03646968,  9.41649072,  9.76869065,
       10.13172457, 10.48178647, 10.85654343, 11.20031099, 11.57715392,
       11.88179285, 12.22484979, 12.65329367, 13.00147394, 13.35835129,
       13.75177168, 14.08571614, 14.44720258, 14.8206963 , 15.16915263,
       15.48942971, 15.87058782, 16.18483861, 16.5889676 , 16.87711862,
       17.226077  , 17.51523959, 17.85547549, 18.1943502 , 18.48062947,
       18.77790182, 19.05825319, 19.38214231, 19.7240743 , 20.00659491,
       20.35269886, 20.65268403, 20.93862963, 21.18225404, 21.53351325,
       21.82793867, 22.09450014, 22.38142964, 22.62161465, 22.87752468,
       23.11063608, 23.3799479 , 23.70688475, 23.97456335, 24.32

In [25]:
def maxOperations(nums, k):
        """
        :type nums: List[int]
        :type k: int
        :rtype: int
        """
        c = Counter(nums)
        output = 0
        seen = set()

        for x in c:
            if x not in seen and (k-x) in c:
                if x == (k-x):
                    output += c[x]//2
                else:
                    output += min(c[x],c[k-x])
                seen.add(x)
                seen.add(k-x)
        
        return output

In [26]:
maxOperations([1,2,3,4], 3)

[[None, 3, 4, 5], [3, None, 5, 6], [4, 5, None, 7], [5, 6, 7, None]]

In [51]:
def maxVowels(s, k):
        """
        :type s: str
        :type k: int
        :rtype: int
        """
        vowels = set(('a','e','i','o','u'))
        curr_vow = sum([1 for i in s[:k] if i in vowels])
        max_vow = curr_vow

        for idx in range(1, len(s)-k+1):
                if s[idx-1] in vowels:
                    curr_vow -= 1
                if s[idx+k-1] in vowels:
                    curr_vow += 1
                max_vow = max(max_vow, curr_vow)
        return max_vow

In [53]:
maxVowels("abciiidef", 3)

3