In [1]:
import pandas as pd
import numpy as np
from io import StringIO

In [2]:
# every calendar day of 2017; date_range replaces the removed
# DatetimeIndex(start=, end=, freq=) constructor, and ISO dates avoid
# day/month ambiguity when the strings are parsed
alldays = pd.date_range(start='2017-01-01', end='2017-12-31', freq='D')

In [3]:
# customer ids that make up the first level of the (customer, date) grid
customers = [
    123, 234, 345, 456,
    567, 678, 789, 890,
]

In [4]:
# full cartesian grid: one (customer, date) entry for every customer on every day
multiindex = pd.MultiIndex.from_product(
    iterables=[customers, alldays],
    names=['customer', 'date'],
)

In [5]:
# In-memory sample activity log: one row per (customer, date) with activity flags.
# Dates are written day.month.two-digit-year, some zero-padded ("01.01.17") and some not ("1.2.17").
# NOTE(review): the header names five columns (customer,date,a,b,c) but every data row
# carries only four fields, so column `c` has no values of its own - presumably it is
# read as NaN and later zero-filled; confirm this is intended.
csvdata = StringIO('''customer,date,a,b,c
123,01.01.17,1,0
234,02.01.17,0,0
345,01.01.17,0,0
456,04.01.17,1,1
567,03.01.17,1,0
678,08.01.17,1,1
789,13.01.17,0,1
890,05.01.17,1,0
123,02.01.17,1,0
234,03.01.17,0,0
345,02.01.17,0,0
456,05.01.17,1,1
567,04.01.17,1,0
678,10.01.17,1,1
789,18.01.17,0,1
890,15.01.17,1,0
123,10.01.17,1,0
234,20.01.17,0,0
345,15.01.17,0,0
456,24.01.17,1,1
567,30.01.17,1,0
678,18.01.17,1,1
789,23.01.17,0,1
890,25.01.17,1,0
123,1.2.17,1,0
234,2.2.17,0,0
345,1.2.17,0,0
456,4.2.17,1,1
567,3.2.17,1,0
678,8.2.17,1,1
789,13.2.17,0,1
890,5.2.17,1,0
123,2.2.17,1,0
234,3.2.17,0,0
345,2.2.17,0,0
456,5.2.17,1,1
567,4.2.17,1,0
678,10.2.17,1,1
789,18.2.17,0,1
890,15.2.17,1,0
123,10.2.17,1,0
234,20.2.17,0,0
345,15.2.17,0,0
456,24.2.17,1,1
567,27.2.17,1,0
678,18.2.17,1,1
789,23.2.17,0,1
890,25.2.17,1,0
123,1.3.17,1,0
234,2.3.17,0,0
345,1.3.17,0,0
456,4.3.17,1,1
567,3.3.17,1,0
678,8.3.17,1,1
789,13.3.17,0,1
890,5.3.17,1,0
123,2.3.17,1,0
234,3.3.17,0,0
345,2.3.17,0,0
456,5.3.17,1,1
567,4.3.17,1,0
678,10.3.17,1,1
789,18.3.17,0,1
890,15.3.17,1,0
123,10.3.17,1,0
234,20.3.17,0,0
345,15.3.17,0,0
456,24.3.17,1,1
567,30.3.17,1,0
678,18.3.17,1,1
789,23.3.17,0,1
890,25.3.17,1,0
''')

In [None]:
# Shorthand for pd.IndexSlice, which builds the per-level slice tuples that
# .loc expects when selecting from a MultiIndex.
# See https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#using-slicers
idx = pd.IndexSlice

In [37]:
# rewind first: a StringIO is exhausted after one read, so without this a
# re-run of the cell would hand read_csv an empty buffer
csvdata.seek(0)
df = pd.read_csv(csvdata, sep=',')

In [38]:
# parse the day.month.two-digit-year strings in one vectorised call;
# pd.datetime no longer exists in current pandas releases
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%y')

In [43]:
# index by (customer, date), expand to the full customer x day grid,
# and treat every day without a record as all-zero activity
df = (
    df.set_index(keys=['customer', 'date'])
      .reindex(multiindex)
      .fillna(value=0)
)

In [159]:
# use columns a through c to determine whether the customer was active that day;
# DataFrame.any(axis=1) is the vectorised form of applying any() to each row
activity = df[['a', 'b', 'c']].any(axis=1)

# helper for the churn flags: a window without any activity means churned
def notany(x):
    """Return True when no element of ``x`` is truthy (also True for an empty ``x``)."""
    for flag in x:
        if flag:
            return False
    return True

# use the activity series to determine for each day whether the customer churned in the following n days
def churnNday(activity, n):
    """Flag, per customer and row, whether the next ``n`` rows show no activity.

    Parameters
    ----------
    activity : pandas.Series
        Boolean (or 0/1) activity flags. The index must contain a level named
        ``'customer'``; windows are counted in rows, so each customer's rows
        are expected to be contiguous and in chronological order.
    n : int
        Number of following rows (days) to look ahead.

    Returns
    -------
    pandas.Series of bool
        Same index as ``activity``. True when none of the ``n`` rows after the
        current one (same customer) is active. Rows whose look-ahead window
        runs past the customer's last row have no complete window and are
        also reported as True.
    """
    # work on floats so the rolling aggregation does not depend on bool support
    flags = activity.astype(float)
    # trailing window: the value at row t summarises rows t-n+1 .. t
    window_max = flags.rolling(n).max()
    # shifting by -n inside each customer makes row t see the window t+1 .. t+n;
    # windows that straddle two customers sit in a customer's first n-1 rows
    # and are shifted out here, so they never reach the result
    future_max = window_max.groupby(level='customer').shift(-n)
    # NaN (incomplete window) compares False against 0 and therefore ends up True
    return ~(future_max > 0)

In [160]:
# 7-day look-ahead churn flag; peek at the first rows as a sanity check
churn7d = churnNday(activity, n=7)
churn7d.head()

customer  date      
123       2017-01-01    False
          2017-01-02     True
          2017-01-03    False
          2017-01-04    False
          2017-01-05    False
dtype: bool

In [91]:
# next-day churn: True when the customer shows no activity on the following day
df['churn'] = churnNday(activity, n=1)

In [161]:
# 7-day churn flag; NOTE(review): the column name is spelled 'chrun7day'
# (not 'churn7day') - kept as is because anything reading this column uses that spelling
df['chrun7day'] = churnNday(activity, n=7)

In [162]:
# show the first 10 days of every customer to validate the churn variables;
# the tuple is (all customers, dates up to and including 2017-1-10), i.e. the
# explicit form of pd.IndexSlice[:, :'2017-1-10']
df.loc[(slice(None), slice(None, '2017-1-10')), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,churn,chrun7day
customer,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
123,2017-01-01,1.0,0.0,0.0,False,False
123,2017-01-02,1.0,0.0,0.0,True,True
123,2017-01-03,0.0,0.0,0.0,True,False
123,2017-01-04,0.0,0.0,0.0,True,False
123,2017-01-05,0.0,0.0,0.0,True,False
123,2017-01-06,0.0,0.0,0.0,True,False
123,2017-01-07,0.0,0.0,0.0,True,False
123,2017-01-08,0.0,0.0,0.0,True,False
123,2017-01-09,0.0,0.0,0.0,False,False
123,2017-01-10,1.0,0.0,0.0,True,True
