In [None]:
# function to extract transition probabilities for one household 

def get_markov_prob(df:pd.DataFrame,start:str,end:str):
    """Build time-of-day transition probability tables for one window of events.

    Rows of ``df`` whose ``start_date`` lies strictly between ``start`` and
    ``end`` are counted per ``transition`` label in 2-hour bins (shifted by a
    one-hour offset). Bins are then pooled by their starting hour (``Time``),
    each label is split on ``'>'`` into a source and a sink, and every count
    is divided by the total count of its source within the same ``Time``.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide a datetime ``start_date`` column and a ``transition``
        column whose values read ``"<source>><sink>"``.
    start, end :
        Exclusive window bounds; anything the ``query`` expression can compare
        with ``start_date``.

    Returns
    -------
    pd.DataFrame
        Indexed by ``(Time, source)`` with one column per sink. Combinations
        that never occur hold 0.
    """
    # Keep only the events strictly inside the requested window.
    in_window = df.query("@start < start_date < @end")

    # Count every transition label inside each 2-hour bin.
    binned = in_window.resample('2h', on='start_date', offset="01h00min")
    counts = binned.agg(vc=('transition', 'value_counts'))
    counts = counts.fillna(0).reset_index()
    counts = counts.rename(columns={'vc': 'transition_count'})
    counts['Time'] = counts.start_date.dt.hour

    # Pool bins from different days by hour; the unstack/stack round trip
    # gives every (Time, transition) pair a row, with 0 where it was unseen.
    per_hour = counts.groupby(['Time', 'transition'])['transition_count'].sum()
    per_hour = per_hour.to_frame().unstack().fillna(0).stack().reset_index()

    # "source>sink" labels -> separate source / sink columns.
    per_hour['transition'] = per_hour.transition.astype(str)
    per_hour[['source', 'sink']] = per_hour.transition.str.split('>', expand=True)

    # Normalise by how often each source occurs in the hour; a source that
    # never occurs yields 0/0, which is mapped to probability 0.
    source_total = per_hour.groupby(['Time', 'source'])['transition_count'].transform('sum')
    per_hour['total'] = source_total
    per_hour['markov'] = (per_hour['transition_count'] / per_hour['total']).fillna(0)

    matrix = per_hour.set_index(['Time', 'source', 'sink'])['markov'].unstack().fillna(0)
    return matrix

In [None]:
# set up for the sliding window
#
# `df` must already be loaded; it needs a datetime `start_date` column plus
# whatever `get_markov_prob` reads.

# Window lengths as (count, unit) pairs and the step between window anchors.
bwin,cwin,freq = (3,'M'),(3,'W'),'2d'

def _window_offset(count, unit):
    """Turn a (count, unit) window spec into a calendar-aware pd.DateOffset."""
    # np.timedelta64 with a month unit has no fixed length: older pandas
    # silently used an average month (which injected a stray time of day) and
    # current pandas refuses it, so calendar offsets are used instead.
    unit_names = {'Y': 'years', 'M': 'months', 'W': 'weeks', 'D': 'days',
                  'h': 'hours', 'm': 'minutes', 's': 'seconds'}
    return pd.DateOffset(**{unit_names[unit]: count})

def clean_date(x):
    """Drop the time of day from a timestamp, leaving midnight of that date."""
    return pd.to_datetime(str(x.date()))

baseline_span, current_span = _window_offset(*bwin), _window_offset(*cwin)

# First and last calendar day (at midnight) covered by the data.
start_date = df.start_date.min().normalize()
end_date = df.start_date.max().normalize()

# The first anchor sits one full baseline window after the first day.
base_line = start_date + baseline_span
current = base_line + current_span
rng = pd.date_range(clean_date(base_line), clean_date(end_date), freq=freq)

# run the sliding window

similarity = {} # per-window Series of distances, keyed by the window anchor
for win in rng:
    # Move the anchor onto this window *before* computing, so the value stored
    # under `win` describes the windows around `win` (not the previous anchor).
    base_line, current = win, win + current_span
    baseline_markov = get_markov_prob(df, base_line - baseline_span, base_line)
    current_markov = get_markov_prob(df, base_line, current)

    # A state or time bin seen in only one window is a zero-probability entry
    # in the other; without aligning, the subtraction would produce NaN and
    # the norm of the whole group would be NaN.
    baseline_markov, current_markov = baseline_markov.align(
        current_markov, join='outer', fill_value=0
    )

    # Frobenius distance between the two transition matrices, per time of day.
    difference = baseline_markov - current_markov
    similarity[win] = difference.groupby(level='Time').apply(
        lambda block: np.linalg.norm(block.values)
    )

In [None]:
# Gather the per-window results into one table; each key of `similarity`
# becomes a column.
df_similarity = pd.DataFrame(similarity)
df_similarity