## Supermarket Simulation: Markov Chain Simulation of Customer Behavior

In [1]:
import glob
import os
import random

import pandas as pd

#### Read the data and prepare a DataFrame for the transition probability calculation

In [2]:
# Build one long table of per-minute customer locations across all weekday
# files. Each row also records the customer's previous location ('before'),
# which is what the transition probability matrix is counted from.
frames = []
for file in glob.glob('../data/*.csv'):
    # First two letters of the file name (e.g. 'mo'); prefixed to customer_no
    # so that ids from different days cannot collide. Taking the basename keeps
    # this independent of the length of the directory part of the path.
    weekday_tag = os.path.basename(file)[:2]
    print(weekday_tag)
    df = pd.read_csv(file, sep=';', parse_dates=True, index_col=[0])
    df['customer_no'] = weekday_tag + df['customer_no'].astype(str)
    # Forward-fill every customer's track onto a 1-minute grid.
    df_fill = df.groupby('customer_no').resample('1 min').ffill()
    # Previous location, shifted within each customer (index level 0 is the
    # customer_no group key). Shifting the whole frame instead would leak the
    # last location of one customer into the first row of the next. A
    # customer's first row has no predecessor and is labelled 'entrance'.
    df_fill['before'] = (
        df_fill.groupby(level=0)['location'].shift(1).fillna('entrance')
    )
    # Nothing may follow 'checkout': should a customer have rows after it,
    # treat them as a fresh entry. Single .loc assignment, so the write
    # reaches df_fill itself rather than a temporary copy.
    df_fill.loc[df_fill['before'] == 'checkout', 'before'] = 'entrance'
    frames.append(df_fill)

# Concatenate once instead of growing a frame inside the loop
# (DataFrame.append was removed in pandas 2.0). With no input files the
# result is an empty frame, as before.
data_all = pd.concat(frames) if frames else pd.DataFrame()
data_all.to_csv('data_all.csv')

tu
mo
th
we
fr


#### calculate transition probability matrix

In [3]:
# Estimate the transition probability matrix: rows are the previous location,
# columns the next location, and each observed row is normalised to sum to 1.
states = ['entrance', 'dairy', 'drinks', 'fruit', 'spices', 'checkout']
# Zero template so that every state gets a row and a column, even if unseen.
tpm = pd.DataFrame(0, index=states, columns=states)
observed = pd.crosstab(data_all['before'], data_all['location'], normalize=0)
# The addition aligns on the union of labels; pairs absent from the data come
# out as NaN and are set to probability 0.
tpm = (tpm + observed).fillna(0)
# 'checkout' is the absorbing state: once reached, the chain stays there.
# A single .loc assignment writes into tpm itself, whereas the chained
# tpm['checkout'].iloc[...] form may only modify a temporary copy.
tpm.loc['checkout', 'checkout'] = 1
tpm.to_csv('tpm.csv')
tpm

Unnamed: 0,checkout,dairy,drinks,entrance,fruit,spices
checkout,1.0,0.0,0.0,0.0,0.0,0.0
dairy,0.102678,0.737032,0.058625,0.0,0.050073,0.051592
drinks,0.215505,0.011001,0.598499,0.0,0.088012,0.086983
entrance,0.0,0.287583,0.153296,0.0,0.377646,0.181475
fruit,0.201054,0.095924,0.055241,0.0,0.597104,0.050677
spices,0.149888,0.193533,0.163109,0.0,0.091271,0.402198


#### one simple simulation

In [4]:
# Simulate a single customer's walk through the store as a Markov chain.
states = tpm.columns  # candidate next locations, in the matrix's column order
state_list = ['entrance']  # visited locations; the walk starts at the entrance
inside = 1  # stays 1 until the customer has reached the checkout
while inside:
    # Matrix row that belongs to the customer's current location.
    current_row = tpm.loc[tpm.index == state_list[-1]]
    weights = current_row.to_numpy().tolist()[0]
    # random.choices returns a one-element list holding the sampled location.
    state = random.choices(states, weights=weights)
    state_list.extend(state)
    if state == ['checkout']:
        inside = 0
state_list

['entrance', 'drinks', 'checkout']