In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()
plt.rcParams['figure.figsize'] = (20,6)

In [2]:
data = pd.read_csv('weekdays/monday.csv', delimiter = ';', parse_dates=True)

In [3]:
data.head()

Unnamed: 0,timestamp,customer_no,location
0,2019-09-02 07:03:00,1,dairy
1,2019-09-02 07:03:00,2,dairy
2,2019-09-02 07:04:00,3,dairy
3,2019-09-02 07:04:00,4,dairy
4,2019-09-02 07:04:00,5,spices


In [4]:
data.dtypes

timestamp      object
customer_no     int64
location       object
dtype: object

In [5]:
data['timestamp'] = pd.to_datetime(data['timestamp'])

In [6]:
data.dtypes

timestamp      datetime64[ns]
customer_no             int64
location               object
dtype: object

In [7]:
data.shape

(4884, 3)

In [8]:
data.isna().any()

timestamp      False
customer_no    False
location       False
dtype: bool

In [9]:
data.head()

Unnamed: 0,timestamp,customer_no,location
0,2019-09-02 07:03:00,1,dairy
1,2019-09-02 07:03:00,2,dairy
2,2019-09-02 07:04:00,3,dairy
3,2019-09-02 07:04:00,4,dairy
4,2019-09-02 07:04:00,5,spices


In [10]:
#new col for time
data['time'] = data['timestamp'].dt.time

In [11]:
data.head()

Unnamed: 0,timestamp,customer_no,location,time
0,2019-09-02 07:03:00,1,dairy,07:03:00
1,2019-09-02 07:03:00,2,dairy,07:03:00
2,2019-09-02 07:04:00,3,dairy,07:04:00
3,2019-09-02 07:04:00,4,dairy,07:04:00
4,2019-09-02 07:04:00,5,spices,07:04:00


In [12]:
data_clean = data.drop('timestamp', axis=1).set_index('time')

In [13]:
data_clean

Unnamed: 0_level_0,customer_no,location
time,Unnamed: 1_level_1,Unnamed: 2_level_1
07:03:00,1,dairy
07:03:00,2,dairy
07:04:00,3,dairy
07:04:00,4,dairy
07:04:00,5,spices
...,...,...
21:49:00,1442,checkout
21:49:00,1444,checkout
21:49:00,1445,dairy
21:50:00,1446,dairy


In [14]:
#total no of cust
data_clean['customer_no'].value_counts().count()

1447

In [15]:
#Calculate the total number of customers in each section
data_clean.groupby(by='location')['customer_no'].count().sort_values(ascending=False)

location
checkout    1437
fruit       1005
dairy        895
drinks       797
spices       750
Name: customer_no, dtype: int64

In [16]:
#Calculate the total number of customers in each section over time
data_clean.groupby(by=['location', 'time'])['customer_no'].count()

location  time    
checkout  07:05:00    2
          07:06:00    2
          07:07:00    1
          07:08:00    3
          07:09:00    1
                     ..
spices    21:43:00    1
          21:44:00    1
          21:46:00    2
          21:48:00    1
          21:49:00    2
Name: customer_no, Length: 2797, dtype: int64

In [17]:
#Count cust for each hour for location
pd.DataFrame(data_clean.groupby(pd.Grouper('location')).count())

Unnamed: 0_level_0,customer_no
location,Unnamed: 1_level_1
dairy,895
spices,750
fruit,1005
checkout,1437
drinks,797


In [18]:
#Display the number of customers at checkout over time
pd.DataFrame(data_clean.groupby(by=['location', 'time'])['customer_no'].count()).filter(like='checkout', axis=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,customer_no
location,time,Unnamed: 2_level_1
checkout,07:05:00,2
checkout,07:06:00,2
checkout,07:07:00,1
checkout,07:08:00,3
checkout,07:09:00,1
checkout,...,...
checkout,21:44:00,1
checkout,21:46:00,1
checkout,21:47:00,2
checkout,21:48:00,2


In [19]:
#Calculate the time each customer spent in the market
pd.DataFrame(data_clean.groupby(by=['customer_no', 'time'])['location'].count())

Unnamed: 0_level_0,Unnamed: 1_level_0,location
customer_no,time,Unnamed: 2_level_1
1,07:03:00,1
1,07:05:00,1
2,07:03:00,1
2,07:06:00,1
3,07:04:00,1
...,...,...
1444,21:48:00,1
1444,21:49:00,1
1445,21:49:00,1
1446,21:50:00,1


_____

In [32]:
data_clean

Unnamed: 0_level_0,customer_no,location
time,Unnamed: 1_level_1,Unnamed: 2_level_1
07:03:00,1,dairy
07:03:00,2,dairy
07:04:00,3,dairy
07:04:00,4,dairy
07:04:00,5,spices
...,...,...
21:49:00,1442,checkout
21:49:00,1444,checkout
21:49:00,1445,dairy
21:50:00,1446,dairy


In [34]:
pd.DataFrame(data_clean.groupby(by=['time','location'])['customer_no'].count())

Unnamed: 0_level_0,Unnamed: 1_level_0,customer_no
time,location,Unnamed: 2_level_1
07:03:00,dairy,2
07:04:00,dairy,2
07:04:00,fruit,1
07:04:00,spices,3
07:05:00,checkout,2
...,...,...
21:49:00,dairy,1
21:49:00,fruit,2
21:49:00,spices,2
21:50:00,dairy,1
