In [11]:
# import dependencies

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [12]:
# Load the transactions CSV into coffee_df.
# index_col=0 turns the file's first column ('transaction_id') into the index.
# NOTE(review): the earlier note here asked for openpyxl to be installed, but
# pd.read_csv does not use it -- presumably left over from an Excel-based load; confirm.

coffee_df = pd.read_csv(
    'coffee2_df',
    index_col=0,
)
coffee_df.head()

Unnamed: 0_level_0,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,transaction_total
transaction_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,6.0
2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,6.2
3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,9.0
4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.0,Coffee,Drip coffee,Our Old Time Diner Blend Sm,2.0
5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,6.2


In [13]:
# First and last transaction dates in the data.
# The column still holds 'YYYY-MM-DD' strings here, so min/max compare text;
# for this zero-padded format that ordering matches chronological order.

transaction_dates = coffee_df['transaction_date']
first_date, last_date = transaction_dates.min(), transaction_dates.max()

print(f"First date:\n{first_date}")
print(f"Last date:\n{last_date}")

First date:
2023-01-01
Last date:
2023-06-30


# Create a 'day_of_week' column for weekly analyses

In [14]:
# Show every distinct transaction date, in order of first appearance

distinct_dates = coffee_df['transaction_date'].unique()
print(distinct_dates)

['2023-01-01' '2023-01-02' '2023-01-03' '2023-01-04' '2023-01-05'
 '2023-01-06' '2023-01-07' '2023-01-08' '2023-01-09' '2023-01-10'
 '2023-01-11' '2023-01-12' '2023-01-13' '2023-01-14' '2023-01-15'
 '2023-01-16' '2023-01-17' '2023-01-18' '2023-01-19' '2023-01-20'
 '2023-01-21' '2023-01-22' '2023-01-23' '2023-01-24' '2023-01-25'
 '2023-01-26' '2023-01-27' '2023-01-28' '2023-01-29' '2023-01-30'
 '2023-01-31' '2023-02-01' '2023-02-02' '2023-02-03' '2023-02-04'
 '2023-02-05' '2023-02-06' '2023-02-07' '2023-02-08' '2023-02-09'
 '2023-02-10' '2023-02-11' '2023-02-12' '2023-02-13' '2023-02-14'
 '2023-02-15' '2023-02-16' '2023-02-17' '2023-02-18' '2023-02-19'
 '2023-02-20' '2023-02-21' '2023-02-22' '2023-02-23' '2023-02-24'
 '2023-02-25' '2023-02-26' '2023-02-27' '2023-02-28' '2023-03-01'
 '2023-03-02' '2023-03-03' '2023-03-04' '2023-03-05' '2023-03-06'
 '2023-03-07' '2023-03-08' '2023-03-09' '2023-03-10' '2023-03-11'
 '2023-03-12' '2023-03-13' '2023-03-14' '2023-03-15' '2023-03-16'
 '2023-03-

In [15]:
# Verify that the distinct transaction dates, taken in order of appearance,
# each fall exactly one day after the previous one (no gaps, no out-of-order days).

datetime_objects = pd.to_datetime(coffee_df['transaction_date']).unique()

# Pair each date with its successor and compare the gap to a single day.
one_day = pd.Timedelta(days=1)
is_increasing = all(
    later - earlier == one_day
    for earlier, later in zip(datetime_objects[:-1], datetime_objects[1:])
)


print(f"Is the list consecutively increasing by 1 day? {is_increasing}")

Is the list consecutively increasing by 1 day? True


In [16]:
# Add a 'day_of_week' column holding the English weekday name
# ('Monday' ... 'Sunday') of each transaction date.
# .dt.day_name() produces the names directly, so the column is written once
# and never holds intermediate integer codes.
coffee_df['day_of_week'] = pd.to_datetime(coffee_df['transaction_date']).dt.day_name()

# Weekday names in pandas' dayofweek order (Monday=0 ... Sunday=6).
# No longer needed to build the column; kept defined in case later cells
# reference it (e.g. for ordering tables or plots).
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Sunday']

coffee_df

Unnamed: 0_level_0,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,transaction_total,day_of_week
transaction_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.00,Coffee,Gourmet brewed coffee,Ethiopia Rg,6.00,Sunday
2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,6.20,Sunday
3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.50,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,9.00,Sunday
4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.00,Coffee,Drip coffee,Our Old Time Diner Blend Sm,2.00,Sunday
5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,6.20,Sunday
...,...,...,...,...,...,...,...,...,...,...,...,...
149452,2023-06-30,20:18:41,2,8,Hell's Kitchen,44,2.50,Tea,Brewed herbal tea,Peppermint Rg,5.00,Friday
149453,2023-06-30,20:25:10,2,8,Hell's Kitchen,49,3.00,Tea,Brewed Black tea,English Breakfast Lg,6.00,Friday
149454,2023-06-30,20:31:34,1,8,Hell's Kitchen,45,3.00,Tea,Brewed herbal tea,Peppermint Lg,3.00,Friday
149455,2023-06-30,20:57:19,1,8,Hell's Kitchen,40,3.75,Coffee,Barista Espresso,Cappuccino,3.75,Friday


In [17]:
# Count the transactions (rows) in the 'Coffee' product category
# for each day of the week.

is_coffee = coffee_df['product_category'] == 'Coffee'
weekly_coffee_sales = coffee_df.loc[is_coffee].value_counts('day_of_week')
weekly_coffee_sales

day_of_week
Friday       8567
Thursday     8488
Monday       8468
Wednesday    8315
Tuesday      8304
Sunday       8261
Saturday     8013
Name: count, dtype: int64

In [18]:
# How many distinct calendar dates fall on each day of the week.
# Each (date, weekday) pair is reduced to a single row first, so a date is
# counted once no matter how many transactions it has.

date_and_day = ['transaction_date', 'day_of_week']
unique_days = coffee_df.loc[:, date_and_day].drop_duplicates()

day_of_week_counts = unique_days['day_of_week'].value_counts()

day_of_week_counts

day_of_week
Sunday       26
Monday       26
Tuesday      26
Wednesday    26
Thursday     26
Friday       26
Saturday     25
Name: count, dtype: int64

In [19]:
# Average number of Coffee transactions per calendar day, for each day of the week:
# (Coffee transactions on that weekday) / (number of dates that fell on that weekday).
# The two Series are aligned on their 'day_of_week' index before dividing.
avg_weekly_sales = weekly_coffee_sales.div(day_of_week_counts)
avg_weekly_sales

day_of_week
Friday       329.500000
Monday       325.692308
Saturday     320.520000
Sunday       317.730769
Thursday     326.461538
Tuesday      319.384615
Wednesday    319.807692
Name: count, dtype: float64