In [5]:
%matplotlib inline
import numpy as np
import pandas as pd
from pathlib import Path
from statistics import mean
from matplotlib import style
import matplotlib.pyplot as plt

In [6]:
coffee_data = "Coffee_sales.csv"
coffee_df = pd.read_csv(coffee_data)
coffee_df.dtypes

hour_of_day      int64
cash_type       object
money          float64
coffee_name     object
Time_of_Day     object
Weekday         object
Month_name      object
Weekdaysort      int64
Monthsort        int64
Date            object
Time            object
dtype: object

In [7]:
coffee_df = coffee_df.drop(['Time_of_Day','hour_of_day'],axis=1)
coffee_df.head()

Unnamed: 0,cash_type,money,coffee_name,Weekday,Month_name,Weekdaysort,Monthsort,Date,Time
0,card,38.7,Latte,Fri,Mar,5,3,2024-03-01,10:15:50.520000
1,card,38.7,Hot Chocolate,Fri,Mar,5,3,2024-03-01,12:19:22.539000
2,card,38.7,Hot Chocolate,Fri,Mar,5,3,2024-03-01,12:20:18.089000
3,card,28.9,Americano,Fri,Mar,5,3,2024-03-01,13:46:33.006000
4,card,38.7,Latte,Fri,Mar,5,3,2024-03-01,13:48:14.626000


In [8]:
# Finding null values
for column in coffee_df.columns:
    print(f"Column {column} has {coffee_df[column].isnull().sum()} null values")

Column cash_type has 0 null values
Column money has 0 null values
Column coffee_name has 0 null values
Column Weekday has 0 null values
Column Month_name has 0 null values
Column Weekdaysort has 0 null values
Column Monthsort has 0 null values
Column Date has 0 null values
Column Time has 0 null values


In [9]:
# Renaming to more conventional nomenclature. (coffee_name??? some aren't even coffee)
coffee_df.rename(columns = {"cash_type": "Payment_Method",
                            "money": "Payment_Amount",
                           "coffee_name":"Product_Name",
                           "Month_name":"Month",
                           "Weekdaysort":"WeekNum",
                           "Monthsort":"MonthNum"},inplace = True)


In [10]:
coffee_df.head()

Unnamed: 0,Payment_Method,Payment_Amount,Product_Name,Weekday,Month,WeekNum,MonthNum,Date,Time
0,card,38.7,Latte,Fri,Mar,5,3,2024-03-01,10:15:50.520000
1,card,38.7,Hot Chocolate,Fri,Mar,5,3,2024-03-01,12:19:22.539000
2,card,38.7,Hot Chocolate,Fri,Mar,5,3,2024-03-01,12:20:18.089000
3,card,28.9,Americano,Fri,Mar,5,3,2024-03-01,13:46:33.006000
4,card,38.7,Latte,Fri,Mar,5,3,2024-03-01,13:48:14.626000


In [11]:
coffee_df['Product_Name'].unique()

array(['Latte', 'Hot Chocolate', 'Americano', 'Americano with Milk',
       'Cocoa', 'Cortado', 'Espresso', 'Cappuccino'], dtype=object)

In [25]:
monthcheck = coffee_df.groupby('MonthNum').size()
monthcheck

MonthNum
1     201
2     423
3     494
4     168
5     241
6     223
7     237
8     272
9     344
10    426
11    259
12    259
dtype: int64

In [12]:
# Filtering down to coffee or coffee adjacent items
coffee_sales_df = coffee_df.filter(['Product_Name','Payment_Amount','MonthNum'])
coffee_sales_df = coffee_sales_df.loc[(coffee_sales_df['Product_Name'] == 'Latte') |
                                        (coffee_sales_df['Product_Name'] == 'Americano') |
                                        (coffee_sales_df['Product_Name'] == 'Americano with Milk') |
                                        (coffee_sales_df['Product_Name'] == 'Cortado') |
                                        (coffee_sales_df['Product_Name'] == 'Espresso') |
                                        (coffee_sales_df['Product_Name'] == 'Cappuccino') & (coffee_sales_df['Product_Name'] == 3) ].reset_index(drop=True)
coffee_sales_df

Unnamed: 0,Product_Name,Payment_Amount,MonthNum
0,Latte,38.70,3
1,Americano,28.90,3
2,Latte,38.70,3
3,Americano with Milk,33.80,3
4,Americano with Milk,33.80,3
...,...,...,...
2541,Americano with Milk,30.86,3
2542,Americano with Milk,30.86,3
2543,Latte,35.76,3
2544,Americano,25.96,3


In [14]:
# Chocolate Items
coffee_sales_df = coffee_df.filter(['Product_Name','Payment_Amount','MonthNum'])
chocolate_sales_df = coffee_sales_df.loc[(coffee_sales_df['Product_Name'] == 'Hot Chocolate') | 
                                        (coffee_sales_df['Product_Name'] == 'Cocoa')].reset_index(drop=True)
chocolate_sales_df

Unnamed: 0,Product_Name,Payment_Amount,MonthNum
0,Hot Chocolate,38.70,3
1,Hot Chocolate,38.70,3
2,Hot Chocolate,38.70,3
3,Cocoa,38.70,3
4,Hot Chocolate,38.70,3
...,...,...,...
510,Hot Chocolate,35.76,3
511,Hot Chocolate,35.76,3
512,Cocoa,35.76,3
513,Cocoa,35.76,3


In [None]:
coffee_sales_df.plot(kind="bar")