In [3]:
import pandas as pd
import statistics
import numpy as np

# Declaration
data = pd.read_csv("Coffe_sales.csv")

target_col = ["hour_of_day","money","Weekdaysort","Monthsort"]

for dt in target_col:
    # Convert to int
    data[dt] = pd.to_numeric(data[dt], errors='coerce')
    
    # Remove NaN
    data = data.dropna(subset=[dt])

    max_val = data[dt].max()
    min_val = data[dt].min()
    range_val = max_val - min_val
    mean = round(data[dt].mean(),2)
    mode = round(statistics.mode(data[dt]),2)
    var = round(np.nanvar(data[dt],ddof=1),2)
    std = round(var ** 0.5,2)

    print(f"Max of {dt.title()} : {max_val}")
    print(f"Min of {dt.title()} : {min_val}")
    print(f"Range of {dt.title()} : {range_val}")
    print(f"Mean of {dt.title()} : {mean}")
    print(f"Mode of {dt.title()} : {mode}")
    print(f"Variance of {dt.title()} : {var}")
    print(f"Standard Deviance of {dt.title()} : {std}\n")

Max of Hour_Of_Day : 22
Min of Hour_Of_Day : 6
Range of Hour_Of_Day : 16
Mean of Hour_Of_Day : 14.19
Mode of Hour_Of_Day : 10
Variance of Hour_Of_Day : 17.93
Standard Deviance of Hour_Of_Day : 4.23

Max of Money : 38.7
Min of Money : 18.12
Range of Money : 20.580000000000002
Mean of Money : 31.65
Mode of Money : 35.76
Variance of Money : 23.79
Standard Deviance of Money : 4.88

Max of Weekdaysort : 7
Min of Weekdaysort : 1
Range of Weekdaysort : 6
Mean of Weekdaysort : 3.85
Mode of Weekdaysort : 2
Variance of Weekdaysort : 3.89
Standard Deviance of Weekdaysort : 1.97

Max of Monthsort : 12
Min of Monthsort : 1
Range of Monthsort : 11
Mean of Monthsort : 6.45
Mode of Monthsort : 3
Variance of Monthsort : 12.26
Standard Deviance of Monthsort : 3.5



In [11]:
import pandas as pd
import statistics
import numpy as np

# Declaration
# Descriptive Statistic for Average Price Per Coffee
data = pd.read_csv("Coffe_sales.csv")

# Unique values for Coffee Name
types_groups = data['coffee_name'].unique().tolist()

results = []

for types in types_groups:
    # Filter data by coffee type
    types_data = data[data['coffee_name'] == types]

    # Total sales count
    total_sales = types_data['money'].count()

    # Average price
    avg_price = round(types_data['money'].mean(), 2)

    # Result row
    result_row = {
        'Coffee Name': types,
        'Total Sales': total_sales,
        'Average Price': avg_price
    }

    results.append(result_row)

# Sort
res_df = pd.DataFrame(results)
res_df = res_df.sort_values(by='Total Sales', ascending=False)
print(res_df)

           Coffee Name  Total Sales  Average Price
3  Americano with Milk          809          30.59
0                Latte          757          35.50
2            Americano          564          25.98
7           Cappuccino          486          35.88
5              Cortado          287          25.73
1        Hot Chocolate          276          35.99
4                Cocoa          239          35.65
6             Espresso          129          20.85


In [15]:
import pandas as pd
import statistics
import numpy as np

# Declaration
# Descriptive Statistic for Total Coffee Order in Every Time Category for Each Coffee
df = pd.read_csv("Coffe_sales.csv")

# Categorize Time Of Order
def categorize_day(val):
    if val <= 12:
        return "Morning"
    elif val > 12 and val <= 18:
        return "Afternoon"
    else:
        return "Evening"

df["Time Category"] = df["hour_of_day"].apply(categorize_day)

# Unique values for Time Category
time_groups = df['Time Category'].unique().tolist()

# Unique values for Coffee Name
coffee_groups = df['coffee_name'].unique().tolist()

results = []

for time_cat in time_groups:
    for coffee in coffee_groups:
        # Filter data by Time Category and Coffee Name
        filtered_data = df[(df['Time Category'] == time_cat) & (df['coffee_name'] == coffee)]

        # Total coffee order
        total_orders = filtered_data['coffee_name'].count()

        if total_orders > 0:
            result_row = {
                'Time Category': time_cat,
                'Coffee Name': coffee,
                'Total Orders': total_orders
            }

            results.append(result_row)

# Sort
res_df = pd.DataFrame(results)
res_df = res_df.sort_values(by=['Total Orders'], ascending=[False])
print(res_df)

   Time Category          Coffee Name  Total Orders
3        Morning  Americano with Milk           386
8      Afternoon                Latte           318
11     Afternoon  Americano with Milk           276
2        Morning            Americano           274
0        Morning                Latte           261
10     Afternoon            Americano           242
15     Afternoon           Cappuccino           211
16       Evening                Latte           178
5        Morning              Cortado           163
7        Morning           Cappuccino           151
19       Evening  Americano with Milk           147
23       Evening           Cappuccino           124
9      Afternoon        Hot Chocolate           123
12     Afternoon                Cocoa            98
17       Evening        Hot Chocolate            95
13     Afternoon              Cortado            90
4        Morning                Cocoa            72
20       Evening                Cocoa            69
14     After

In [19]:
import pandas as pd
import statistics
import numpy as np

# Declaration
# Descriptive Statistic for Total Coffee Order in Every Time Category for Each Drink Type
df = pd.read_csv("Coffe_sales.csv")

coffee_items = [
    "Espresso", "Americano", "Americano with Milk", "Latte", "Cappuccino", "Mocha", "Cortado", "Macchiato", "Flat White", "Long Black"
]

# Categorize Coffee
def categorize_coffee(name):
    if name in coffee_items:
        return "Coffee"
    else:
        return "Non-Coffee"

df["Coffee Category"] = df["coffee_name"].apply(categorize_coffee)

# Unique values for Time Category
time_groups = df['Coffee Category'].unique().tolist()

# Unique values for Coffee Name
coffee_groups = df['coffee_name'].unique().tolist()

results = []

for cof_cat in time_groups:
    for coffee in coffee_groups:
        # Filter data by Time Category and Coffee Name
        filtered_data = df[(df['Coffee Category'] == cof_cat) & (df['coffee_name'] == coffee)]

        # Total coffee order
        total_orders = filtered_data['coffee_name'].count()

        if total_orders > 0:
            result_row = {
                'Coffee Category': cof_cat,
                'Coffee Name': coffee,
                'Total Orders': total_orders
            }

            results.append(result_row)

# Sort
res_df = pd.DataFrame(results)
res_df = res_df.sort_values(by=['Total Orders'], ascending=[False])
print(res_df)

  Coffee Category          Coffee Name  Total Orders
2          Coffee  Americano with Milk           809
0          Coffee                Latte           757
1          Coffee            Americano           564
5          Coffee           Cappuccino           486
3          Coffee              Cortado           287
6      Non-Coffee        Hot Chocolate           276
7      Non-Coffee                Cocoa           239
4          Coffee             Espresso           129
