In [None]:
import pandas as pd

# Load the coffee dataset used by every cell below; the path is relative to
# the notebook's working directory.
coffee = pd.read_csv("../data/new_coffee.csv")

In [None]:
# Replace missing values (NaN) in the numeric columns with 0.
# Maps each column name to the value used to fill its NaN entries.
fillna_object = dict.fromkeys(["Units Sold", "Revenue", "Cost", "Profit"], 0)

coffee.fillna(value=fillna_object, inplace=True)

In [None]:
# Compute the profit margin as a percentage, rounded to 2 decimals.
# A zero "Revenue" makes the division yield NaN (0/0) or +/-inf (x/0); both
# are reported as 0 so the column never carries infinite values, matching how
# coffee_type_performance treats zero-revenue groups.
coffee["Profit Margin"] = (
  (coffee["Profit"] / coffee["Revenue"] * 100)
  .replace([float("inf"), -float("inf")], 0)
  .fillna(0)
  .round(2)
)

In [None]:
def solf_coffee_ranking(df, n):
  """Rank coffee types by total units sold and keep the top ``n``.

  Args:
    df: DataFrame with "Coffee Type" and "Units Sold" columns.
    n: Number of top-ranked coffee types to return.

  Returns:
    DataFrame with columns "Coffee Type" and "Total Units Sold", ordered
    from the most to the least units sold.
  """
  units_by_type = df.groupby("Coffee Type")["Units Sold"].sum()
  top_types = units_by_type.sort_values(ascending=False).head(n)
  return top_types.reset_index(name="Total Units Sold")

In [None]:
def average_sales_per_customer(df):
  """Compute the mean units sold for each customer type.

  Args:
    df: DataFrame with "Customer Type" and "Units Sold" columns.

  Returns:
    DataFrame with columns "Customer Type" and "Average Sales", ordered from
    the highest to the lowest average, with numeric values rounded to
    2 decimals.
  """
  mean_units = df.groupby("Customer Type")["Units Sold"].mean()
  ranked = mean_units.sort_values(ascending=False)
  # Rounding is applied after reset_index, i.e. to the resulting DataFrame.
  return ranked.reset_index(name="Average Sales").round(2)

In [None]:
def payment_methods_ranking(df, n):
  """Rank payment methods by total profit and keep the top ``n``.

  Args:
    df: DataFrame with "Payment Method" and "Profit" columns.
    n: Number of top-ranked payment methods to return.

  Returns:
    DataFrame with columns "Payment Method" and "Total Profit", ordered from
    the most to the least profitable method.
  """
  profit_by_method = df.groupby("Payment Method")["Profit"].sum()
  top_methods = profit_by_method.sort_values(ascending=False).head(n)
  return top_methods.reset_index(name="Total Profit")

In [None]:
def days_with_no_sales(df):
  """Return the "Date" of every row whose "Units Sold" equals 0.

  Args:
    df: DataFrame with "Date" and "Units Sold" columns.

  Returns:
    DataFrame holding the original row labels of the matching rows followed
    by their dates in a "No Sales Date" column.
  """
  no_sales_mask = df["Units Sold"] == 0
  no_sales_dates = df.loc[no_sales_mask, "Date"]
  return no_sales_dates.reset_index(name="No Sales Date")

# Display the dates of the rows in `coffee` that recorded zero units sold.
days_with_no_sales(coffee)

In [None]:
def coffee_type_performance(df, coffee_type=None):
  """Summarize units sold, profit and profit margin per coffee type.

  Args:
    df: DataFrame with "Coffee Type", "Units Sold", "Profit" and "Revenue"
      columns.
    coffee_type: Optional coffee type to report on. When omitted (or falsy),
      every coffee type found in ``df`` is summarized.

  Returns:
    With ``coffee_type``: a one-row DataFrame with columns "Coffee Type",
    "Total Sales", "Total Profit" and "Profit Margin %".
    Without it: a DataFrame indexed by "Coffee Type" with columns
    "Total Sales", "Total Profit", "Total Revenue" and "Total Margin %".
    In both cases the margin is a percentage rounded to 2 decimals, and it
    is 0 when the total revenue is 0.
  """
  if coffee_type:
    subset = df[df["Coffee Type"] == coffee_type]
    total_sales = subset["Units Sold"].sum()
    total_profit = subset["Profit"].sum()
    total_revenue = subset["Revenue"].sum()
    # Built-in round() is used instead of the scalar's .round() method:
    # .sum() may hand back a plain Python number (e.g. object-dtype columns),
    # which has no .round attribute.
    if total_revenue:
      profit_margin = round(float(total_profit) / float(total_revenue) * 100, 2)
    else:
      profit_margin = 0.0
    return pd.DataFrame({
      "Coffee Type": [coffee_type],
      "Total Sales": [total_sales],
      "Total Profit": [total_profit],
      "Profit Margin %": [profit_margin],
    })

  totals = df.groupby("Coffee Type")[["Units Sold", "Profit", "Revenue"]].sum()
  # The margin is derived from the exact totals and rounded once at the end;
  # dividing already-rounded totals would compound the rounding error.
  # Zero-revenue groups produce NaN or +/-inf, both reported as 0.
  margin_pct = (
    (totals["Profit"] / totals["Revenue"] * 100)
    .replace([float("inf"), -float("inf")], 0)
    .fillna(0)
    .round(2)
  )
  return pd.DataFrame({
    "Total Sales": totals["Units Sold"].round(2),
    "Total Profit": totals["Profit"].round(2),
    "Total Revenue": totals["Revenue"].round(2),
    "Total Margin %": margin_pct,
  })

Unnamed: 0_level_0,Total Sales,Total Profit,Total Revenue,Total Margin %
Coffee Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Americano,54.0,108.0,189.0,57.14
Cappuccino,18.0,45.0,81.0,55.56
Cold Brew,28.0,98.0,168.0,58.33
Espresso,185.0,370.0,555.0,66.67
Flat White,20.0,52.0,90.0,57.78
Latte,140.0,415.0,720.0,57.64
Mocha,42.0,143.0,273.0,52.38


In [None]:
def weighted_coffee_ranking(df, weights=None):
  """Rank coffee types by a weighted blend of units, profit and promo usage.

  Args:
    df: DataFrame with "Coffee Type", "Units Sold", "Profit" and
      "Promotion Applied" columns.
    weights: Optional dict with the keys "units", "profit" and "promo".
      Defaults to {"units": 0.4, "profit": 0.4, "promo": 0.2}.

  Returns:
    DataFrame with columns "Coffee Type", "units", "profit", "promo_rate"
    and "weighted_score", ordered from the highest to the lowest score.
  """
  weights = weights or {"units": 0.4, "profit": 0.4, "promo": 0.2}

  # A row counts as promoted only when the flag reads "yes" in any casing;
  # missing flags compare unequal and therefore count as not promoted.
  promo_used = df["Promotion Applied"].str.lower().eq("yes").astype(int)

  # assign() returns a new frame, so the caller's DataFrame is not modified.
  grouped = df.assign(promo_used=promo_used).groupby("Coffee Type").agg(
    units=("Units Sold", "sum"),
    profit=("Profit", "sum"),
    promo_rate=("promo_used", "mean"),
  ).fillna(0)

  # Scale every metric by the magnitude of its column maximum. Dividing by a
  # negative maximum (e.g. every coffee type loses money) would flip signs and
  # rank the worst performer first. A zero maximum is replaced by 1 to avoid
  # division by zero.
  max_vals = grouped.max().abs().replace(0, 1)

  normalized = grouped / max_vals

  grouped["weighted_score"] = (
    normalized["units"] * weights["units"]
    + normalized["profit"] * weights["profit"]
    + normalized["promo_rate"] * weights["promo"]
  )

  return grouped.sort_values("weighted_score", ascending=False).reset_index()

# Display the weighted ranking for `coffee` using the default weights.
weighted_coffee_ranking(coffee)

Unnamed: 0,Coffee Type,units,profit,promo_rate,weighted_score
0,Latte,140.0,415.0,0.714286,0.84556
1,Espresso,185.0,370.0,0.0,0.756627
2,Mocha,42.0,143.0,1.0,0.428642
3,Cold Brew,28.0,98.0,1.0,0.354998
4,Flat White,20.0,52.0,1.0,0.293364
5,Americano,54.0,108.0,0.0,0.220853
6,Cappuccino,18.0,45.0,0.0,0.082292
