In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from Favorita_TSA.utils.data_loader import parquet_loader
from Favorita_TSA.utils.dataset import Dataset
from Favorita_TSA.viz.color_manager import ColorManager
from Favorita_TSA.viz.ploty_export import save_all
from Favorita_TSA.viz.ploty_theme import set_plotly_theme

In [None]:
import os
from pathlib import Path

print("Current working directory:", os.getcwd())

# Resolve the project root only once per kernel session. Path("..") is relative
# to the *current* working directory, so re-running this cell after the chdir
# below would otherwise climb one directory higher on every execution.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = Path("..").resolve()

# Switch the working directory to the main project directory.
os.chdir(PROJECT_ROOT)

# Check that the working directory was set as intended.
print("Current working directory:", os.getcwd())

In [None]:
# Activate the project's Plotly theme before any figure is created, then fetch
# the colour palette object; later cells read attributes such as c.forecast,
# c.anomaly and c.border from it.
set_plotly_theme()
c = ColorManager().get_colors()

In [None]:
# Load every table used in this notebook from its parquet file, in the same
# order as the targets on the left-hand side.
df_oil, df_items, df_holidays, df_stores, df_transactions, df_train = (
    parquet_loader(dataset)
    for dataset in (
        Dataset.OIL,
        Dataset.ITEMS,
        Dataset.HOLIDAYS_EVENTS,
        Dataset.STORES,
        Dataset.TRANSACTIONS,
        Dataset.TRAIN,
    )
)

In [None]:
# Print the first rows of every member of the Dataset enum as a quick overview.
for element in Dataset:
    preview = parquet_loader(element).head()
    print(element, "\n", preview, "\n")

In [None]:
# Column dtypes and memory footprint of the training table.
df_train.info()

In [None]:
# Total unit sales per (date, store) as a Series with a MultiIndex.
# NOTE(review): the next cell recomputes the same aggregation with
# as_index=False and rebinds this name, so this result is never used.
sale_day_store_level = df_train.groupby(["date", "store_nbr"])["unit_sales"].sum()

In [None]:
# Total unit sales per (date, store) as a flat DataFrame (keys stay columns).
store_day_groups = df_train.groupby(["date", "store_nbr"], as_index=False)
sale_day_store_level = store_day_groups["unit_sales"].sum()

In [None]:
# Collapse the store dimension: one row per date with the chain-wide total.
df_time = (
    sale_day_store_level.groupby("date")["unit_sales"]
    .sum()
    .reset_index()
)

In [None]:
# Plot the chain-wide daily total (df_time). The store-level frame has one row
# per (date, store) pair, so drawing it as a single line would zig-zag between
# stores instead of showing the total promised by the title.
fig = px.line(
    df_time,
    x="date",
    y="unit_sales",
    markers=True,
    title="Total Unit Sales over Time",
)

fig.update_layout(xaxis_title="Date", yaxis_title="Unit Sales")

fig.show()

In [None]:
# Make sure the oil dates are datetimes so they can be joined onto the sales dates.
df_oil["date"] = pd.to_datetime(df_oil["date"])

# Daily total sales, with the oil price attached where one exists for that date
# (left join: dates without an oil quote keep a missing price).
daily_totals = df_train.groupby("date")["unit_sales"].sum().reset_index()
sales_oil = daily_totals.merge(df_oil, on="date", how="left")

fig = go.Figure()

# One line per series: unit sales on the left y-axis, oil prices on the right.
for column, label, axis in (
    ("unit_sales", "Total Unit Sales", "y1"),
    ("dcoilwtico", "Oil Prices", "y2"),
):
    fig.add_trace(
        go.Scatter(
            x=sales_oil["date"],
            y=sales_oil[column],
            name=label,
            mode="lines",
            opacity=1.0,
            yaxis=axis,
        )
    )

left_axis = {"title": "Total Unit Sales", "side": "left"}
right_axis = {"title": "Oil Prices", "overlaying": "y", "side": "right"}

fig.update_layout(
    title="Daily Sales vs Oil Prices",
    xaxis={"title": "Date"},
    yaxis=left_axis,
    yaxis2=right_axis,
    legend={"x": 0.01, "y": 0.99},
)

fig.show()
save_all(fig, "eda/daily_sales_vs_oil_prices")  # (overwrite = True)

In [None]:
# Total unit sales per store, ordered from best- to worst-selling.
store_sales = (
    df_train.groupby("store_nbr")["unit_sales"]
    .sum()
    .reset_index()
    .sort_values(by="unit_sales", ascending=False)
    .copy()
)

# Label every store "Other" first, then mark the five strongest and weakest ones.
store_sales["category"] = "Other"

top5_idx = store_sales.nlargest(5, "unit_sales").index
bottom5_idx = store_sales.nsmallest(5, "unit_sales").index

for label, idx in (("Top 5", top5_idx), ("Bottom 5", bottom5_idx)):
    store_sales.loc[idx, "category"] = label

category_colors = {
    "Top 5": c.forecast,
    "Bottom 5": c.anomaly,
    "Other": c.border,
}

fig = px.bar(
    store_sales,
    x="store_nbr",
    y="unit_sales",
    color="category",
    title="Total Unit Sales Per Store (Top 5 & Bottom 5 Highlighted)",
    color_discrete_map=category_colors,
)

fig.update_layout(
    xaxis_title="Store Number",
    yaxis_title="Total Sales",
    xaxis_tickangle=-90,
)

fig.show()

In [None]:
# Add calendar year and month columns to the training table (in place).
train_dates = df_train["date"].dt
df_train["year"] = train_dates.year
df_train["month"] = train_dates.month

# Total sales per (year, month); one line per year is drawn below.
monthly_sales_by_year = df_train.groupby(["year", "month"], as_index=False)[
    "unit_sales"
].sum()

fig = px.line(
    monthly_sales_by_year,
    x="month",
    y="unit_sales",
    color="year",
    markers=True,
    title="Monthly Sales Trend Across Years",
)

# One tick per month, starting at 1.
month_axis = {"title": "Month", "tickmode": "linear", "tick0": 1, "dtick": 1}

fig.update_layout(
    xaxis=month_axis,
    yaxis={"title": "Total Units Sold"},
    legend_title_text="Year",
)

fig.show()

In [None]:
# Add a monthly Period column (YYYY-MM) to the training table (in place).
df_train["year_month"] = df_train["date"].dt.to_period("M")

# Total sales per year-month, with the period rendered as a string.
monthly_sales = (
    df_train.groupby("year_month")["unit_sales"]
    .sum()
    .reset_index()
    .assign(year_month=lambda frame: frame["year_month"].astype(str))
)

# Separate plotting copy whose year_month is parsed back into timestamps so the
# x-axis is a real date axis.
monthly_sales_plot = monthly_sales.assign(
    year_month=pd.to_datetime(monthly_sales["year_month"])
)

fig = px.line(
    monthly_sales_plot,
    x="year_month",
    y="unit_sales",
    markers=True,
    title="Consecutive Monthly Sales Trend Over Years",
)

fig.update_layout(xaxis_title="Year-Month", yaxis_title="Total Sales")

fig.show()

In [None]:
# Chain-wide daily totals plus a 30-row rolling mean and the calendar month.
daily_sales = df_train.groupby("date", as_index=False)["unit_sales"].sum()
daily_sales["rolling_avg"] = daily_sales["unit_sales"].rolling(window=30).mean()
daily_sales["month"] = daily_sales["date"].dt.month

# Average daily total per calendar month (pooled over all years).
monthly_sales = daily_sales.groupby("month", as_index=False)["unit_sales"].mean()

month_labels = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()

fig = px.line(
    monthly_sales,
    x="month",
    y="unit_sales",
    markers=True,
    title="Monthly Seasonality in Sales",
)

fig.update_layout(
    xaxis_title="Month",
    yaxis_title="Average Sales",
    xaxis={
        "tickmode": "array",
        "tickvals": list(range(1, 13)),
        "ticktext": month_labels,
    },
)

# Show plain, comma-grouped numbers on the y-axis instead of exponent notation.
fig.update_yaxes(tickformat=",.0f", exponentformat="none", showexponent="none")

fig.show()

In [None]:
# Day of the week for each date (Monday=0, Sunday=6).
daily_sales["day_of_week"] = daily_sales["date"].dt.dayofweek

# Average daily total per weekday.
weekly_sales = daily_sales.groupby("day_of_week", as_index=False)["unit_sales"].mean()

weekday_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

fig = px.line(
    weekly_sales,
    x="day_of_week",
    y="unit_sales",
    markers=True,
    title="Weekly Seasonality in Sales",
)

fig.update_layout(
    xaxis_title="Day of Week",
    yaxis_title="Average Sales",
    xaxis={
        "tickmode": "array",
        "tickvals": list(range(7)),
        "ticktext": weekday_labels,
    },
)

# Keep large numbers readable on the y-axis (no exponent notation).
fig.update_yaxes(tickformat=",.0f", exponentformat="none", showexponent="none")

fig.show()

In [None]:
from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import seasonal_decompose

# Prepare the series: DatetimeIndex, sorted chronologically.
ts = daily_sales.set_index("date")["unit_sales"].sort_index()

# Optional (if the series has gaps): ts = ts.asfreq("D").interpolate()

# Additive decomposition with a yearly period. This used to be computed twice
# (once into an unused `decomposition` variable); it now runs once and the old
# name is kept as an alias.
decomp = seasonal_decompose(ts, model="additive", period=365)
decomposition = decomp

fig = make_subplots(
    rows=4,
    cols=1,
    shared_xaxes=True,
    subplot_titles=("unit_sales", "Trend", "Seasonal", "Residual"),
)

# One subplot row per component, top to bottom.
components = (
    ("Observed", decomp.observed),
    ("Trend", decomp.trend),
    ("Seasonal", decomp.seasonal),
    ("Residual", decomp.resid),
)
for row, (name, values) in enumerate(components, start=1):
    fig.add_trace(
        go.Scatter(x=ts.index, y=values, mode="lines", name=name),
        row=row,
        col=1,
    )

fig.update_layout(
    height=900,
    title="Seasonal Decomposition (Additive, period=365)",
    showlegend=False,
)

# Make large numbers readable on the observed and trend panels.
for row in (1, 2):
    fig.update_yaxes(
        tickformat=",.0f", exponentformat="none", showexponent="none", row=row, col=1
    )

fig.show()

In [None]:
# Work on a copy of the training table and attach calendar features to it.
df = df_train.copy()
df["date"] = pd.to_datetime(df["date"])

date_parts = df["date"].dt
df["year"] = date_parts.year
df["month"] = date_parts.to_period("M")  # monthly Period
df["week"] = date_parts.to_period("W")  # weekly Period
df["dow"] = date_parts.dayofweek  # Monday=0 ... Sunday=6

In [None]:
# First rows of the working copy, including the calendar columns added above.
df.head()

In [None]:
# Other quick checks kept for reference:
# df.isna().mean().sort_values(ascending=False)
# df['unit_sales'].describe()
# Number of rows with a negative unit_sales value.
(df["unit_sales"] < 0).sum()

Dimensionen & Abdeckung

➡️ Welche Stores / Items haben kurze Historien?

In [None]:
# Cardinality checks kept for reference:
# df['store_nbr'].nunique()
# df['item_nbr'].nunique()
# df['date'].nunique()

# First and last date on which each store appears in the data.
df.groupby("store_nbr")["date"].agg(["min", "max"])

54 Stores 
    spätere Eröffnungen: 
    •	store 53 → 2014-05-29
	•	store 20 → 2015-02-13
    •	store 29 → 2015-03-20
	•	store 21 → 2015-07-24
    •	store 42 → 2015-08-21
	•	store 22 → 2015-10-09
	•	store 52 → 2017-04-20
	
4036 Items 

Store-Level EDA

In [None]:
# Store-level sales totals at daily, weekly and monthly resolution.
store_daily = df.groupby(["store_nbr", "date"], as_index=False)["unit_sales"].sum()
store_weekly = df.groupby(["store_nbr", "year", "week"], as_index=False)[
    "unit_sales"
].sum()
# The monthly totals stay a Series indexed by (store_nbr, month).
store_monthly = df.groupby(["store_nbr", "month"])["unit_sales"].sum()

In [None]:
import plotly.express as px

# Daily sales curve for a single example store.
store_id = 2

df_store = store_daily.loc[store_daily["store_nbr"].eq(store_id)]

fig = px.line(
    df_store,
    x="date",
    y="unit_sales",
    title=f"Daily Unit Sales - Store {store_id}",
)
fig.show()

In [None]:
# Optional: restrict to a single year first.
# df_2017 = store_daily[
#    store_daily["date"].dt.year == 2017
# ]

# Store x date matrix of *daily store totals*. This is built from the
# pre-aggregated store_daily frame: pivot_table on the item-level rows would
# apply its default aggfunc ("mean") and show the average item-row sale
# instead, while re-aggregating the full table a second time.
pivot = store_daily.pivot(index="store_nbr", columns="date", values="unit_sales")

fig = px.imshow(
    pivot, aspect="auto", title="Store x Date Heatmap", color_continuous_scale="Viridis"
)

fig.show()

Verteilung der täglichen Umsätze 
👉 erkennt Stores mit hoher Volatilität

In [None]:
# One box per store, showing the spread of its daily totals.
store_box_spec = {
    "x": "store_nbr",
    "y": "unit_sales",
    "title": "Distribution of Daily Unit Sales per Store",
}
fig = px.box(store_daily, **store_box_spec)
fig.show()

# Durchschnittlicher Wochenverlauf 

In [None]:
# Re-inspect the working frame before the weekly analysis.
df.head()

In [None]:
# ISO calendar week of each date, stored on the store-level daily frame.
iso_calendar = store_daily["date"].dt.isocalendar()
store_daily["week"] = iso_calendar["week"]

fig = px.box(
    store_daily, x="week", y="unit_sales", title="Unit Sales by Calendar Week of Year"
)
fig.show()

In [None]:
# Sum the store-level daily totals into one chain-wide series.
total_daily = store_daily.groupby("date", as_index=False)["unit_sales"].sum()

fig = px.line(
    total_daily,
    x="date",
    y="unit_sales",
    title="Total Daily Unit Sales (All Stores)",
)
fig.show()

Item-Level EDA

In [None]:
# Item-level sales totals at daily, weekly and monthly resolution.
item_daily = df.groupby(["item_nbr", "date"], as_index=False)["unit_sales"].sum()
item_weekly = df.groupby(["item_nbr", "year", "week"], as_index=False)[
    "unit_sales"
].sum()
# The monthly totals stay a Series indexed by (item_nbr, month).
item_monthly = df.groupby(["item_nbr", "month"])["unit_sales"].sum()

In [None]:
# Daily sales curve for a single example item.
item_id = 103665  # example

df_item = item_daily.loc[item_daily["item_nbr"].eq(item_id)]

fig = px.line(
    df_item,
    x="date",
    y="unit_sales",
    title=f"Daily Unit Sales - Item {item_id}",
)
fig.show()

Nachfrageverteilung pro Item

In [None]:
# One box per item, showing the spread of its daily totals.
item_box_spec = {
    "x": "item_nbr",
    "y": "unit_sales",
    "title": "Distribution of Daily Unit Sales per Item",
}
fig = px.box(item_daily, **item_box_spec)
fig.show()

In [None]:
# Histogram of the daily totals of the example item selected earlier.
fig = px.histogram(
    df_item,
    x="unit_sales",
    nbins=50,
    title=f"Unit Sales Distribution - Item {item_id}",
)
fig.show()

In [None]:
# Weekday name of each date, stored on the item-level daily frame.
item_daily["dow"] = item_daily["date"].dt.day_name()

# Fix the category order so the boxes run Monday -> Sunday.
weekday_order = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

fig = px.box(
    item_daily,
    x="dow",
    y="unit_sales",
    title="Unit Sales by Day of Week",
    category_orders={"dow": weekday_order},
)
fig.show()

In [None]:
# ISO calendar week of each date, stored on the item-level daily frame.
item_iso_calendar = item_daily["date"].dt.isocalendar()
item_daily["week"] = item_iso_calendar["week"]

fig = px.box(
    item_daily,
    x="week",
    y="unit_sales",
    title="Unit Sales by Calendar Week of Year",
)
fig.show()

In [None]:
# Total sales per item, keeping the 20 largest.
item_totals = item_daily.groupby("item_nbr", as_index=False)["unit_sales"].sum()
top_items = item_totals.sort_values("unit_sales", ascending=False).head(20)

fig = px.bar(
    top_items,
    x="item_nbr",
    y="unit_sales",
    title="Top 20 Items by Total Unit Sales",
    text="item_nbr",  # print the item number on each bar
)

# Treat item numbers as categories (no numeric scaling of the axis) and order
# the bars by their total, descending.
category_axis = {"type": "category", "categoryorder": "total descending"}
fig.update_layout(xaxis=category_axis)

# Place the labels outside the bars so they stay readable.
fig.update_traces(textposition="outside")

fig.show()

Store/Item-EDA 


In [None]:
# Total unit sales per (store, item) pair.
store_item = df.groupby(["store_nbr", "item_nbr"], as_index=False)["unit_sales"].sum()

Top Items pro Store 

In [None]:
# The 20 best-selling items of one example store.
store_id = 1

items_in_store = store_item.loc[store_item["store_nbr"].eq(store_id)]
top_items_store = items_in_store.sort_values("unit_sales", ascending=False).head(20)

fig = px.bar(
    top_items_store,
    x="item_nbr",
    y="unit_sales",
    title=f"Top 20 Items - Store {store_id}",
    text="item_nbr",  # print the item number on each bar
)

# Categorical axis so that only these 20 items appear, ordered by total.
fig.update_layout(xaxis={"type": "category", "categoryorder": "total descending"})

fig.update_traces(textposition="outside")

fig.show()

In [None]:
def top_k_share(df, k=10):
    """Return the fraction of total ``unit_sales`` contributed by the k largest rows.

    Args:
        df: Frame with a ``unit_sales`` column.
        k: Number of top rows to include in the numerator.

    Returns:
        Sum of the k largest ``unit_sales`` values divided by the column total.
    """
    sales = df["unit_sales"]
    top_k_total = sales.sort_values(ascending=False).head(k).sum()
    return top_k_total / sales.sum()


# Share of each store's sales that comes from its ten best-selling items.
# Only the unit_sales column is handed to the helper: applying a function to
# the whole grouped frame (grouping column included) is deprecated in recent
# pandas versions, and the helper reads nothing but unit_sales anyway.
concentration = (
    store_item.groupby("store_nbr")[["unit_sales"]]
    .apply(top_k_share, k=10)
    .reset_index(name="top10_share")
)

fig = px.bar(
    concentration,
    x="store_nbr",
    y="top10_share",
    title="Share of Sales from Top 10 Items per Store",
)
fig.show()

In [None]:
# Spread of per-item totals within each store, drawn on a log-scaled y-axis.
store_item_box_spec = {
    "x": "store_nbr",
    "y": "unit_sales",
    "title": "Distribution of Item Sales per Store",
}
fig = px.box(store_item, **store_item_box_spec)
fig.update_yaxes(type="log")
fig.show()

Zero-Rate 

In [None]:
# Share of an item's recorded days on which its total unit_sales is exactly 0.
# item_daily has no "unit_sales_x" column (that suffix only appears after a
# merge), so the lookup must use "unit_sales"; the old name raised a KeyError.
zero_rate_item = (
    item_daily.assign(is_zero=lambda x: x["unit_sales"] == 0)
    .groupby("item_nbr")["is_zero"]
    .mean()
    .reset_index(name="zero_rate")
)

In [None]:
# Items with the highest zero-rate.
top_n = 30

ranked_by_zero_rate = zero_rate_item.sort_values("zero_rate", ascending=False)
top_zero_items = ranked_by_zero_rate.head(top_n)

fig = px.bar(
    top_zero_items,
    x="item_nbr",
    y="zero_rate",
    title=f"Top {top_n} Items by Zero-Rate",
    text="item_nbr",  # print the item number on each bar
)

# Categorical axis so that only the selected items appear, ordered by value.
fig.update_layout(xaxis={"type": "category", "categoryorder": "total descending"})

fig.update_traces(textposition="outside")

fig.update_yaxes(tickformat=".2%")
fig.show()

In [None]:
# Daily unit sales per (store, item). Only unit_sales is aggregated: summing
# every remaining column would add up meaningless values (row ids, year, month)
# and would run into the monthly Period column added to df_train earlier,
# which cannot be summed.
store_item_daily = df_train.groupby(
    ["store_nbr", "item_nbr", "date"], as_index=False
)["unit_sales"].sum()

In [None]:
# Keep only the key columns and the sales figure, in a fixed column order.
kept_columns = ["date", "store_nbr", "item_nbr", "unit_sales"]
store_item_daily = store_item_daily[kept_columns]

In [None]:
# First rows of the (store, item, date) sales table.
store_item_daily.head()

✅ Zentrale Ausschluss-Kriterien (empfohlen)

1️⃣ Anzahl Verkaufstage (wichtigstes Kriterium)

Wie oft wurde überhaupt verkauft?

🔴 Ausschließen, wenn:  n_sales_days < 30  👉 Weniger als ~30 Verkaufstage → kein stabiles Muster 

In [None]:
# Number of days with positive sales per (store, item) pair. A vectorised
# boolean sum replaces the per-group Python lambda: same counts, but without
# calling Python code once per pair and without applying a function to the
# grouping columns (deprecated in recent pandas versions).
sales_days = (
    store_item_daily.assign(has_sale=store_item_daily["unit_sales"].gt(0))
    .groupby(["store_nbr", "item_nbr"])["has_sale"]
    .sum()
    .reset_index(name="n_sales_days")
)

2️⃣ Zero-Rate (Intermittency)

Wie oft ist die Nachfrage null?

🔴 Ausschließen, wenn: zero_rate > 0.6  👉 Mehr als 60 % Zero-Tage → intermittente Nachfrage

In [None]:
df = store_item_daily.copy()
df["date"] = pd.to_datetime(df["date"])

pair_keys = ["store_nbr", "item_nbr"]

# 1) Base: daily sales per store-item-date (in case a day has several rows).
daily = df.groupby(pair_keys + ["date"], as_index=False)["unit_sales"].sum()

# 2) Full calendar between the first and last observed date.
all_dates = pd.date_range(daily["date"].min(), daily["date"].max(), freq="D")
n_days = len(all_dates)

# 3) All observed store-item combinations.
pairs = daily[pair_keys].drop_duplicates()

# 4) Zero-rate over the full calendar, without materialising the
#    pairs x dates grid (which holds hundreds of millions of rows for this data).
#    A calendar day counts as "zero" when the pair has no row for it or the
#    row's total is 0, hence: zero_days = n_days - days with a non-zero total.
#    Assumes dates carry no time-of-day component, as a daily grid would
#    require as well.
nonzero_days = (
    daily.assign(is_nonzero=daily["unit_sales"].ne(0))
    .groupby(pair_keys)["is_nonzero"]
    .sum()
)
zero_rate = ((n_days - nonzero_days) / n_days).reset_index(name="zero_rate")

3️⃣ Gesamtvolumen

Wie relevant ist das Item überhaupt?

🔴 Ausschließen, wenn: total_sales < 50 👉 Sonst überfitten Modelle auf Rauschen

In [None]:
# Total units sold per (store, item) pair over the whole period.
total_sales = store_item_daily.groupby(
    ["store_nbr", "item_nbr"], as_index=False
).agg(total_sales=("unit_sales", "sum"))

4️⃣ Zeitliche Abdeckung

Über welchen Zeitraum gibt es Daten?

🔴 Ausschließen, wenn:  < 90 Tage Daten

In [None]:
# First and last recorded date per (store, item) pair.
coverage = store_item_daily.groupby(["store_nbr", "item_nbr"], as_index=False).agg(
    first_day=("date", "min"),
    last_day=("date", "max"),
)

🧮 Kombinierter Quality-Score (sehr empfohlen)
        👉 Nur forecastable == True prognostizieren 

In [None]:
# Combine all four exclusion criteria described above into one table.
# The time-coverage criterion (at least 90 days between first and last record)
# was documented and its inputs computed, but it was never part of the flag.
pair_keys = ["store_nbr", "item_nbr"]

quality = (
    sales_days.merge(zero_rate, on=pair_keys)
    .merge(total_sales, on=pair_keys)
    .merge(coverage, on=pair_keys)
)

# Inclusive span between the first and last recorded day of each pair.
quality["coverage_days"] = (quality["last_day"] - quality["first_day"]).dt.days + 1

quality["forecastable"] = (
    (quality["n_sales_days"] >= 30)
    & (quality["zero_rate"] <= 0.6)
    & (quality["total_sales"] >= 50)
    & (quality["coverage_days"] >= 90)
)

📊 Welche Visualisierungen machen Sinn?

1️⃣ Scatter: Zero-Rate vs. Sales-Tage (Pflicht)

📌 Beste Übersicht
	•	x = n_sales_days
	•	y = zero_rate
	•	Farbe = forecastable

👉 Trennung sofort sichtbar

2️⃣ Histogramme (Grenzwerte validieren)

a) Zero-Rate
	•	Wo liegt die Masse?
	•	Wo wird’s problematisch?

b) n_sales_days
	•	Wie viele Items sind „tot“?

3️⃣ Bubble-Plot (optional, aber stark)
	•	x = n_sales_days
	•	y = zero_rate
	•	Größe = total_sales
👉 Zeigt:
	•	seltene Bestseller
	•	häufige Low-Volume Items

4️⃣ Time-Series Beispiele (qualitativ!)

Zeige:
	•	1 „gutes“
	•	1 „grenzwertiges“
	•	1 „schlechtes“

👉 Super für Stakeholder & App-UX

In [None]:
# Preparation: cast the ids to strings (clean axes/labels) and build a
# combined "store x item" key for hover labels.
quality_plot = quality.astype({"store_nbr": str, "item_nbr": str})
quality_plot["key"] = quality_plot["store_nbr"] + " x " + quality_plot["item_nbr"]

In [None]:
# 1) Scatter: zero-rate vs. number of sales days, coloured by the forecastable flag.
hover_columns = {"total_sales": True, "n_sales_days": True, "zero_rate": ":.2%"}

fig = px.scatter(
    quality_plot,
    x="n_sales_days",
    y="zero_rate",
    color="forecastable",
    hover_name="key",
    hover_data=hover_columns,
    title="Forecast-Eignung: Zero-Rate vs. Anzahl Verkaufstage",
)

# Show the zero-rate axis as whole percentages.
fig.update_yaxes(tickformat=".0%")
fig.update_layout(
    xaxis_title="Anzahl Tage mit Verkäufen (unit_sales > 0)",
    yaxis_title="Zero-Rate (Anteil Tage mit unit_sales = 0)",
)
fig.show()

In [None]:
# One row per (date, item) on which the item was sold in at least one store,
# marked with sold=1.
positive_rows = store_item_daily.loc[store_item_daily["unit_sales"] > 0]
item_sales = positive_rows.drop_duplicates(["date", "item_nbr"])[
    ["date", "item_nbr"]
].assign(sold=1)

In [None]:
# Number of distinct dates with positive sales per item (across all stores),
# sorted from most to least frequently sold and stored as int32.
rows_with_sales = store_item_daily.loc[store_item_daily["unit_sales"] > 0]
items_by_days_sold = (
    rows_with_sales.groupby("item_nbr")["date"]
    .nunique()
    .reset_index(name="days_sold")
    .sort_values("days_sold", ascending=False)
    .astype({"days_sold": "int32"})
)

In [None]:
# The 50 items with the fewest sales days (the frame is sorted descending).
items_by_days_sold.tail(50)

In [None]:
# Histogram of sales days per item.
axis_labels = {"days_sold": "Days sold (unique dates with sales > 0)"}

fig = px.histogram(
    items_by_days_sold,
    x="days_sold",
    nbins=60,
    title="Distribution of Days Sold per Item",
    labels=axis_labels,
)

fig.update_layout(yaxis_title="Number of Items")
# Log-scale the counts (more important here than a log x-axis).
fig.update_yaxes(type="log")

fig.show()

In [None]:
# Rows with positive sales only, restricted to the key columns and the quantity.
has_positive_sales = store_item_daily["unit_sales"] > 0
sold_columns = ["date", "store_nbr", "item_nbr", "unit_sales"]
sold = store_item_daily.loc[has_positive_sales, sold_columns]

In [None]:
# Per (store, item): number of distinct selling days and total quantity sold.
store_item_stats = sold.groupby(["store_nbr", "item_nbr"], as_index=False).agg(
    days_sold=("date", "nunique"),  # distinct dates with a sale
    total_units=("unit_sales", "sum"),  # total quantity
)

In [None]:
# 10th-percentile thresholds for selling days and total quantity.
days_cutoff = store_item_stats["days_sold"].quantile(0.10)
units_cutoff = store_item_stats["total_units"].quantile(0.10)

# Slow movers: pairs at or below both thresholds.
few_days = store_item_stats["days_sold"] <= days_cutoff
few_units = store_item_stats["total_units"] <= units_cutoff
slow_movers = store_item_stats[few_days & few_units]

In [None]:
# Order the slow movers by selling days, then by quantity (both ascending),
# and show the 20 weakest pairs.
slow_movers = slow_movers.sort_values(by=["days_sold", "total_units"], ascending=True)

slow_movers.head(n=20)

In [None]:
# Number of slow-moving items per store, largest count first.
slow_counts = slow_movers.groupby("store_nbr").size()
slow_per_store = slow_counts.reset_index(name="n_slow_items").sort_values(
    "n_slow_items", ascending=False
)

In [None]:
# Selling days vs. total quantity for every (store, item) pair, log-log scaled.
scatter_spec = {
    "x": "days_sold",
    "y": "total_units",
    "hover_data": ["store_nbr", "item_nbr"],
    "title": "Store-Item Sales Performance",
}
fig = px.scatter(store_item_stats, **scatter_spec)

fig.update_xaxes(type="log")
fig.update_yaxes(type="log")

fig.show()

# ➡️ unten links = selten verkauft, geringe Stückzahl