In [22]:
from db.helpers import new_sales_collection

In [31]:
def generate_pipeline(year: str):
    """Build the ``$group`` accumulator specs for a single sales year.

    The returned dict has two entries keyed by the year label:

    * ``"<year>"`` sums ``$Monthly_Sales`` over the documents whose
      ``$Sales_Year`` equals ``int(year)``; other documents add 0.
    * ``"<year>_month"`` counts those same documents (1 each, else 0).
    """

    def _for_this_year(value):
        # Yield `value` for documents of the requested year, 0 otherwise.
        return {"$cond": [{"$eq": ["$Sales_Year", int(year)]}, value, 0]}

    label = f"{year}"
    return {
        label: {"$sum": _for_this_year("$Monthly_Sales")},
        f"{label}_month": {"$sum": _for_this_year(1)},
    }


def calculate_growth(value1, value2):
    """Return the relative change from ``value1`` to ``value2``.

    The result is ``(value2 - value1) / value1``. When ``value1`` equals 0
    the ratio is undefined, so ``None`` is returned instead.
    """
    return None if value1 == 0 else (value2 - value1) / value1


def year_to_year_growth():
    """Aggregate per-year sales totals for every ``Reference_Full_ID``.

    Runs a single ``$group`` stage on ``new_sales_collection`` that, for each
    year from 2018 through 2023, adds the accumulators produced by
    ``generate_pipeline`` (``"<year>"`` and ``"<year>_month"``).

    Returns the aggregation results as a list of dicts.
    """
    group_spec = {"_id": "$Reference_Full_ID"}
    for year in ("2018", "2019", "2020", "2021", "2022", "2023"):
        group_spec.update(generate_pipeline(year))

    cursor = new_sales_collection.aggregate([{"$group": group_spec}])
    return list(cursor)


def average_outlet_sales_generated_vs_actual():
    """Average monthly sales per ``Reference_Full_ID``, generated vs actual.

    Documents whose ``Researcher`` is ``"Mahmoud"`` are treated as
    "generated"; all other documents are treated as "actual". Documents with
    a null or zero ``Monthly_Sales`` are excluded up front.

    Returns a list of dicts with ``_id``, ``generated`` and ``actual`` keys.
    Groups for which either average is missing (no document of that kind)
    or equal to 0 are dropped.
    """

    def _avg_sales_when(condition):
        # The fallback must be None (BSON null), not 0: ``$avg`` skips
        # non-numeric values, whereas a 0 would be counted as a real sale and
        # drag the average down by every document of the *other* kind.
        return {"$avg": {"$cond": [condition, "$Monthly_Sales", None]}}

    pipeline = [
        {"$match": {"Monthly_Sales": {"$nin": [None, 0]}}},
        {
            "$group": {
                "_id": "$Reference_Full_ID",
                "generated": _avg_sales_when({"$eq": ["$Researcher", "Mahmoud"]}),
                "actual": _avg_sales_when({"$ne": ["$Researcher", "Mahmoud"]}),
            }
        },
        # ``$avg`` yields null when a group has no document of that kind;
        # drop those together with zero averages so callers always receive
        # two usable, non-zero numbers.
        {
            "$match": {
                "actual": {"$nin": [None, 0]},
                "generated": {"$nin": [None, 0]},
            }
        },
    ]
    return list(new_sales_collection.aggregate(pipeline))


def anomalies():
    """Collect the reference ids whose sales look anomalous.

    A record is flagged when its growth ratio (see ``calculate_growth``) has
    an absolute value above 2. Two comparisons are checked:

    * 2020 vs 2021 totals from ``year_to_year_growth``;
    * generated vs actual averages from
      ``average_outlet_sales_generated_vs_actual``.

    Returns the distinct flagged ids as a list; because it is built from a
    set, the order of the ids is not defined.
    """
    # (record source, baseline key, comparison key); sources are called
    # lazily, one after the other.
    checks = (
        (year_to_year_growth, "2020", "2021"),
        (average_outlet_sales_generated_vs_actual, "generated", "actual"),
    )

    flagged = set()
    for fetch_records, baseline_key, comparison_key in checks:
        for record in fetch_records():
            ref_id = record["_id"]
            change = calculate_growth(record[baseline_key], record[comparison_key])
            # `change` is None when the baseline is 0; such records are skipped.
            if change and abs(change) > 2:
                flagged.add(ref_id)
    return list(flagged)

In [35]:
from prophet.plot import plot_plotly
import pandas as pd
from prophet import Prophet


def prophet_forecast_model(df: pd.DataFrame, key):
    """Fit a Prophet model on one column of ``df``.

    Args:
        df: Frame containing a ``Sales_Period`` column and the ``key`` column.
        key: Name of the column to forecast.

    Returns:
        Whatever ``Prophet.fit`` returns for the prepared two-column frame.
    """
    # Work on a copy, and hand Prophet the two columns under the names
    # ``ds`` (time) and ``y`` (value).
    training_frame = (
        df[[key, "Sales_Period"]]
        .copy()
        .set_index("Sales_Period")
        .reset_index()[["Sales_Period", key]]
        .rename(columns={"Sales_Period": "ds", key: "y"})
    )
    # Only the yearly seasonality setting is customised; every other Prophet
    # option is left at its default.
    forecaster = Prophet(yearly_seasonality=25)
    return forecaster.fit(training_frame)


def get_prediction(model: Prophet, number_of_months):
    """Predict with ``model`` over a horizon extended by ``number_of_months``.

    The future frame is built with ``freq="MS"``, so the extra periods are
    spaced by the month-start frequency. Returns the frame produced by
    ``model.predict``.
    """
    horizon = model.make_future_dataframe(periods=number_of_months, freq="MS")
    return model.predict(horizon)


# Demo cell: fit and plot a one-month forecast for a single anomalous
# reference id.
reference_ids = anomalies()
# Disabled loop scaffold: uncomment (together with the `break` below) to
# iterate over the flagged ids instead of taking only the first one.
# for i in reference_ids:
# NOTE(review): `reference_ids[0]` raises IndexError when no anomalies were
# found, and `anomalies()` builds its list from a set, so which id comes
# first is not defined. The name `id` also shadows the builtin `id()` for
# the rest of the session.
id = reference_ids[0]
print(id)
# Load every sales document stored for that reference id.
data = list(new_sales_collection.find({"Reference_Full_ID": id}))
df = pd.DataFrame(data)
# Fit on the Monthly_Sales column and extend the forecast by one period.
model = prophet_forecast_model(df, "Monthly_Sales")
forecast = get_prediction(model, 1)
plot_plotly(model, forecast)
# break
"""
in this example there are gaps in 
- Weekday_Delivery_Sales
- Weekend_Store_Sales
- Weekend_Delivery_Sales
"""

12:32:11 - cmdstanpy - INFO - Chain [1] start processing


Foodservice 132295


12:32:13 - cmdstanpy - INFO - Chain [1] done processing

The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

