In [None]:
import pandas as pd

# The path is relative, so it is resolved against the current working directory.
csv_path = r"training_data_v2.csv"
df = pd.read_csv(csv_path)

In [None]:
# Convert BILLING_DATE to datetimes so the per-group plots below can be
# sorted chronologically rather than by whatever dtype read_csv inferred.
billing_dates = pd.to_datetime(df["BILLING_DATE"])
df["BILLING_DATE"] = billing_dates

In [None]:
# Print the frame's column names, dtypes, non-null counts and memory usage
# as a quick sanity check after the BILLING_DATE conversion.
df.info()

In [None]:
# Material groups (values of MATERIAL_GROUP_T) kept for the analysis.
# Order matters: the plotting loop walks this list front to back.
material_groups = [
    # "M-" codes
    "M-C",
    "M-G",
    "M-R",
    "M-S",
    # "P-" codes
    "P-C",
    "P-F",
    "P-G",
    "P-R",
    "P-S",
    # "R-" code
    "R-T",
    # "DBF-" / "DB-" codes
    # NOTE(review): "DB-TPE" lacks the "F" its siblings carry — confirm it is not a typo.
    "DBF-PP",
    "DBF-PPGF",
    "DBF-CPV",
    "DB-TPE",
    # remaining "M-" code (kept in its original position)
    "M-A",
    # "R0x" codes
    "R03",
    "R05",
    # "S0x" codes
    "S01",
    "S04",
    "S07",
]


In [None]:
# Keep only the rows whose material group is one of the groups of interest.
in_scope_mask = df["MATERIAL_GROUP_T"].isin(material_groups)
df_filtered = df.loc[in_scope_mask]

In [None]:
# One row per (material group, billing date) holding the summed billing quantity.
# as_index=False keeps the grouping keys as ordinary columns in the result.
group_keys = ["MATERIAL_GROUP_T", "BILLING_DATE"]
qty_by_group_day = df_filtered.groupby(group_keys, as_index=False)["TOTAL_BILLING_QTY_BASE_UNIT"]
daily_agg = qty_by_group_day.sum()

In [None]:
import plotly.graph_objects as go

# Layout settings shared by every per-group chart; only the title changes.
shared_layout = dict(
    xaxis_title="Billing Date",
    yaxis_title="Total Billing Qty",
    template="plotly_white",
)

# Draw one line chart of daily billing quantity per material group,
# in the order the groups are listed in material_groups.
for group in material_groups:

    # Select this group's rows and order them by date: the trace connects
    # points in the order they are supplied, so unsorted dates would zig-zag.
    group_df = daily_agg[daily_agg["MATERIAL_GROUP_T"] == group].sort_values("BILLING_DATE")

    # Groups with no rows in daily_agg produce no figure.
    if not group_df.empty:
        fig = go.Figure(
            data=[
                go.Scatter(
                    x=group_df["BILLING_DATE"],
                    y=group_df["TOTAL_BILLING_QTY_BASE_UNIT"],
                    mode="lines+markers",
                    name=group,
                )
            ]
        )
        fig.update_layout(title=f"Daily Total Billing Qty - {group}", **shared_layout)
        fig.show()

In [None]:
# Report which requested material groups have no rows in the daily aggregate.
present_groups = daily_agg["MATERIAL_GROUP_T"].unique()

requested_set = set(material_groups)
present_set = set(present_groups)
missing = requested_set - present_set

print("Missing Groups:", missing)


In [None]:
# Count the distinct billing dates available for each material group.
in_scope_rows = daily_agg[daily_agg["MATERIAL_GROUP_T"].isin(material_groups)]
distinct_days = in_scope_rows.groupby("MATERIAL_GROUP_T")["BILLING_DATE"].nunique()
daily_counts = distinct_days.reset_index(name="DAILY_DATAPOINTS")

# Bare expression so the notebook renders the table as the cell output.
daily_counts

In [None]:
# Hand-picked subset of material groups carried forward from here.
# NOTE(review): presumably chosen from the DAILY_DATAPOINTS table above;
# the selection threshold is not recorded in the notebook — confirm.
Qualified_material_groups = [
    "M-C",
    "M-G",
    "M-R",
    "M-S",
    "P-C",
    "P-F",
    "P-G",
    "P-R",
    "P-S",
    "R-T",
]