In [56]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import os
import sys


sys.path.append(os.path.abspath('../..'))
from utils.db import get_db
from utils import find_missing_dates
from utils.fill_dates import fill_missing_dates
from utils.simple_moving_average import SMA

In [5]:
# connect to our database
engine = get_db()

In [10]:
query = """select sale_date, concat(product_code, ' - ', product_description) as product, sum(total_sales) as sales
from sales
where sale_date between '1/1/2025' and '6/1/2025'
and store_number='440'
and sale_type in ('Refunded', 'Sale')
and item_ring_type in ('ITEM', 'SUBD')
and product_code='84023020003.0'
group by sale_date, product_code, product_description
order by sale_date"""


In [41]:
ds = pd.read_sql(query, engine)

2025-06-06 14:21:17,506 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-06-06 14:21:17,506 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname_1)s
2025-06-06 14:21:17,506 INFO sqlalchemy.engine.Engine [cached since 794.5s ago] {'table_name': "select sale_date, concat(product_code, ' - ', product_description) as product, sum(total_sales) as sales\nfrom sales\nwhere sale_date between '1/1/20 ... (81 characters truncated) ... \nand item_ring_type in ('ITEM', 'SUBD')\nand product_code='84023020003.0'\ngroup by sale_date, product_code, product_description\norder by sale_date", 'param_1': 

In [29]:
ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93 entries, 0 to 92
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   sale_date  93 non-null     datetime64[ns]
 1   product    93 non-null     object        
 2   sales      93 non-null     float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 2.3+ KB


In [35]:
ds.head()

Unnamed: 0,sale_date,product,sales,DOW
0,2025-02-24,84023020003.0 - AM CH MILK 1% GAL 128 OZ,25.83,Monday
1,2025-02-25,84023020003.0 - AM CH MILK 1% GAL 128 OZ,42.06,Tuesday
2,2025-02-26,84023020003.0 - AM CH MILK 1% GAL 128 OZ,29.52,Wednesday
3,2025-02-27,84023020003.0 - AM CH MILK 1% GAL 128 OZ,7.38,Thursday
4,2025-02-28,84023020003.0 - AM CH MILK 1% GAL 128 OZ,18.45,Friday


In [42]:
missing = find_missing_dates(ds, 'sale_date', ds['sale_date'].min(), ds['sale_date'].max())
missing

[Timestamp('2025-03-14 00:00:00'),
 Timestamp('2025-03-24 00:00:00'),
 Timestamp('2025-04-02 00:00:00'),
 Timestamp('2025-04-11 00:00:00'),
 Timestamp('2025-05-21 00:00:00')]

In [43]:
ds = fill_missing_dates(ds, 'sale_date', 'sales', ds['sale_date'].min(), ds['sale_date'].max(), 0.01 )

In [23]:
# do we have any zero days
ds[ds['sales'] == 0]

Unnamed: 0,sale_date,product,sales


In [44]:
# lets add the day of the week name
ds['DOW'] = ds['sale_date'].dt.day_name()

In [36]:
ds['sale_date'] = pd.to_datetime(ds['sale_date'])
ds = ds.set_index('sale_date')

In [74]:
def plot(title, data, x, y, x_label, y_label):
    """ General function to plot data"""
    fig = px.line(
        data, 
        x=data[x], 
        y=data[y], 
        title='Milk',
        labels={x: x_label, y: y_label},
        hover_data={'sale_date': True, 'sales': True, 'DOW': True}
        )

    fig.update_layout(
        template='simple_white',
        font=dict(size=16),
        width=1400,
        height=450,
        title_x=0.5,
        xaxis=dict(
            tickangle=-45,
            tickformat="%b %d",
            showgrid=True,
            rangeselector=dict(
                buttons=list([
                    dict(count=7, label="1w", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(visible=True),
            type="date"
        ),
        yaxis=dict(showgrid=True)
    )
    
    fig.show()

In [75]:
plot(title='Milk Sales', data=ds, x='sale_date', y='sales', x_label='Date', y_label='Sales')



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [53]:
ds['ma-7'] = ds['sales'].rolling(window=7).mean()
ds.head(10)

Unnamed: 0,sale_date,product,sales,DOW,ma-7
0,2025-02-24,84023020003.0 - AM CH MILK 1% GAL 128 OZ,25.83,Monday,
1,2025-02-25,84023020003.0 - AM CH MILK 1% GAL 128 OZ,42.06,Tuesday,
2,2025-02-26,84023020003.0 - AM CH MILK 1% GAL 128 OZ,29.52,Wednesday,
3,2025-02-27,84023020003.0 - AM CH MILK 1% GAL 128 OZ,7.38,Thursday,
4,2025-02-28,84023020003.0 - AM CH MILK 1% GAL 128 OZ,18.45,Friday,
5,2025-03-01,84023020003.0 - AM CH MILK 1% GAL 128 OZ,22.14,Saturday,
6,2025-03-02,84023020003.0 - AM CH MILK 1% GAL 128 OZ,40.59,Sunday,26.567143
7,2025-03-03,84023020003.0 - AM CH MILK 1% GAL 128 OZ,18.45,Monday,25.512857
8,2025-03-04,84023020003.0 - AM CH MILK 1% GAL 128 OZ,52.02,Tuesday,26.935714
9,2025-03-05,84023020003.0 - AM CH MILK 1% GAL 128 OZ,33.21,Wednesday,27.462857


In [68]:
ds['ma-7-mine'] = SMA(ds['sales'], 2)
ds.head(10)

Unnamed: 0,sale_date,product,sales,DOW,ma-7,ma-7-mine
0,2025-02-24,84023020003.0 - AM CH MILK 1% GAL 128 OZ,25.83,Monday,,
1,2025-02-25,84023020003.0 - AM CH MILK 1% GAL 128 OZ,42.06,Tuesday,,33.945
2,2025-02-26,84023020003.0 - AM CH MILK 1% GAL 128 OZ,29.52,Wednesday,,35.79
3,2025-02-27,84023020003.0 - AM CH MILK 1% GAL 128 OZ,7.38,Thursday,,18.45
4,2025-02-28,84023020003.0 - AM CH MILK 1% GAL 128 OZ,18.45,Friday,,12.915
5,2025-03-01,84023020003.0 - AM CH MILK 1% GAL 128 OZ,22.14,Saturday,,20.295
6,2025-03-02,84023020003.0 - AM CH MILK 1% GAL 128 OZ,40.59,Sunday,26.567143,31.365
7,2025-03-03,84023020003.0 - AM CH MILK 1% GAL 128 OZ,18.45,Monday,25.512857,29.52
8,2025-03-04,84023020003.0 - AM CH MILK 1% GAL 128 OZ,52.02,Tuesday,26.935714,35.235
9,2025-03-05,84023020003.0 - AM CH MILK 1% GAL 128 OZ,33.21,Wednesday,27.462857,42.615


In [73]:
def plot_v2(title, data, x, y, x_label, y_label):
    """ General function to plot data"""
    fig = px.line(
        data, 
        x=data[x], 
        y=data[y], 
        title='Milk',
        labels={x: x_label, y: y_label},
        hover_data={'sale_date': True, 'sales': True, 'DOW': True}
        )
    
    fig.add_trace(
        go.Scatter(
            x=data[x],
            y=data['ma-7-mine'],
            mode='lines',
            name = '2 Day Moving Average',
            line = dict(dash='solid')
        )
    )

    fig.update_layout(
        template='simple_white',
        font=dict(size=16),
        width=1400,
        height=450,
        title_x=0.5,
        xaxis=dict(
            tickangle=-45,
            tickformat="%b %d",
            showgrid=True,
            rangeselector=dict(
                buttons=list([
                    dict(count=7, label="1w", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(visible=True),
            type="date"
        ),
        yaxis=dict(showgrid=True)
    )
    
    fig.show()

In [72]:
plot_v2(title='Milk', data=ds, x='sale_date', y='sales',
     x_label='Date', y_label='Sales')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

