# Time Series test

## Import Libraries

In [16]:

import sys
import os

import pandas as pd
import numpy as np

import sympy as sym
sym.init_printing()
from IPython.display import display, Math

from statsmodels.tsa.stattools import adfuller
from scipy.stats import boxcox
import plotly.express as px
import plotly.graph_objects as go


sys.path.append(os.path.abspath('..'))
from utils import find_missing_dates
from utils.fill_dates import fill_missing_dates
from utils.diff import custom_diff
from utils.log import custom_ln

#### SQL Query

select sale_date, concat(product_code, ' - ', product_description) as product, sum(total_sales) as sales
from sales
where sale_date between '1/1/2025' and '6/1/2025'
and store_number='440'
and sale_type in ('Refunded', 'Sale')
and item_ring_type in ('ITEM', 'SUBD')
and category = 90201
and product_code='28000600000.0'
group by sale_date, product_code, product_description
order by sale_date

## Import Data


In [17]:

# Load the store 440 fresh ground chuck sales extract from the CSV in ../data
ds = pd.read_csv('../data/store-440-fresh_ground_chuck.csv')

In [24]:
#ds.info()

In [22]:
#ds.describe()

In [3]:
ds.head()

Unnamed: 0,sale_date,product,sales
0,2025-02-24 00:00:00,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,78.96
1,2025-02-25 00:00:00,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,83.67
2,2025-02-26 00:00:00,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,28.13
3,2025-02-27 00:00:00,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,54.29
4,2025-02-28 00:00:00,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,57.99


## Check data

In [18]:
# Use the first and last dates present in the data as the expected range
startdate = ds['sale_date'].min()
enddate = ds['sale_date'].max()
# Ask the project helper which dates inside that range have no row
# (behavior inferred from its name and printed result; see utils.find_missing_dates)
missing = find_missing_dates(ds, "sale_date", startdate, enddate)
print(missing)
# A date shows up here when the item had no sales recorded for that day

[Timestamp('2025-04-11 00:00:00')]


In [19]:
# Fill the missing dates so the daily series has no gaps.
# NOTE(review): the last argument (0.01) is presumably the value written to
# `sales` for the inserted days -- small but positive so a log transform stays
# defined; confirm against utils.fill_dates.
ds = fill_missing_dates(ds, "sale_date", 'sales', startdate, enddate, 0.01)

In [7]:
# Show any rows whose sales are less than or equal to zero. The natural log
# is not finite for such values, so an empty result means the series is safe
# to log-transform.
ds[ds['sales'] <= 0]

Unnamed: 0,sale_date,product,sales


## Plot our data

In [20]:
# Add the weekday name (e.g. "Monday") for each sale date so weekly patterns
# are easier to spot in the tables and in the plot hover text
ds['day_of_week'] = pd.to_datetime(ds['sale_date']).dt.day_name()

In [21]:
def plot(title, data, x, y, x_label, y_label):
    """Draw an interactive Plotly line chart of one column against a date column.

    Parameters
    ----------
    title : str
        Chart title, shown centered above the plot.
    data : pandas.DataFrame
        Frame holding the columns named by ``x`` and ``y``.
    x : str
        Name of the date column used for the horizontal axis.
    y : str
        Name of the numeric column to draw.
    x_label : str
        Label for the horizontal axis.
    y_label : str
        Label for the vertical axis.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # Only request hover columns that actually exist, so the helper also works
    # for frames that lack one of these optional columns.
    hover_columns = {
        column: True
        for column in ('sale_date', 'sales', 'day_of_week')
        if column in data.columns
    }

    fig = px.line(
        data,
        x=x,
        y=y,
        title=title,  # honor the caller's title instead of a fixed string
        labels={x: x_label, y: y_label},
        hover_data=hover_columns,
    )

    fig.update_layout(
        template='simple_white',
        font=dict(size=16),
        width=1400,
        height=450,
        title_x=0.5,  # center the title
        xaxis=dict(
            tickangle=-45,
            tickformat="%b %d",
            showgrid=True,
            # Quick zoom buttons: last week, last month, everything
            rangeselector=dict(
                buttons=[
                    dict(count=7, label="1w", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(step="all"),
                ]
            ),
            rangeslider=dict(visible=True),
            type="date",
        ),
        yaxis=dict(showgrid=True),
    )

    fig.show()

In [22]:
# Plot the raw daily sales; the title describes sales (not price), matching the data
plot(title='Store 440 Fresh Ground Chuck Sales', data=ds, x='sale_date', y='sales',
     x_label='Date', y_label='Sales')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



<img src="../images/calendar-mar-jun_v1.png" alt="calendar" style="width:33%; display:block; margin:0 auto;" />

## Make the Time Series Stationary

#### Run a differencing function

$$
    d(t) = y(t) - y(t-1)
$$

![diff](../images/diff.png)

In [23]:
# First difference d(t) = y(t) - y(t-1), computed two ways for comparison:
# pandas' built-in Series.diff() and the project's own custom_diff helper.
# The first row has no predecessor, so Series.diff() leaves it as NaN.
ds['diff'] = ds['sales'].diff()
ds['custom_diff'] = custom_diff(ds['sales'].tolist())


In [24]:
ds.head()

Unnamed: 0,sale_date,product,sales,day_of_week,diff,custom_diff
0,2025-02-24,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,78.96,Monday,,
1,2025-02-25,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,83.67,Tuesday,4.71,4.71
2,2025-02-26,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,28.13,Wednesday,-55.54,-55.54
3,2025-02-27,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,54.29,Thursday,26.16,26.16
4,2025-02-28,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,57.99,Friday,3.7,3.7


In [25]:
# 🤘 this time we want to plot the difference. of course, we are going to use our implementation because it rocks!!
# The y-axis holds day-over-day changes, so label it as a difference rather than raw sales.
plot(title='Store 440 Fresh Ground Chuck Sales - First Difference', data=ds, x='sale_date', y='custom_diff',
     x_label='Date', y_label='Sales Difference')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



<img src="../images/calendar-mar-jun_v1.png" alt="calendar" style="width:33%; display:block; margin:0 auto;" />

In [41]:
ds.head()

Unnamed: 0,sale_date,product,sales,day_of_week,diff,custom_diff
0,2025-02-24,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,78.96,Monday,,
1,2025-02-25,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,83.67,Tuesday,4.71,4.71
2,2025-02-26,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,28.13,Wednesday,-55.54,-55.54
3,2025-02-27,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,54.29,Thursday,26.16,26.16
4,2025-02-28,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,57.99,Friday,3.7,3.7


#### Logarithm Transform

In [26]:
# Natural log of sales via NumPy; serves as the reference values for the
# custom logarithm implementation
ds['log'] = np.log(ds['sales'])

In [27]:
ds.head()

Unnamed: 0,sale_date,product,sales,day_of_week,diff,custom_diff,log
0,2025-02-24,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,78.96,Monday,,,4.368941
1,2025-02-25,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,83.67,Tuesday,4.71,4.71,4.42688
2,2025-02-26,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,28.13,Wednesday,-55.54,-55.54,3.336837
3,2025-02-27,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,54.29,Thursday,26.16,26.16,3.99434
4,2025-02-28,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,57.99,Friday,3.7,3.7,4.060271


Let's see how well our custom log function holds up against `np.log`.

In [28]:
# Apply the project's custom natural-log helper to every sales value.
# NOTE(review): each call emits a "y: ..." line in the cell output, which looks
# like leftover debug printing inside custom_ln -- consider removing it in utils/log.py.
ds['custom_log'] = [custom_ln(x) for x in  ds['sales']]

y: 0.9749874937468734
y: 0.9763788827211527
y: 0.9313422588396841
y: 0.9638270935069633
y: 0.9660959484658417
y: 0.9771637360127883
y: 0.9808905025797822
y: 0.9749027481490776
y: 0.9862030905077263
y: 0.9806819279435912
y: 0.9741668819426504
y: 0.9422132331696041
y: 0.9717474219522532
y: 0.9571917808219178
y: 0.9832635983263598
y: 0.9806332913721313
y: 0.9678818050425566
y: 0.9819673609232711
y: 0.9832551908908238
y: 0.9736911339121284
y: 0.9703659801452067
y: 0.952729851099031
y: 0.9793260285300807
y: 0.96684350132626
y: 0.8823529411764706
y: 0.9785338628313834
y: 0.9775809886783993
y: 0.9888752920235844
y: 0.976870590956401
y: 0.953117674636662
y: 0.9580184718723762
y: 0.9198396793587175
y: 0.9722953317634021
y: 0.9815515173876949
y: 0.9706830841395485
y: 0.9673042341016839
y: 0.9854067858445823
y: 0.9775885253249664
y: 0.9864764351883156
y: 0.98149005090236
y: 0.979296066252588
y: 0.9627352338364077
y: 0.9702380952380952
y: 0.9825006562253915
y: 0.9728887081469432
y: 0.9806051202482

In [29]:
ds.head()

Unnamed: 0,sale_date,product,sales,day_of_week,diff,custom_diff,log,custom_log
0,2025-02-24,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,78.96,Monday,,,4.368941,4.367879
1,2025-02-25,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,83.67,Tuesday,4.71,4.71,4.42688,4.425395
2,2025-02-26,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,28.13,Wednesday,-55.54,-55.54,3.336837,3.336837
3,2025-02-27,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,54.29,Thursday,26.16,26.16,3.99434,3.994264
4,2025-02-28,28000600000.0 - FRESH GROUND CHUCK 80/20 LB,57.99,Friday,3.7,3.7,4.060271,4.060141


In [30]:
# Plot the log-transformed sales (custom implementation) and see what it looks like.
# The y-axis holds ln(sales), so label it accordingly rather than as raw sales.
plot(title='Store 440 Fresh Ground Chuck Sales - Log Transform', data=ds, x='sale_date', y='custom_log',
     x_label='Date', y_label='ln(Sales)')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## Check for stationarity

#### Run the Augmented Dickey-Fuller (ADF) test

In [31]:
def adf_test(series):
    """Run an Augmented Dickey-Fuller test on ``series`` and print a summary.

    Prints the test statistic, the p-value, and the critical value for each
    significance level reported by ``adfuller``. Returns nothing.
    """
    outcome = adfuller(series)
    # adfuller returns a tuple: index 0 is the statistic, 1 the p-value,
    # and 4 the dict of critical values keyed by significance level.
    statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    print('ADF Statistic: ', statistic)
    print('P-Value: ', p_value)
    print('Critical Values: ')
    for level in critical_values:
        print('\t%s: %.2f' % (level, critical_values[level]))

In [32]:
# Run the ADF test on the log-transformed sales, skipping the first observation.
# NOTE(review): the log column has a value in its first row (only the differenced
# columns start with NaN), so the [1:] slice discards a valid point -- confirm
# whether `custom_diff` was the intended input here.
adf_test(ds["custom_log"][1:])

ADF Statistic:  -9.59883140604306
P-Value:  1.9452373054541786e-16
Critical Values: 
	1%: -3.50
	5%: -2.89
	10%: -2.58


#### Plot all the steps

In [35]:
def plot_v2(title, data, x, y, x_label, y_label):
    """Plot a series together with its differenced and log-transformed versions.

    Draws ``data[y]`` as the primary line and overlays the ``custom_diff`` and
    ``custom_log`` columns of ``data`` as dashed lines on the same axes.

    Parameters
    ----------
    title : str
        Chart title, shown centered above the plot.
    data : pandas.DataFrame
        Frame holding the columns named by ``x`` and ``y`` plus the
        ``custom_diff`` and ``custom_log`` columns.
    x : str
        Name of the date column used for the horizontal axis.
    y : str
        Name of the primary numeric column to draw.
    x_label : str
        Label for the horizontal axis.
    y_label : str
        Label for the vertical axis; also used as the primary line's legend entry.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # Only request hover columns that actually exist in the frame
    hover_columns = {
        column: True
        for column in ('sale_date', 'sales', 'day_of_week')
        if column in data.columns
    }

    fig = px.line(
        data,
        x=x,
        y=y,
        title=title,  # honor the caller's title instead of a fixed string
        labels={x: x_label, y: y_label},
        hover_data=hover_columns,
    )

    # Give the primary line a legend entry so all three series can be told apart
    # (at this point the figure holds only the px.line trace).
    fig.update_traces(name=y_label, showlegend=True)

    # (column, legend name, dash style) -- dash is one of solid | dash | dot | dashdot
    overlays = (
        ('custom_diff', 'Difference', 'dashdot'),
        ('custom_log', 'Logarithmic', 'dash'),
    )
    for column, legend_name, dash_style in overlays:
        fig.add_trace(
            go.Scatter(
                x=data[x],
                y=data[column],
                mode='lines',
                name=legend_name,
                line=dict(dash=dash_style),
            )
        )

    fig.update_layout(
        template='simple_white',
        font=dict(size=16),
        width=1400,
        height=450,
        title_x=0.5,  # center the title
        xaxis=dict(
            tickangle=-45,
            tickformat="%b %d",
            showgrid=True,
            # Quick zoom buttons: last week, last month, everything
            rangeselector=dict(
                buttons=[
                    dict(count=7, label="1w", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(step="all"),
                ]
            ),
            rangeslider=dict(visible=True),
            type="date",
        ),
        yaxis=dict(showgrid=True),
    )

    fig.show()

In [36]:
# Overlay the raw sales with the differenced and log-transformed series
plot_v2(title='Store 440 Fresh Ground Chuck Sales - All Steps', data=ds, x='sale_date', y='sales',
     x_label='Date', y_label='Sales')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## Correlations