# TimeSeries variables


In [1]:
#Run in env Python3716 or HandelA2024B

# Import packages
import plotly.express as px
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import numpy as np
from scipy.stats import boxcox
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.linear_model import LinearRegression

## Read data

In [2]:
# Read in the data
data_all_years = pd.read_csv('Product_SubGroup_Date_2022-2025.csv')
data_all_years['Date'] = pd.to_datetime(data_all_years['Date'])


# filter OUT years 
years_to_exclude = [2025] 

# Filter the DataFrame
data_all = data_all_years.loc[~data_all_years['Date'].dt.year.isin(years_to_exclude)]


## Functions

In [3]:
# Function box_cox
def calculate_box_cox(group):
    if (group['Quantity'] <= 0).any():
        return None
    _, lam = boxcox(group['Quantity'])
    return lam


In [4]:
def adf_test(series):
    """Using an ADF test to determine if a series is stationary"""
    test_results = adfuller(series)
    
    p_value = test_results[1]

    if pd.notnull(p_value):
        return test_results[1]
    else:
        return 0

In [5]:
def Decomposed_regression(series):
    # Decompose (must set period if not inferring freq)
    decomposition = seasonal_decompose(series, model='additive', period=12)
    
    # Extract trend
    trend = decomposition.trend.dropna()  # Drop NaNs at edges
    
    # Prepare data for regression
    X = np.arange(len(trend)).reshape(-1, 1)  # Independent variable: time step
    y = trend.values.reshape(-1, 1)          # Dependent variable: trend values
    
    # Fit linear regression
    model = LinearRegression()
    model.fit(X, y)
    
    # Predict trend line
    trend_pred = model.predict(X)
    
    # Get slope & intercept
    slope = model.coef_[0][0]
    #intercept = model.intercept_[0]
    return slope

## Staging

In [6]:
# Find unique product values
unique_products=data_all['Product_SubGroup'].unique()
# Show result
print(unique_products)
print()

# Create result DataFrame
result_products = pd.DataFrame({'Product': unique_products})
result_products['BoxCox'] = None
result_products['ADF p'] = None
result_products['Decomposed_regression'] = None
print(result_products)

[1051 1052 1040 1010 1020 1013 1012]

   Product BoxCox ADF p Decomposed_regression
0     1051   None  None                  None
1     1052   None  None                  None
2     1040   None  None                  None
3     1010   None  None                  None
4     1020   None  None                  None
5     1013   None  None                  None
6     1012   None  None                  None


## Run functions over all products

In [7]:
# Loop through each product and calculate result
for product in result_products['Product']:
    group = data_all[data_all['Product_SubGroup'] == product]
    result = calculate_box_cox(group)
    result_products.loc[result_products['Product'] == product, 'BoxCox'] = result
    #print(result_products)
    print()
    print(product)
    group["Quantity_Season_Diff"] = group["Quantity"].diff(periods=12)
    result_products.loc[result_products['Product'] == product, 'ADF p'] = adf_test(group["Quantity_Season_Diff"][12:])
    result_products.loc[result_products['Product'] == product, 'Decomposed_regression'] = Decomposed_regression(group['Quantity'])

# Show result
print(result_products)


1051

1052

1040

1010

1020

1013

1012
   Product    BoxCox     ADF p Decomposed_regression
0     1051  0.571231  0.308784              -6.56404
1     1052  0.732822  0.110572          -1477.477808
2     1040  0.076982  0.062842          -5712.012156
3     1010  0.093423  0.076948          -13885.33875
4     1020 -0.012723  0.005244         -23594.366141
5     1013 -0.418742  0.373996         -49996.668569
6     1012  0.420235   0.39169         -77092.380616


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == "__main__":
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == "__main__":
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == "__main__":
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead



## Save result to file

In [8]:
result_products.to_excel('T1_TimeSeries_variables.xlsx', index=False)
print("✅ TimeSeries variables saved.")


✅ TimeSeries variables saved.
