# Merge data from all steps



In [1]:
# Suppress all warnings globally for the rest of this kernel session.
import warnings; 
warnings.simplefilter('ignore')

from statsmodels.tools.sm_exceptions import ConvergenceWarning

# NOTE(review): the blanket simplefilter('ignore') above already applies to
# every Warning subclass, so these two category-specific filters appear to be
# redundant — confirm before removing either form.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.svm import LinearSVC

from typing import Dict, List
from prophet import Prophet

import plotly.graph_objects as go

from scipy.stats import boxcox
from scipy.special import inv_boxcox
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX


## Load data

In [2]:
# --- Load result ---
# Read the three input workbooks (T1, T2, T3 — in that order) from the current
# working directory into their respective frames.
variables_df, cluster_df, mape_table = (
    pd.read_excel(workbook)
    for workbook in (
        'T1_TimeSeries_variables.xlsx',
        'T2_clustered_products.xlsx',
        'T3_mape_summary_by_product.xlsx',
    )
)

## Merge data

In [3]:
# --- Merge info ---
# Start from the MAPE table and left-join two lookup tables onto it, matching
# mape_table['Product_SubGroup'] against each lookup's 'Product' column:
#   1. variables_df -> 'BoxCox', 'ADF p', 'Decomposed_regression'
#   2. cluster_df   -> 'Cluster'
# The right-hand key column 'Product' duplicates 'Product_SubGroup' after each
# join, so it is dropped immediately.
merged_df = (
    mape_table
    .merge(
        variables_df[['Product', 'BoxCox', 'ADF p', 'Decomposed_regression']],
        left_on='Product_SubGroup',
        right_on='Product',
        how='left',
    )
    .drop(columns=['Product'])
    .merge(
        cluster_df[['Product', 'Cluster']],
        left_on='Product_SubGroup',
        right_on='Product',
        how='left',
    )
    .drop(columns=['Product'])
)

# A left join emits one output row per matching right-hand row, so a duplicated
# 'Product' key in either lookup table would silently repeat rows of the
# summary. Fail loudly here instead of exporting an inflated table.
assert len(merged_df) == len(mape_table), (
    f"Merge changed the row count from {len(mape_table)} to {len(merged_df)}; "
    "check variables_df / cluster_df for duplicate 'Product' values."
)

print(merged_df)

   Product_SubGroup  MAPE_ARIMA  MAPE_SARIMA  MAPE_Prophet    BoxCox  \
0              1051       40.19        33.74         35.82  0.571231   
1              1052       79.15        23.37         27.35  0.732822   
2              1040       50.43        16.93         22.30  0.076982   
3              1010       41.19        14.84         80.91  0.093423   
4              1020       46.13        40.07         76.27 -0.012723   
5              1013       45.27        73.17        196.55 -0.418742   
6              1012       42.33        40.62         58.52  0.420235   

      ADF p  Decomposed_regression  Cluster  
0  0.308784              -6.564040        0  
1  0.110572           -1477.477808        0  
2  0.062842           -5712.012156        3  
3  0.076948          -13885.338750        3  
4  0.005244          -23594.366141        3  
5  0.373996          -49996.668569        2  
6  0.391690          -77092.380616        1  


## Export merged data to file

In [5]:
# --- Export merged result ---
merged_df.to_excel('T4_summary.xlsx', index=False)
print("✅ Summary data saved.")

✅ Summary data saved.
