In [1]:
import os
import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
from tqdm import tqdm

warnings.filterwarnings("ignore")

FILE = "consolidated_file_cleaned_v2.csv"
TARGET = "sold/m"
DATE_COL = "time"
CATEGORY_COL = "second-level_category"

# ========== Load & preprocess ==========
df = pd.read_csv(FILE)
df[DATE_COL] = pd.to_datetime(df[DATE_COL])
df = df[[DATE_COL, CATEGORY_COL, TARGET]].dropna()

# Convert numeric field safely
df[TARGET] = pd.to_numeric(df[TARGET], errors="coerce").fillna(0)

# Aggregate monthly
monthly = df.groupby([CATEGORY_COL, pd.Grouper(key=DATE_COL, freq="MS")])[TARGET].sum().reset_index()

# Pivot per category series
categories = monthly[CATEGORY_COL].unique()

os.makedirs("sarima_results", exist_ok=True)

all_resids = []

print("\n=== Training SARIMA per Category ===\n")
for cat in tqdm(categories):
    series = monthly[monthly[CATEGORY_COL] == cat].set_index(DATE_COL)[TARGET].asfreq("MS")

    # Skip extremely short series
    if len(series) < 18:
        continue

    # Auto seasonal order for monthly (12-month season)
    # You can adjust orders if needed for accuracy
    try:
        model = sm.tsa.statespace.SARIMAX(
            series,
            order=(1,1,1),
            seasonal_order=(1,1,1,12),
            enforce_stationarity=False,
            enforce_invertibility=False
        ).fit(disp=False)

        model.save(f"sarima_results/{cat}.pkl")

        resid = model.resid.dropna()
        tmp = pd.DataFrame({
            CATEGORY_COL: cat,
            "date": resid.index,
            "residual": resid.values
        })
        all_resids.append(tmp)
    except:
        pass

res_df = pd.concat(all_resids, ignore_index=True)
res_df.to_csv("sarima_residuals.csv", index=False)

print("\n✨ SARIMA Training Complete!")
print("Residuals file saved as: sarima_residuals.csv")


=== Training SARIMA per Category ===



100%|██████████| 215/215 [00:11<00:00, 18.37it/s]


✨ SARIMA Training Complete!
Residuals file saved as: sarima_residuals.csv





In [2]:
# --- Evaluation Metrics: RMSE, MAE, MAPE ---
from sklearn.metrics import mean_squared_error, mean_absolute_error

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    nonzero = y_true != 0
    return np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100 if np.any(nonzero) else np.nan

# Store metrics for each category
sarima_metrics = []

for cat in tqdm(categories):
    series = monthly[monthly[CATEGORY_COL] == cat].set_index(DATE_COL)[TARGET].asfreq("MS")
    if len(series) < 18:
        continue
    try:
        model = sm.tsa.statespace.SARIMAX(
            series,
            order=(1,1,1),
            seasonal_order=(1,1,1,12),
            enforce_stationarity=False,
            enforce_invertibility=False
        ).fit(disp=False)
        model.save(f"sarima_results/{cat}.pkl")
        resid = model.resid.dropna()
        tmp = pd.DataFrame({
            CATEGORY_COL: cat,
            "date": resid.index,
            "residual": resid.values
        })
        all_resids.append(tmp)
        # --- Evaluation ---
        y_true = series[-len(resid):]
        y_pred = series[-len(resid):] - resid
        rmse = mean_squared_error(y_true, y_pred, squared=False)
        mae = mean_absolute_error(y_true, y_pred)
        mape = mean_absolute_percentage_error(y_true, y_pred)
        sarima_metrics.append({'category': cat, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape})
        print(f"Category: {cat}")
        print(f"  RMSE: {rmse:.4f}")
        print(f"  MAE: {mae:.4f}")
        print(f"  MAPE: {mape:.2f}%\n")
    except:
        pass

metrics_df = pd.DataFrame(sarima_metrics)
metrics_df.head()

100%|██████████| 215/215 [00:11<00:00, 18.24it/s]


In [4]:
# --- Evaluation Metrics (Loading Pre-trained Models) ---
import pickle
from sklearn.metrics import mean_squared_error, mean_absolute_error

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    nonzero = y_true != 0
    return np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100 if np.any(nonzero) else np.nan

# Evaluate pre-trained models
sarima_metrics = []

for cat in tqdm(categories):
    model_path = f"sarima_results/{cat}.pkl"
    
    # Skip if model not trained
    if not os.path.exists(model_path):
        continue
    
    try:
        # Load pre-trained model
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        
        # Get series
        series = monthly[monthly[CATEGORY_COL] == cat].set_index(DATE_COL)[TARGET].asfreq("MS")
        if len(series) < 18:
            continue
        
        # Calculate residuals
        resid = model.resid.dropna()
        
        # Evaluation
        y_true = series[-len(resid):]
        y_pred = series[-len(resid):] - resid
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mae = mean_absolute_error(y_true, y_pred)
        mape = mean_absolute_percentage_error(y_true, y_pred)
        
        sarima_metrics.append({'category': cat, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape})
        print(f"Category: {cat}")
        print(f"  RMSE: {rmse:.4f}")
        print(f"  MAE: {mae:.4f}")
        print(f"  MAPE: {mape:.2f}%\n")
    except Exception as e:
        print(f"Error evaluating {cat}: {e}")
        continue

metrics_df = pd.DataFrame(sarima_metrics)
print(f"\n✅ Evaluation complete for {len(metrics_df)} categories")
metrics_df.head(10)

 20%|█▉        | 42/215 [00:00<00:00, 201.75it/s]

Category: Accessories
  RMSE: 3077449.5050
  MAE: 1738736.6760
  MAPE: 51.40%

Category: Accessories Sets & Packages
  RMSE: 34300.8912
  MAE: 21365.3868
  MAPE: 117.41%

Category: Additional Accessories
  RMSE: 595147.0829
  MAE: 483035.3691
  MAPE: 66.56%

Category: Alcoholic Beverages
  RMSE: 18645.9715
  MAE: 9828.9450
  MAPE: 95.61%

Category: Amplifiers & Mixers
  RMSE: 19603.3076
  MAE: 13893.1430
  MAPE: 617.40%

Category: Anklets
  RMSE: 5343.5734
  MAE: 3665.2118
  MAPE: 76.06%

Category: Art Supplies
  RMSE: 208959.1584
  MAE: 159403.3837
  MAPE: 46.78%

Category: Audio & Video Cables & Converters
  RMSE: 67952.5000
  MAE: 28619.7282
  MAPE: 120.32%

Category: Automobile Exterior Accessories
  RMSE: 70755.4349
  MAE: 50403.2964
  MAPE: 66.20%

Category: Automobile Interior Accessories
  RMSE: 228563.7796
  MAE: 105371.9308
  MAPE: 30.92%

Category: Automobile Spare Parts
  RMSE: 845283.5023
  MAE: 601017.5448
  MAPE: 398.30%

Category: Automotive Care
  RMSE: 230851.1504
  M

 29%|██▉       | 63/215 [00:00<00:00, 203.11it/s]

Category: Camera Care
  RMSE: 12076.2658
  MAE: 5482.3869
  MAPE: 56.16%

Category: Cameras
  RMSE: 8256.5656
  MAE: 4907.2873
  MAPE: 146.93%

Category: Clutches
  RMSE: 0.0000
  MAE: 0.0000
  MAPE: nan%

Category: Clutches & Wristlets
  RMSE: 25084.6114
  MAE: 12934.2397
  MAPE: 125.07%

Category: Collectible Items
  RMSE: 68216.8016
  MAE: 47652.7369
  MAPE: 46.42%

Category: Console Accessories
  RMSE: 76507.8559
  MAE: 55470.5421
  MAPE: 114.51%

Category: Console Machines
  RMSE: 14044.4587
  MAE: 8112.4512
  MAPE: 110.97%

Category: Cooking Essentials
  RMSE: 71122.4039
  MAE: 48912.9794
  MAPE: 44.30%

Category: Costumes
  RMSE: 3886.1918
  MAE: 2522.6787
  MAPE: 65.78%

Category: Crossbody & Shoulder Bags
  RMSE: 923754.7224
  MAE: 747467.2800
  MAPE: 212.18%

Category: Dairy & Eggs
  RMSE: 105848.3194
  MAE: 64411.4809
  MAPE: 50.28%

Category: Data Storage
  RMSE: 32659.2984
  MAE: 22585.4161
  MAPE: 49.79%

Category: Decoration
  RMSE: 1542747761.4303
  MAE: 647140355.0526


 48%|████▊     | 104/215 [00:00<00:00, 177.06it/s]

Category: Flats
  RMSE: 125742.5567
  MAE: 96971.6427
  MAPE: 125.60%

Category: Food Staples
  RMSE: 195576.4163
  MAE: 127637.0952
  MAPE: 127.82%

Category: Food Supplement
  RMSE: 222171.5290
  MAE: 160054.7462
  MAPE: 43.76%

Category: Fresh & Frozen Food
  RMSE: 5527.3311
  MAE: 4030.4855
  MAPE: 107.16%

Category: Furniture
  RMSE: 331247.4808
  MAE: 250338.2594
  MAPE: 61.36%

Category: Gardening
  RMSE: 178131.6684
  MAE: 128862.0181
  MAPE: 43.58%

Category: Gift & Wrapping
  RMSE: 1245276.1461
  MAE: 845134.7778
  MAPE: 63.18%

Category: Gift Set & Hampers
  RMSE: 17504.6244
  MAE: 9733.1489
  MAPE: 93.59%

Category: Gift Sets & Packages
  RMSE: 4662.2810
  MAE: 3021.0020
  MAPE: 110.78%

Category: Girl Clothes
  RMSE: 819101.9434
  MAE: 574362.7357
  MAPE: 148.66%

Category: Girl Shoes
  RMSE: 60965.9601
  MAE: 42749.3179
  MAPE: 56.64%

Category: Gloves
  RMSE: 24867.2655
  MAE: 9855.5468
  MAPE: 68.55%

Category: Hair Accessories
  RMSE: 869896.5893
  MAE: 474263.3084
  M

 66%|██████▌   | 142/215 [00:00<00:00, 175.29it/s]

Category: Lingerie & Underwear
  RMSE: 1431102.1806
  MAE: 990931.1057
  MAPE: 75.66%

Category: Litter & Toilet
  RMSE: 40398680.9162
  MAE: 24539093.4627
  MAPE: 3680.49%

Category: Loafers & Boat Shoes
  RMSE: 9370.1898
  MAE: 5282.2209
  MAPE: 80.75%

Category: Luggage
  RMSE: 11025.8314
  MAE: 7477.7789
  MAPE: 72.87%

Category: Magazines & Newspaper
  RMSE: 961.6030
  MAE: 491.1881
  MAPE: 63.45%

Category: Makeup
  RMSE: 2715928.1980
  MAE: 1601592.4713
  MAPE: 68.50%

Category: Maternity Accessories
  RMSE: 5345.1928
  MAE: 4041.6337
  MAPE: 129.70%

Category: Maternity Healthcare
  RMSE: 9862.0661
  MAE: 6966.5545
  MAPE: 136.09%

Category: Maternity Wear
  RMSE: 126220.3482
  MAE: 49288.0495
  MAPE: 187.05%

Category: Media Players
  RMSE: 3677.2260
  MAE: 2275.2465
  MAPE: 74.61%

Category: Medical Supplies
  RMSE: 4379147.6802
  MAE: 3319198.3434
  MAPE: 120.34%

Category: Men Muslim Wear
  RMSE: 3029.6689
  MAE: 524.2099
  MAPE: 100.00%

Category: Men Watches
  RMSE: 78082

 83%|████████▎ | 179/215 [00:01<00:00, 173.14it/s]

Category: Peripherals & Accessories
  RMSE: 149103.7719
  MAE: 121198.1786
  MAPE: 179.04%

Category: Personal Care
  RMSE: 35486096.2645
  MAE: 17057161.5268
  MAPE: 1709.67%

Category: Pet Accessories
  RMSE: 19482203.0586
  MAE: 9896369.7379
  MAPE: 2748.65%

Category: Pet Clothing & Accessories
  RMSE: 61423.7686
  MAE: 38691.2319
  MAPE: 82.28%

Category: Pet Food
  RMSE: 98335962.2956
  MAE: 45631716.9855
  MAPE: 3367.83%

Category: Pet Grooming
  RMSE: 478679.2203
  MAE: 272550.4293
  MAPE: 131.00%

Category: Pet Healthcare
  RMSE: 115967.1226
  MAE: 82102.0443
  MAPE: 36.75%

Category: Photo Albums
  RMSE: 112795.2716
  MAE: 53184.7119
  MAPE: 102.48%

Category: Prayer Attire & Equipment
  RMSE: 1559.2250
  MAE: 282.3325
  MAPE: 100.00%

Category: Printers & Scanners
  RMSE: 67828.3642
  MAE: 47307.1678
  MAPE: 110.16%

Category: Projectors & Accessories
  RMSE: 40321.5403
  MAE: 24923.7594
  MAPE: 132.95%

Category: Remote Controls
  RMSE: 183894.1382
  MAE: 42833.2952
  MAPE:

100%|██████████| 215/215 [00:01<00:00, 179.02it/s]

Category: Socks
  RMSE: 453010.7784
  MAE: 220046.7076
  MAPE: 54.14%

Category: Socks & Stockings
  RMSE: 211163.0381
  MAE: 158699.4175
  MAPE: 85.16%

Category: Softwares
  RMSE: 5226.9852
  MAE: 3837.2995
  MAPE: 203.25%

Category: Souvenirs
  RMSE: 270882.2479
  MAE: 203790.7228
  MAPE: 39.23%

Category: Sports & Outdoor Accessories
  RMSE: 1888411.9028
  MAE: 897906.1805
  MAPE: 77.50%

Category: Sports & Outdoor Apparels
  RMSE: 225978.6460
  MAE: 191058.5399
  MAPE: 88.71%

Category: Sports & Outdoor Recreation Equipments
  RMSE: 7045811.6583
  MAE: 3358083.3996
  MAPE: 338.79%

Category: Sports Footwear
  RMSE: 20920.2211
  MAE: 16941.2105
  MAPE: 83.69%

Category: Suits
  RMSE: 16442.1130
  MAE: 9125.2557
  MAPE: 97.42%

Category: Sweaters & Cardigans
  RMSE: 46024.5820
  MAE: 36821.2572
  MAPE: 147.57%

Category: TVs & Accessories
  RMSE: 32119.7357
  MAE: 18338.5562
  MAPE: 72.50%

Category: Tablets
  RMSE: 299367.5398
  MAE: 173305.5938
  MAPE: 534.76%

Category: Telco
  R




Unnamed: 0,category,RMSE,MAE,MAPE
0,Accessories,3077450.0,1738737.0,51.401518
1,Accessories Sets & Packages,34300.89,21365.39,117.407867
2,Additional Accessories,595147.1,483035.4,66.561355
3,Alcoholic Beverages,18645.97,9828.945,95.609523
4,Amplifiers & Mixers,19603.31,13893.14,617.40371
5,Anklets,5343.573,3665.212,76.0579
6,Art Supplies,208959.2,159403.4,46.778743
7,Audio & Video Cables & Converters,67952.5,28619.73,120.317052
8,Automobile Exterior Accessories,70755.43,50403.3,66.195579
9,Automobile Interior Accessories,228563.8,105371.9,30.923134


In [5]:
# Display metrics table for first 10 categories
metrics_df.head(10).to_string()
metrics_df.head(10)

Unnamed: 0,category,RMSE,MAE,MAPE
0,Accessories,3077450.0,1738737.0,51.401518
1,Accessories Sets & Packages,34300.89,21365.39,117.407867
2,Additional Accessories,595147.1,483035.4,66.561355
3,Alcoholic Beverages,18645.97,9828.945,95.609523
4,Amplifiers & Mixers,19603.31,13893.14,617.40371
5,Anklets,5343.573,3665.212,76.0579
6,Art Supplies,208959.2,159403.4,46.778743
7,Audio & Video Cables & Converters,67952.5,28619.73,120.317052
8,Automobile Exterior Accessories,70755.43,50403.3,66.195579
9,Automobile Interior Accessories,228563.8,105371.9,30.923134
