In [1]:
!nvidia-smi

Thu Jan  2 21:17:03 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.42.06              Driver Version: 555.42.06      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-PCIE-40GB          Off |   00000000:27:00.0 Off |                   On |
| N/A   36C    P0             70W /  250W |                  N/A   |     N/A      Default |
|                                         |                        |              Enabled |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [2]:
!sudo apt-get install -y openjdk-11-jdk

# Set Java environment variables
import os
os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-11-openjdk-amd64'
os.environ['PATH'] = f"{os.environ['JAVA_HOME']}/bin:{os.environ['PATH']}"

# Install H2O (if not already installed)
!pip install h2o

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
openjdk-11-jdk is already the newest version (11.0.25+9-1ubuntu1~22.04).
0 upgraded, 0 newly installed, 0 to remove and 13 not upgraded.


In [3]:
import h2o
print(h2o.__version__)
from h2o.automl import H2OAutoML

h2o.init(max_mem_size='16G')

3.46.0.6
Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.25" 2024-10-15; OpenJDK Runtime Environment (build 11.0.25+9-post-Ubuntu-1ubuntu122.04); OpenJDK 64-Bit Server VM (build 11.0.25+9-post-Ubuntu-1ubuntu122.04, mixed mode, sharing)
  Starting server from /opt/conda/lib/python3.12/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmp_px_why6
  JVM stdout: /tmp/tmp_px_why6/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmp_px_why6/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,01 secs
H2O_cluster_timezone:,UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.6
H2O_cluster_version_age:,2 months
H2O_cluster_name:,H2O_from_python_unknownUser_bfph12
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,16 Gb
H2O_cluster_total_cores:,30
H2O_cluster_allowed_cores:,30


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
import xgboost as xgb

import warnings
warnings.filterwarnings('ignore')

In [5]:
pd.set_option('display.max_colwidth', None)

In [6]:
train= pd.read_csv('/home/onyxia/work/Forecasting_Sticker_Sales/train.csv')
test= pd.read_csv('/home/onyxia/work/Forecasting_Sticker_Sales/test.csv')
train = train.set_index('id')
test = test.set_index('id')
train = train.drop_duplicates()
test = test.drop_duplicates()
print("train_data shape :",train.shape)
print("test_data shape :",test.shape)

train_data shape : (230130, 5)
test_data shape : (98550, 4)


In [7]:
train.head(20)

Unnamed: 0_level_0,date,country,store,product,num_sold
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2010-01-01,Canada,Discount Stickers,Holographic Goose,
1,2010-01-01,Canada,Discount Stickers,Kaggle,973.0
2,2010-01-01,Canada,Discount Stickers,Kaggle Tiers,906.0
3,2010-01-01,Canada,Discount Stickers,Kerneler,423.0
4,2010-01-01,Canada,Discount Stickers,Kerneler Dark Mode,491.0
5,2010-01-01,Canada,Stickers for Less,Holographic Goose,300.0
6,2010-01-01,Canada,Stickers for Less,Kaggle,1837.0
7,2010-01-01,Canada,Stickers for Less,Kaggle Tiers,1659.0
8,2010-01-01,Canada,Stickers for Less,Kerneler,807.0
9,2010-01-01,Canada,Stickers for Less,Kerneler Dark Mode,940.0


In [8]:
# train.describe().T

In [9]:
train.tail(20)

Unnamed: 0_level_0,date,country,store,product,num_sold
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
230110,2016-12-31,Norway,Premium Sticker Mart,Holographic Goose,576.0
230111,2016-12-31,Norway,Premium Sticker Mart,Kaggle,3459.0
230112,2016-12-31,Norway,Premium Sticker Mart,Kaggle Tiers,2735.0
230113,2016-12-31,Norway,Premium Sticker Mart,Kerneler,1690.0
230114,2016-12-31,Norway,Premium Sticker Mart,Kerneler Dark Mode,1857.0
230115,2016-12-31,Singapore,Discount Stickers,Holographic Goose,200.0
230116,2016-12-31,Singapore,Discount Stickers,Kaggle,1259.0
230117,2016-12-31,Singapore,Discount Stickers,Kaggle Tiers,931.0
230118,2016-12-31,Singapore,Discount Stickers,Kerneler,556.0
230119,2016-12-31,Singapore,Discount Stickers,Kerneler Dark Mode,637.0


In [10]:
train_info={
    'Data Type': train.dtypes,
    'Missing Values(MV)': train.isnull().sum(),
    'MV percentage(%)' : train.isnull().mean() * 100,
    'Unique Values': train.nunique()
}

train_summary = pd.DataFrame(train_info)
train_summary

Unnamed: 0,Data Type,Missing Values(MV),MV percentage(%),Unique Values
date,object,0,0.0,2557
country,object,0,0.0,6
store,object,0,0.0,3
product,object,0,0.0,5
num_sold,float64,8871,3.854778,4037


In [11]:
train['date'] = pd.to_datetime(train['date'])
test['date'] = pd.to_datetime(test['date'])

print(train.dtypes)
print('\n')
print(train.dtypes)

date        datetime64[ns]
country             object
store               object
product             object
num_sold           float64
dtype: object


date        datetime64[ns]
country             object
store               object
product             object
num_sold           float64
dtype: object


In [12]:
target = (set(train.columns) - set(test.columns)).pop()

print(f"Target column: {target}")
print(f"Data type: {train[target].dtype}")

Target column: num_sold
Data type: float64


In [13]:
print(train['country'].unique())
print(train['store'].unique())
print(train['product'].unique())

['Canada' 'Finland' 'Italy' 'Kenya' 'Norway' 'Singapore']
['Discount Stickers' 'Stickers for Less' 'Premium Sticker Mart']
['Holographic Goose' 'Kaggle' 'Kaggle Tiers' 'Kerneler'
 'Kerneler Dark Mode']


In [14]:
unique = {
    "Column": ['country', 'store', 'product'],
    "Value": ['Canada, Finland, Italy, Kenya, Norway, Singapore', 'Discount Stickers, Stickers for Less, Premium Sticker Mart', 'Holographic Goose, Kaggle, Kaggle Tiers, Kerneler, Kerneler Dark Mode'],
    "Count": [6, 3, 5]
}
unique = pd.DataFrame(unique)

print('The unique value of Original Train Dataset:\n')
unique

The unique value of Original Train Dataset:



Unnamed: 0,Column,Value,Count
0,country,"Canada, Finland, Italy, Kenya, Norway, Singapore",6
1,store,"Discount Stickers, Stickers for Less, Premium Sticker Mart",3
2,product,"Holographic Goose, Kaggle, Kaggle Tiers, Kerneler, Kerneler Dark Mode",5


### Feature Engineering

In [15]:
le = LabelEncoder()
object_cols = train.select_dtypes(include=['object']).columns
for col in object_cols:
    train[col] = le.fit_transform(train[col])    
    test[col] = le.transform(test[col])

print(train.dtypes.T)
print('\n')
print(test.dtypes.T)

date        datetime64[ns]
country              int64
store                int64
product              int64
num_sold           float64
dtype: object


date       datetime64[ns]
country             int64
store               int64
product             int64
dtype: object


In [16]:
def create_features(data):
    """
    Create time series features based on time series index.
    """
    data = data.copy()
    data['year'] = data['date'].dt.year
    data['month'] = data['date'].dt.month
    data['day'] = data['date'].dt.day
    data['weekday'] = data['date'].dt.weekday  # 0 = Monday, 6 = Sunday
    data['quarter'] = data['date'].dt.quarter

    # Interaction Features: Combine categorical variables
    data['country_store'] = data['country'] * 10 + data['store']
    data['store_product'] = data['store'] * 10 + data['product']
    data['country_product'] = data['country'] * 10 + data['product']
    
    # Cyclic Features: Encode seasonality for 'month' and 'weekday'
    data['month_sin'] = np.sin(2 * np.pi * data['month'] / 12)
    data['month_cos'] = np.cos(2 * np.pi * data['month'] / 12)
    data['weekday_sin'] = np.sin(2 * np.pi * data['weekday'] / 7)
    data['weekday_cos'] = np.cos(2 * np.pi * data['weekday'] / 7)
    
    return data

train = create_features(train)
train.head(10)

Unnamed: 0_level_0,date,country,store,product,num_sold,year,month,day,weekday,quarter,country_store,store_product,country_product,month_sin,month_cos,weekday_sin,weekday_cos
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,2010-01-01,0,0,0,,2010,1,1,4,1,0,0,0,0.5,0.866025,-0.433884,-0.900969
1,2010-01-01,0,0,1,973.0,2010,1,1,4,1,0,1,1,0.5,0.866025,-0.433884,-0.900969
2,2010-01-01,0,0,2,906.0,2010,1,1,4,1,0,2,2,0.5,0.866025,-0.433884,-0.900969
3,2010-01-01,0,0,3,423.0,2010,1,1,4,1,0,3,3,0.5,0.866025,-0.433884,-0.900969
4,2010-01-01,0,0,4,491.0,2010,1,1,4,1,0,4,4,0.5,0.866025,-0.433884,-0.900969
5,2010-01-01,0,2,0,300.0,2010,1,1,4,1,2,20,0,0.5,0.866025,-0.433884,-0.900969
6,2010-01-01,0,2,1,1837.0,2010,1,1,4,1,2,21,1,0.5,0.866025,-0.433884,-0.900969
7,2010-01-01,0,2,2,1659.0,2010,1,1,4,1,2,22,2,0.5,0.866025,-0.433884,-0.900969
8,2010-01-01,0,2,3,807.0,2010,1,1,4,1,2,23,3,0.5,0.866025,-0.433884,-0.900969
9,2010-01-01,0,2,4,940.0,2010,1,1,4,1,2,24,4,0.5,0.866025,-0.433884,-0.900969


In [17]:
train.tail(10)

Unnamed: 0_level_0,date,country,store,product,num_sold,year,month,day,weekday,quarter,country_store,store_product,country_product,month_sin,month_cos,weekday_sin,weekday_cos
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
230120,2016-12-31,5,2,0,384.0,2016,12,31,5,4,52,20,50,-2.449294e-16,1.0,-0.974928,-0.222521
230121,2016-12-31,5,2,1,2380.0,2016,12,31,5,4,52,21,51,-2.449294e-16,1.0,-0.974928,-0.222521
230122,2016-12-31,5,2,2,2103.0,2016,12,31,5,4,52,22,52,-2.449294e-16,1.0,-0.974928,-0.222521
230123,2016-12-31,5,2,3,1052.0,2016,12,31,5,4,52,23,53,-2.449294e-16,1.0,-0.974928,-0.222521
230124,2016-12-31,5,2,4,1312.0,2016,12,31,5,4,52,24,54,-2.449294e-16,1.0,-0.974928,-0.222521
230125,2016-12-31,5,1,0,466.0,2016,12,31,5,4,51,10,50,-2.449294e-16,1.0,-0.974928,-0.222521
230126,2016-12-31,5,1,1,2907.0,2016,12,31,5,4,51,11,51,-2.449294e-16,1.0,-0.974928,-0.222521
230127,2016-12-31,5,1,2,2299.0,2016,12,31,5,4,51,12,52,-2.449294e-16,1.0,-0.974928,-0.222521
230128,2016-12-31,5,1,3,1242.0,2016,12,31,5,4,51,13,53,-2.449294e-16,1.0,-0.974928,-0.222521
230129,2016-12-31,5,1,4,1622.0,2016,12,31,5,4,51,14,54,-2.449294e-16,1.0,-0.974928,-0.222521


In [18]:
test = create_features(test)
test.head(10)

Unnamed: 0_level_0,date,country,store,product,year,month,day,weekday,quarter,country_store,store_product,country_product,month_sin,month_cos,weekday_sin,weekday_cos
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
230130,2017-01-01,0,0,0,2017,1,1,6,1,0,0,0,0.5,0.866025,-0.781831,0.62349
230131,2017-01-01,0,0,1,2017,1,1,6,1,0,1,1,0.5,0.866025,-0.781831,0.62349
230132,2017-01-01,0,0,2,2017,1,1,6,1,0,2,2,0.5,0.866025,-0.781831,0.62349
230133,2017-01-01,0,0,3,2017,1,1,6,1,0,3,3,0.5,0.866025,-0.781831,0.62349
230134,2017-01-01,0,0,4,2017,1,1,6,1,0,4,4,0.5,0.866025,-0.781831,0.62349
230135,2017-01-01,0,2,0,2017,1,1,6,1,2,20,0,0.5,0.866025,-0.781831,0.62349
230136,2017-01-01,0,2,1,2017,1,1,6,1,2,21,1,0.5,0.866025,-0.781831,0.62349
230137,2017-01-01,0,2,2,2017,1,1,6,1,2,22,2,0.5,0.866025,-0.781831,0.62349
230138,2017-01-01,0,2,3,2017,1,1,6,1,2,23,3,0.5,0.866025,-0.781831,0.62349
230139,2017-01-01,0,2,4,2017,1,1,6,1,2,24,4,0.5,0.866025,-0.781831,0.62349


In [19]:
print(train.nunique())
print('\n')
print(test.nunique())

date               2557
country               6
store                 3
product               5
num_sold           4037
year                  7
month                12
day                  31
weekday               7
quarter               4
country_store        18
store_product        15
country_product      30
month_sin            11
month_cos            11
weekday_sin           7
weekday_cos           7
dtype: int64


date               1095
country               6
store                 3
product               5
year                  3
month                12
day                  31
weekday               7
quarter               4
country_store        18
store_product        15
country_product      30
month_sin            11
month_cos            11
weekday_sin           7
weekday_cos           7
dtype: int64


In [20]:
correlation_matrix = train.corr()

# Display a summary of the new features and correlation matrix
print(correlation_matrix['num_sold'].sort_values(ascending=False))

num_sold           1.000000
store              0.239883
store_product      0.237983
country_store      0.140422
country_product    0.129382
country            0.129113
weekday            0.069613
month_sin          0.014119
weekday_cos        0.013453
product            0.004255
day                0.001137
month_cos         -0.001781
month             -0.006255
quarter           -0.006670
year              -0.040462
date              -0.040936
weekday_sin       -0.063575
Name: num_sold, dtype: float64


In [21]:
# scaler = StandardScaler()
# train['num_sold_scaled'] = scaler.fit_transform(train[['num_sold']])
# train

### Model Building

In [26]:
#X = train.drop(['num_sold'], axis=1)
#y = train['num_sold']

# Split the data
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [27]:
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Create a scorer for GridSearchCV
mape_scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

In [24]:
train_h2o = h2o.H2OFrame(train)
test_h2o = h2o.H2OFrame(test)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [25]:
target = 'num_sold'
features = [col for col in train.columns if col != target]

In [29]:
train_h2o, valid_h2o = train_h2o.split_frame(ratios=[0.8], seed=42)

In [32]:
# Define and run AutoML
aml = H2OAutoML(max_models=20, max_runtime_secs=1200)
aml.train(x=features, y=target, training_frame=train_h2o, validation_frame=valid_h2o)

AutoML progress: |
21:23:29.338: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models.


21:23:30.486: XGBoost_1_AutoML_1_20250102_212329 [XGBoost def_2] failed: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for XGBoost model: XGBoost_1_AutoML_1_20250102_212329_cv_1.  Details: ERRR on field: _response_column: Response contains missing values (NAs) - not supported by XGBoost.


██████
21:23:57.405: XGBoost_2_AutoML_1_20250102_212329 [XGBoost def_1] failed: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for XGBoost model: XGBoost_2_AutoML_1_20250102_212329_cv_1.  Details: ERRR on field: _response_column: Response contains missing values (NAs) - not supported by XGBoost.


███████████
21:27:41.252: XGBoost_3_AutoML_1_20250102_

Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,122.0,122.0,828877.0,0.0,10.0,8.032787,1.0,945.0,537.0738

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
aic,,0.0,,,,,
loglikelihood,,0.0,,,,,
mae,34.649597,0.1853877,34.91612,34.52037,34.60811,34.454334,34.749058
mean_residual_deviance,3500.7861,50.530464,3522.9663,3413.8408,3544.7874,3513.4265,3508.91
mse,3500.7861,50.530464,3522.9663,3413.8408,3544.7874,3513.4265,3508.91
r2,0.9926812,0.0001262,0.9926984,0.9928802,0.992574,0.9925701,0.9926831
residual_deviance,3500.7861,50.530464,3522.9663,3413.8408,3544.7874,3513.4265,3508.91
rmse,59.1662,0.4287077,59.354584,58.42808,59.538116,59.274166,59.236053
rmsle,,0.0,,,,,

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_mae,training_deviance,validation_rmse,validation_mae,validation_deviance
,2025-01-02 21:27:37,13.231 sec,0.0,691.6350036,531.5634938,478358.9781550,687.1091229,530.6555961,472118.9468026
,2025-01-02 21:27:38,13.435 sec,5.0,415.6404085,317.8759673,172756.9492156,413.4110431,317.4538322,170908.6905328
,2025-01-02 21:27:38,13.612 sec,10.0,256.2993885,193.3508368,65689.3765443,255.7226897,193.3393805,65394.0940411
,2025-01-02 21:27:38,13.789 sec,15.0,165.3272621,121.3618863,27333.1035803,166.2394486,121.7361538,27635.5542635
,2025-01-02 21:27:38,13.978 sec,20.0,116.7350225,82.3633937,13627.0654718,118.9050483,83.2407313,14138.4105025
,2025-01-02 21:27:38,14.150 sec,25.0,91.1490176,61.3415904,8308.1434139,94.4396505,62.7166304,8918.8475909
,2025-01-02 21:27:38,14.322 sec,30.0,78.0436334,50.5819469,6090.8087188,82.0911356,52.3140396,6738.9545374
,2025-01-02 21:27:39,14.492 sec,35.0,70.3601835,43.8810717,4950.5554215,75.0869625,45.8778737,5638.0519429
,2025-01-02 21:27:39,14.659 sec,40.0,66.0917313,40.0598166,4368.1169470,71.1759920,42.2375250,5066.0218393
,2025-01-02 21:27:39,14.821 sec,45.0,63.2667275,37.7384538,4002.6788067,68.6418247,40.0703674,4711.7001042

variable,relative_importance,scaled_importance,percentage
country_product,104647344128.0,1.0,0.35168
country_store,68382507008.0,0.6534567,0.2298076
product,62068576256.0,0.5931214,0.2085889
store_product,17738739712.0,0.1695097,0.0596132
store,15425274880.0,0.1474024,0.0518385
country,14491387904.0,0.1384783,0.0487001
year,4842516480.0,0.0462746,0.0162739
date,2463348992.0,0.0235395,0.0082784
weekday,1925412096.0,0.0183991,0.0064706
weekday_sin,1834713600.0,0.0175323,0.0061658


In [33]:
lb = aml.leaderboard
print(lb)

model_id                                        rmse      mse      mae        rmsle    mean_residual_deviance
GBM_4_AutoML_1_20250102_212329               59.1676  3500.8   34.6496  nan                           3500.8
GBM_1_AutoML_1_20250102_212329               59.7187  3566.32  34.7686  nan                           3566.32
GBM_3_AutoML_1_20250102_212329               60.3151  3637.92  36.123   nan                           3637.92
GBM_grid_1_AutoML_1_20250102_212329_model_5  60.3521  3642.38  35.2988  nan                           3642.38
GBM_2_AutoML_1_20250102_212329               61.7995  3819.18  37.298   nan                           3819.18
GBM_5_AutoML_1_20250102_212329               63.4918  4031.21  38.7805  nan                           4031.21
GBM_grid_1_AutoML_1_20250102_212329_model_1  63.9322  4087.33  38.9874  nan                           4087.33
GBM_grid_1_AutoML_1_20250102_212329_model_2  64.774   4195.67  39.4147  nan                           4195.67
GBM_grid_1_

In [34]:
leader = aml.leader
leader

Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,122.0,122.0,828877.0,0.0,10.0,8.032787,1.0,945.0,537.0738

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
aic,,0.0,,,,,
loglikelihood,,0.0,,,,,
mae,34.649597,0.1853877,34.91612,34.52037,34.60811,34.454334,34.749058
mean_residual_deviance,3500.7861,50.530464,3522.9663,3413.8408,3544.7874,3513.4265,3508.91
mse,3500.7861,50.530464,3522.9663,3413.8408,3544.7874,3513.4265,3508.91
r2,0.9926812,0.0001262,0.9926984,0.9928802,0.992574,0.9925701,0.9926831
residual_deviance,3500.7861,50.530464,3522.9663,3413.8408,3544.7874,3513.4265,3508.91
rmse,59.1662,0.4287077,59.354584,58.42808,59.538116,59.274166,59.236053
rmsle,,0.0,,,,,

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_mae,training_deviance,validation_rmse,validation_mae,validation_deviance
,2025-01-02 21:27:37,13.231 sec,0.0,691.6350036,531.5634938,478358.9781550,687.1091229,530.6555961,472118.9468026
,2025-01-02 21:27:38,13.435 sec,5.0,415.6404085,317.8759673,172756.9492156,413.4110431,317.4538322,170908.6905328
,2025-01-02 21:27:38,13.612 sec,10.0,256.2993885,193.3508368,65689.3765443,255.7226897,193.3393805,65394.0940411
,2025-01-02 21:27:38,13.789 sec,15.0,165.3272621,121.3618863,27333.1035803,166.2394486,121.7361538,27635.5542635
,2025-01-02 21:27:38,13.978 sec,20.0,116.7350225,82.3633937,13627.0654718,118.9050483,83.2407313,14138.4105025
,2025-01-02 21:27:38,14.150 sec,25.0,91.1490176,61.3415904,8308.1434139,94.4396505,62.7166304,8918.8475909
,2025-01-02 21:27:38,14.322 sec,30.0,78.0436334,50.5819469,6090.8087188,82.0911356,52.3140396,6738.9545374
,2025-01-02 21:27:39,14.492 sec,35.0,70.3601835,43.8810717,4950.5554215,75.0869625,45.8778737,5638.0519429
,2025-01-02 21:27:39,14.659 sec,40.0,66.0917313,40.0598166,4368.1169470,71.1759920,42.2375250,5066.0218393
,2025-01-02 21:27:39,14.821 sec,45.0,63.2667275,37.7384538,4002.6788067,68.6418247,40.0703674,4711.7001042

variable,relative_importance,scaled_importance,percentage
country_product,104647344128.0,1.0,0.35168
country_store,68382507008.0,0.6534567,0.2298076
product,62068576256.0,0.5931214,0.2085889
store_product,17738739712.0,0.1695097,0.0596132
store,15425274880.0,0.1474024,0.0518385
country,14491387904.0,0.1384783,0.0487001
year,4842516480.0,0.0462746,0.0162739
date,2463348992.0,0.0235395,0.0082784
weekday,1925412096.0,0.0183991,0.0064706
weekday_sin,1834713600.0,0.0175323,0.0061658


In [35]:
performance = leader.model_performance(valid_h2o)
print(performance)

ModelMetricsRegression: gbm
** Reported on test data. **

MSE: 3454.2344609346874
RMSE: 58.77273569381204
MAE: 34.51039930090253
RMSLE: NaN
Mean Residual Deviance: 3454.2344609346874


## Forecast on Test