In [2]:
# --- Imports ---
import pandas as pd
import numpy as np
import os
import shutil # <-- Import shutil for deleting directories
from pathlib import Path
import urllib.request
import multiprocessing
from functools import partial
from tqdm import tqdm
import time

# --- Model & Feature Extraction Imports ---
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from sentence_transformers import SentenceTransformer
import lightgbm as lgb
from sklearn.model_selection import train_test_split

# --- Helper Function: SMAPE Metric ---
def smape(y_true, y_pred):
    numerator = np.abs(y_pred - y_true)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    epsilon = 1e-8
    return np.mean(numerator / (denominator + epsilon)) * 100

# --- Helper Function: Image Downloader ---
def download_image(image_link, savefolder):
    if isinstance(image_link, str):
        filename = Path(image_link).name
        image_save_path = os.path.join(savefolder, filename)
        if not os.path.exists(image_save_path):
            try:
                urllib.request.urlretrieve(image_link, image_save_path)
            except Exception as ex:
                pass
    return

def download_images_parallel(image_links, download_folder):
    if not os.path.exists(download_folder):
        os.makedirs(download_folder)
    download_image_partial = partial(download_image, savefolder=download_folder)
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        list(tqdm(pool.imap(download_image_partial, image_links), total=len(image_links)))

print("Setup Complete. All functions and libraries are loaded.")

Setup Complete. All functions and libraries are loaded.


In [3]:
import sentence_transformers, transformers, huggingface_hub
print("sentence-transformers:", sentence_transformers.__version__)
print("transformers:", transformers.__version__)
print("huggingface-hub:", huggingface_hub.__version__)


sentence-transformers: 2.7.0
transformers: 4.41.2
huggingface-hub: 0.23.2


In [4]:
import torch

# Check for GPU availability and set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [5]:
#!pip install tensorflow==2.16.1 --upgrade

In [6]:
#!pip install -U sentence-transformers==2.7.0 transformers==4.41.2 huggingface-hub==0.23.2

In [7]:
# --- 1. Load Data ---
print("--- Loading Full Data ---")
train_df = pd.read_csv('/kaggle/input/amazon-ml/train.csv')
test_df = pd.read_csv('/kaggle/input/amazon-ml/test.csv')


# Apply log transformation to the price
train_df['log_price'] = np.log1p(train_df['price'])
print("DataFrames loaded successfully.")
print(f"Using {len(train_df)} training samples and {len(test_df)} testing samples.")

--- Loading Full Data ---
DataFrames loaded successfully.
Using 75000 training samples and 75000 testing samples.


In [8]:
train_df.head()

Unnamed: 0,sample_id,catalog_content,image_link,price,log_price
0,33127,"Item Name: La Victoria Green Taco Sauce Mild, ...",https://m.media-amazon.com/images/I/51mo8htwTH...,4.89,1.773256
1,198967,"Item Name: Salerno Cookies, The Original Butte...",https://m.media-amazon.com/images/I/71YtriIHAA...,13.12,2.647592
2,261251,"Item Name: Bear Creek Hearty Soup Bowl, Creamy...",https://m.media-amazon.com/images/I/51+PFEe-w-...,1.97,1.088562
3,55858,Item Name: Judee’s Blue Cheese Powder 11.25 oz...,https://m.media-amazon.com/images/I/41mu0HAToD...,30.34,3.444895
4,292686,"Item Name: kedem Sherry Cooking Wine, 12.7 Oun...",https://m.media-amazon.com/images/I/41sA037+Qv...,66.49,4.211979


In [9]:
# --- 2. Process Training Images ---
TRAIN_IMAGE_DIR = 'dataset/images/train'
TRAIN_IMG_EMBEDDINGS_FILE = 'train_image_embeddings.npy'

if os.path.exists(TRAIN_IMG_EMBEDDINGS_FILE):
    print("✅ Training image embeddings already exist. Loading from file.")
    train_image_embeddings = np.load(TRAIN_IMG_EMBEDDINGS_FILE)
else:
    print(f"--- Downloading {len(train_df)} Training Images ---")
    download_images_parallel(train_df['image_link'].tolist(), TRAIN_IMAGE_DIR)
    
    print("\n--- Extracting Training Image Features ---")
    image_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

    
    
    # MODIFIED Code
    def extract_image_features_batched(image_paths, batch_size=64):
        all_features = []
        num_images = len(image_paths)
        for i in tqdm(range(0, num_images, batch_size)):
            batch_paths = image_paths[i:i + batch_size]
            batch_images = []
            # Keep track of which images failed to load
            valid_indices = []
            
            for idx, path in enumerate(batch_paths):
                try:
                    img = image.load_img(path, target_size=(224, 224))
                    img_array = image.img_to_array(img)
                    batch_images.append(img_array)
                    valid_indices.append(True)
                except Exception:
                    valid_indices.append(False) # Mark this image as failed
    
            if not batch_images: # If the whole batch failed
                all_features.extend([np.zeros(2048) for _ in batch_paths])
                continue
                
            # Preprocess the entire batch at once
            batch_images_np = np.array(batch_images)
            preprocessed_batch = preprocess_input(batch_images_np)
            
            # Predict on the entire batch
            feature_vectors = image_model.predict(preprocessed_batch, verbose=0)
            
            # Place results back correctly, filling failures with zeros
            features_with_zeros = []
            feature_iter = iter(feature_vectors)
            for is_valid in valid_indices:
                if is_valid:
                    features_with_zeros.append(next(feature_iter).flatten())
                else:
                    features_with_zeros.append(np.zeros(2048))
            all_features.extend(features_with_zeros)
    
        return np.array(all_features)
    
    # --- Then, call the new function ---
    train_image_paths = [os.path.join(TRAIN_IMAGE_DIR, Path(link).name) for link in train_df['image_link']]
    train_image_embeddings = extract_image_features_batched(train_image_paths)
    # (and do the same for the test images in the next cell)
    
    print("\n--- Saving Training Image Embeddings ---")
    np.save(TRAIN_IMG_EMBEDDINGS_FILE, train_image_embeddings)
    
    print("\n--- Deleting Training Images to Free Space ---")
    shutil.rmtree(TRAIN_IMAGE_DIR)
    print("✅ Training images deleted.")

print(f"Train image embeddings ready. Shape: {train_image_embeddings.shape}")

✅ Training image embeddings already exist. Loading from file.
Train image embeddings ready. Shape: (75000, 2048)


In [10]:
# --- 3. Process Test Images ---
TEST_IMAGE_DIR = 'dataset/images/test'
TEST_IMG_EMBEDDINGS_FILE = 'test_image_embeddings.npy'

if os.path.exists(TEST_IMG_EMBEDDINGS_FILE):
    print("✅ Test image embeddings already exist. Loading from file.")
    test_image_embeddings = np.load(TEST_IMG_EMBEDDINGS_FILE)
else:
    print(f"--- Downloading {len(test_df)} Test Images ---")
    download_images_parallel(test_df['image_link'].tolist(), TEST_IMAGE_DIR)
    
    print("\n--- Extracting Test Image Features ---")
    # We can reuse the image_model if the previous cell was run in the same session
    try:
        image_model
    except NameError:
        image_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

    # Re-using the feature extraction function defined in the previous cell
    test_image_paths = [os.path.join(TEST_IMAGE_DIR, Path(link).name) for link in test_df['image_link']]
    test_image_embeddings = extract_image_features_batched(test_image_paths)
    
    print("\n--- Saving Test Image Embeddings ---")
    np.save(TEST_IMG_EMBEDDINGS_FILE, test_image_embeddings)
    
    print("\n--- Deleting Test Images to Free Space ---")
    shutil.rmtree(TEST_IMAGE_DIR)
    print("✅ Test images deleted.")

print(f"Test image embeddings ready. Shape: {test_image_embeddings.shape}")

✅ Test image embeddings already exist. Loading from file.
Test image embeddings ready. Shape: (75000, 2048)


In [11]:
# --- 4. Text Feature Extraction ---
TRAIN_TXT_EMBEDDINGS_FILE = 'train_text_embeddings.npy'
TEST_TXT_EMBEDDINGS_FILE = 'test_text_embeddings.npy'

if os.path.exists(TRAIN_TXT_EMBEDDINGS_FILE) and os.path.exists(TEST_TXT_EMBEDDINGS_FILE):
    print("--- Loading text embeddings from saved files ---")
    train_text_embeddings = np.load(TRAIN_TXT_EMBEDDINGS_FILE)
    test_text_embeddings = np.load(TEST_TXT_EMBEDDINGS_FILE)
    print("✅ Text embeddings loaded successfully.")
else:
    print("--- Generating and saving text embeddings ---")
    # MODIFIED Code
    # Explicitly tell the model to use the 'cuda' (GPU) device
    text_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', trust_remote_code=True, device='cuda')
    
    # Increase the batch size to process more data at once on the GPU
    train_text_embeddings = text_model.encode(
        train_df['catalog_content'].tolist(),
        show_progress_bar=True,
        batch_size=256  # Adjust batch size based on GPU memory
    )
    test_text_embeddings = text_model.encode(
        test_df['catalog_content'].tolist(),
        show_progress_bar=True,
        batch_size=256
    )
    
    np.save(TRAIN_TXT_EMBEDDINGS_FILE, train_text_embeddings)
    np.save(TEST_TXT_EMBEDDINGS_FILE, test_text_embeddings)
    print("✅ Text embeddings generated and saved.")

print(f"Train text embeddings shape: {train_text_embeddings.shape}")

--- Loading text embeddings from saved files ---
✅ Text embeddings loaded successfully.
Train text embeddings shape: (75000, 384)


In [12]:
# --- 5. Combine Features & Train Model ---
print("\n--- Combining Features and Training Model ---")
combined_train_features = np.hstack([train_text_embeddings, train_image_embeddings])

X = combined_train_features
y = train_df['log_price']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
y_val_original = np.expm1(y_val)

# --- You can change model parameters here and rerun ---
lgbm = lgb.LGBMRegressor(random_state=42, n_estimators=200, learning_rate=0.05)
lgbm.fit(X_train, y_train)

# --- Evaluate the new model ---
print("\n--- Evaluating the new model ---")
log_val_preds = lgbm.predict(X_val)
val_preds = np.expm1(log_val_preds)
validation_score = smape(y_val_original, val_preds)
print(f"✅ Validation SMAPE Score (Multi-modal): {validation_score:.4f}")


--- Combining Features and Training Model ---
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 4.652498 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 620160
[LightGBM] [Info] Number of data points in the train set: 60000, number of used features: 2432
[LightGBM] [Info] Start training from score 2.740904

--- Evaluating the new model ---
✅ Validation SMAPE Score (Multi-modal): 60.4032


### **Some Feature Engineering**

In [13]:
import re
import numpy as np
import pandas as pd

# ===================================================================
# 1. DEFINE ALL FEATURE ENGINEERING FUNCTIONS
# ===================================================================

# --- For Quantity (Weight/Volume) ---
UNIT_CONVERSIONS = {
    # Weight
    'kg': 1000, 'kilogram': 1000, 'kgs': 1000,
    'g': 1, 'gm': 1, 'gram': 1, 'gms': 1,
    'mg': 0.001, 'milligram': 0.001,
    'lb': 453.592, 'pound': 453.592, 'lbs': 453.592,
    'oz': 28.35, 'ounce': 28.35,
    # Volume
    'l': 1000, 'liter': 1000, 'liters': 1000,
    'ml': 1, 'milliliter': 1,
    'floz': 29.5735, 'fluid ounce': 29.5735
}

def extract_quantity(text):
    text = str(text).lower()
    pattern = r"(\d+\.?\d*)\s?(" + "|".join(UNIT_CONVERSIONS.keys()) + r")\b"
    match = re.search(pattern, text)
    if match:
        value = float(match.group(1))
        unit = match.group(2)
        return value * UNIT_CONVERSIONS[unit]
    return np.nan

# --- For Pack Count ---
def extract_pack_count(text):
    text = str(text).lower()
    match = re.search(r"(?:pack|set)\s+of\s+(\d+)", text)
    if match:
        return int(match.group(1))
    match = re.search(r"(\d+)\s*(?:-|pack|count|ct)\b", text)
    if match:
        return int(match.group(1))
    return 1 # Default to 1 if no pack info found

# --- For Brand Name ---
def extract_brand(text):
    try:
        return str(text).split()[0].lower()
    except IndexError:
        return "unknown"

# ===================================================================

In [14]:
# 2. APPLY FUNCTIONS TO DATAFRAMES
# ===================================================================
print("--- Starting Feature Engineering ---")

# We'll operate on both DataFrames at once
for df in [train_df, test_df]:
    print(f"Processing DataFrame of shape: {df.shape}")
    
    # Extract numerical features
    df['quantity_std'] = df['catalog_content'].apply(extract_quantity)
    df['pack_count'] = df['catalog_content'].apply(extract_pack_count)
    df['text_length'] = df['catalog_content'].str.len()
    
    # Extract keyword flag
    df['is_organic'] = df['catalog_content'].str.contains('organic', case=False, regex=False).astype(int)
    
    # Extract brand name
    df['brand'] = df['catalog_content'].apply(extract_brand)

print("\n--- Raw Feature Extraction Complete ---")

--- Starting Feature Engineering ---
Processing DataFrame of shape: (75000, 5)
Processing DataFrame of shape: (75000, 3)

--- Raw Feature Extraction Complete ---


In [15]:
df.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,sample_id,catalog_content,image_link,quantity_std,pack_count,text_length,is_organic,brand
0,100179,Item Name: Rani 14-Spice Eshamaya's Mango Chut...,https://m.media-amazon.com/images/I/71hoAn78AW...,297.675,14,1274,0,item
1,245611,Item Name: Natural MILK TEA Flavoring extract ...,https://m.media-amazon.com/images/I/61ex8NHCIj...,56.7,1,1720,0,item
2,146263,Item Name: Honey Filled Hard Candy - Bulk Pack...,https://m.media-amazon.com/images/I/61KCM61J8e...,,2,769,0,item
3,95658,Item Name: Vlasic Snack'mm's Kosher Dill 16 Oz...,https://m.media-amazon.com/images/I/51Ex6uOH7y...,453.6,2,82,0,item
4,36806,"Item Name: McCormick Culinary Vanilla Extract,...",https://m.media-amazon.com/images/I/71QYlrOMoS...,946.352,32,1491,0,item


In [16]:
df.isnull().sum()

sample_id              0
catalog_content        0
image_link             0
quantity_std       20637
pack_count             0
text_length            0
is_organic             0
brand                  0
dtype: int64

In [17]:
# 3. HANDLE MISSING VALUES & ENCODE CATEGORICALS
# ===================================================================

# --- Handle Missing Numerical Values ---
# Calculate median from the training set ONLY
median_quantity = train_df['quantity_std'].median()
median_length = train_df['text_length'].median()

# Fill NaNs in both train and test sets using the training median
for df in [train_df, test_df]:
    df['quantity_std'].fillna(median_quantity, inplace=True)
    df['text_length'].fillna(median_length, inplace=True)

print(f"Missing quantities filled with median value: {median_quantity}")

# --- Encode Brand as a Categorical Feature ---
# Get all unique brands across both datasets to ensure consistency
all_brands = pd.concat([train_df['brand'], test_df['brand']]).unique()

# Convert the 'brand' column into a pandas Categorical type
train_df['brand'] = pd.Categorical(train_df['brand'], categories=all_brands)
test_df['brand'] = pd.Categorical(test_df['brand'], categories=all_brands)

print("Brand column successfully encoded.")

# ===================================================================

Missing quantities filled with median value: 283.5
Brand column successfully encoded.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['quantity_std'].fillna(median_quantity, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['text_length'].fillna(median_length, inplace=True)


In [18]:
df.isnull().sum()

sample_id          0
catalog_content    0
image_link         0
quantity_std       0
pack_count         0
text_length        0
is_organic         0
brand              0
dtype: int64

In [19]:
# 4. PREPARE FINAL FEATURE SET
# ===================================================================

# Select all the engineered feature columns we want to use
engineered_feature_cols = ['quantity_std', 'pack_count', 'text_length', 'is_organic', 'brand']

# Create the final feature DataFrames for the model
train_engineered_features_df = train_df[engineered_feature_cols]
test_engineered_features_df = test_df[engineered_feature_cols]

print("\n--- Feature Engineering Complete! ---")
print("Final engineered features ready for model training. Example:")
print(train_engineered_features_df.head())
print("\nData types of new features:")
print(train_engineered_features_df.info())


--- Feature Engineering Complete! ---
Final engineered features ready for model training. Example:
   quantity_std  pack_count  text_length  is_organic brand
0      340.2000           6           91           0  item
1      226.8000           4          511           0  item
2       53.8650           6          328           0  item
3      318.9375           1         1318           0  item
4      360.0450           1          155           0  item

Data types of new features:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75000 entries, 0 to 74999
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   quantity_std  75000 non-null  float64 
 1   pack_count    75000 non-null  int64   
 2   text_length   75000 non-null  int64   
 3   is_organic    75000 non-null  int64   
 4   brand         75000 non-null  category
dtypes: category(1), float64(1), int64(3)
memory usage: 2.4 MB
None


In [20]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split

print("--- Combining all feature sets with unique column names ---")

# Convert numpy embedding arrays to pandas DataFrames with unique prefixes
train_text_df = pd.DataFrame(train_text_embeddings, index=train_df.index, columns=[f'txt_{i}' for i in range(train_text_embeddings.shape[1])])
train_image_df = pd.DataFrame(train_image_embeddings, index=train_df.index, columns=[f'img_{i}' for i in range(train_image_embeddings.shape[1])])

test_text_df = pd.DataFrame(test_text_embeddings, index=test_df.index, columns=[f'txt_{i}' for i in range(test_text_embeddings.shape[1])])
test_image_df = pd.DataFrame(test_image_embeddings, index=test_df.index, columns=[f'img_{i}' for i in range(test_image_embeddings.shape[1])])

# Concatenate all features horizontally (axis=1)
X = pd.concat([
    train_text_df,
    train_image_df,
    train_engineered_features_df
], axis=1)

X_test = pd.concat([
    test_text_df,
    test_image_df,
    test_engineered_features_df
], axis=1)

y = train_df['log_price']

print(f"Final training feature shape: {X.shape}")
print(f"Final test feature shape: {X_test.shape}")

--- Combining all feature sets with unique column names ---
Final training feature shape: (75000, 2437)
Final test feature shape: (75000, 2437)


In [21]:
X.shape

(75000, 2437)

In [22]:
import pandas as pd
import numpy as np
import time

# Import all the models we want to test
import lightgbm as lgb
import xgboost as xgb
import catboost as cb

from sklearn.model_selection import train_test_split

# ===================================================================
# 1. SETUP THE BENCHMARK
# ===================================================================

# Split data for validation, which we'll use for all models
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
y_val_original = np.expm1(y_val) # For SMAPE calculation

# Define the models to test in a dictionary
models = {
    "LightGBM": lgb.LGBMRegressor(random_state=42, n_estimators=1000),
    "XGBoost": xgb.XGBRegressor(random_state=42, n_estimators=1000, n_jobs=-1, early_stopping_rounds=100),
    "CatBoost": cb.CatBoostRegressor(random_state=42, n_estimators=1000, verbose=0, early_stopping_rounds=100)
}

results = {}
best_iterations = {}

In [23]:
# ===================================================================
# 2. RUN THE BENCHMARKING LOOP
# ===================================================================
print("--- Starting Model Benchmarking ---")

for name, model in models.items():
    start_time = time.time()
    print(f"\n--- Training {name} ---")

    # Train each model with its specific parameters
    if name == "LightGBM":
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                  callbacks=[lgb.early_stopping(100, verbose=False)],
                  categorical_feature=['brand'])
        best_iterations[name] = model.best_iteration_
    elif name == "XGBoost":
        # XGBoost needs category columns to be of 'category' dtype, which we already did
        X_train_xgb = X_train.copy()
        X_val_xgb = X_val.copy()
        X_train_xgb['brand'] = X_train_xgb['brand'].cat.codes
        X_val_xgb['brand'] = X_val_xgb['brand'].cat.codes
        model.fit(X_train_xgb, y_train, eval_set=[(X_val_xgb, y_val)])
        best_iterations[name] = model.best_iteration
    elif name == "CatBoost":
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                  cat_features=['brand'])
        best_iterations[name] = model.best_iteration_
    
    # Make predictions and evaluate
    log_val_preds = model.predict(X_val)
    val_preds = np.expm1(log_val_preds)
    score = smape(y_val_original, val_preds)
    
    # Store results
    execution_time = time.time() - start_time
    results[name] = {'score': score, 'time': execution_time}
    print(f"✅ {name} Validation SMAPE: {score:.4f} (trained in {execution_time:.2f}s)")

# ===================================================================

--- Starting Model Benchmarking ---

--- Training LightGBM ---
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 4.164428 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 620869
[LightGBM] [Info] Number of data points in the train set: 60000, number of used features: 2436
[LightGBM] [Info] Start training from score 2.740904
✅ LightGBM Validation SMAPE: 55.2404 (trained in 469.42s)

--- Training XGBoost ---
[0]	validation_0-rmse:0.90113
[1]	validation_0-rmse:0.87000
[2]	validation_0-rmse:0.84868
[3]	validation_0-rmse:0.83370
[4]	validation_0-rmse:0.82329
[5]	validation_0-rmse:0.81651
[6]	validation_0-rmse:0.80998
[7]	validation_0-rmse:0.80513
[8]	validation_0-rmse:0.80084
[9]	validation_0-rmse:0.79622
[10]	validation_0-rmse:0.79350
[11]	validation_0-rmse:0.79031
[12]	validation_0-rmse:0.78817
[13]	validation_0-rmse:0.78586
[14]	validation_0-rmse:0.78390
[15]	validation_0-rmse:0.78244
[16]	validation_0-rms

In [24]:
# 3. SUMMARIZE RESULTS AND SELECT THE BEST MODEL
# ===================================================================
print("\n--- Benchmark Summary ---")
for name, result in results.items():
    print(f"{name:<10}: {result['score']:.4f} SMAPE | {result['time']:.2f} seconds")

# Find the best model based on the lowest SMAPE score
best_model_name = min(results, key=lambda k: results[k]['score'])
print(f"\n🏆 Best performing model: {best_model_name} with a score of {results[best_model_name]['score']:.4f}")

# ===================================================================


--- Benchmark Summary ---
LightGBM  : 55.2404 SMAPE | 469.42 seconds
XGBoost   : 58.4899 SMAPE | 298.68 seconds
CatBoost  : 56.1273 SMAPE | 468.35 seconds

🏆 Best performing model: LightGBM with a score of 55.2404
