# Import libraries, list the input files, and read the data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import time

from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_log_error, make_scorer

import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

import optuna

# Show every file that is available under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Locations of the competition CSV files.
train_path = "/kaggle/input/playground-series-s5e5/train.csv"
test_path = "/kaggle/input/playground-series-s5e5/test.csv"

train_df, test_df = pd.read_csv(train_path), pd.read_csv(test_path)

# Keep the test ids aside: the 'id' column is dropped from the frames during
# pre-processing but is needed again when writing prediction files.
test_ids = test_df['id']

# Data Pre-Processing

In [None]:
# Encode sex numerically and derive the basic physiological features.
# The same transformations are applied to the train and the test frame.
for frame in (train_df, test_df):
    # Both indicators are built from the raw 'male'/'female' strings, so
    # 'Sex_Reversed' has to be created before 'Sex' itself is overwritten.
    frame['Sex_Reversed'] = frame['Sex'].map({'male': 1, 'female': 0})
    frame['Sex'] = frame['Sex'].map({'male': 0, 'female': 1})

    # Heart rate relative to (220 - Age).
    frame['Heart_Rate_pct'] = frame['Heart_Rate'] / (220 - frame['Age'])

    # Body-mass index; the division by 100 assumes Height is given in cm.
    frame['BMI'] = frame['Weight'] / (frame['Height'] / 100) ** 2

    # Basal-metabolic-rate estimate with a sex-specific constant.
    # 'Sex' is numeric at this point (female == 1). The previous comparison
    # against the string 'female' was always False, which silently applied
    # the "+ 5" (male) constant to every row.
    bmr_base = 10 * frame['Weight'] + 6.25 * frame['Height'] - 5 * frame['Age']
    frame['BMR'] = np.where(frame['Sex'] == 1, bmr_base - 161, bmr_base + 5)

# Derived indices combining temperature, heart rate, duration, weight and age.
# They are computed identically for the train and the test frame. 'Body_Temp'
# is still on its raw scale here (it is shifted by 37.0 only further below).
for frame in (train_df, test_df):
    frame['TSI'] = (
        5 * ((frame['Body_Temp'] - 36.5) / (41.5 - 36.5))
        + 5 * ((frame['Heart_Rate'] - 60) / ((220 - frame['Age']) - 60))
    )
    frame['RPE'] = frame['Heart_Rate_pct'] + 0.1 * (frame['Body_Temp'] - 37)
    frame['FI'] = (frame['Heart_Rate_pct'] ** 2) / frame['Duration']
    frame['CLI'] = (frame['Heart_Rate'] * frame['Duration']) / frame['Weight']
    frame['TLI'] = ((frame['Body_Temp'] - 36.6) ** 2) * frame['Duration']
    frame['AMI'] = (frame['BMR'] * frame['Heart_Rate_pct']) / frame['Duration']
    frame['AWI'] = (frame['Duration'] * frame['Heart_Rate_pct']) / frame['Age']
    frame['WLI'] = frame['Heart_Rate'] * frame['Duration'] * frame['Weight']

    # 'Sex' has already been mapped to integers (female == 1), so the
    # female factor must be selected with a numeric comparison. Comparing
    # against the string 'female' never matched and the 0.85 factor was
    # never applied.
    frame['VO2_Proxy'] = np.where(
        frame['Sex'] == 1,
        (0.85 * frame['Duration']) / (frame['Heart_Rate_pct'] * frame['Age']),
        (1.00 * frame['Duration']) / (frame['Heart_Rate_pct'] * frame['Age']),
    )

# Duration / heart-rate interactions and per-sex interaction columns,
# computed the same way for both frames.
for frame in (train_df, test_df):
    frame['Duration_HR'] = frame['Duration'] * frame['Heart_Rate']
    frame['Duration2_HR'] = frame['Duration'] ** 2 * frame['Heart_Rate']
    frame['Intensity'] = frame['Heart_Rate'] / frame['Duration']

    # Multiply each signal by both sex indicators, which yields one column
    # that is non-zero for each sex.
    for base_col in ['Duration', 'Heart_Rate', 'Body_Temp']:
        for sex_col in ['Sex', 'Sex_Reversed']:
            frame[f'{base_col}_x_{sex_col}'] = frame[base_col] * frame[sex_col]

    # Shift body temperature by 37.0 only after the interaction columns
    # above have been derived from the unshifted values.
    frame['Body_Temp'] = frame['Body_Temp'] - 37.0

# for col in ['Height', 'Weight', 'Heart_Rate', 'Body_Temp']:
#         for agg in ['min', 'max']:
#             agg_val = train_df.groupby('Sex')[col].agg(agg).rename(f'Sex_{col}_{agg}')
#             train_df = train_df.merge(agg_val, on='Sex', how='left')
# for col in ['Height', 'Weight', 'Heart_Rate', 'Body_Temp']:
#         for agg in ['min', 'max']:
#             agg_val = test_df.groupby('Sex')[col].agg(agg).rename(f'Sex_{col}_{agg}')
#             test_df = test_df.merge(agg_val, on='Sex', how='left')

# Pairwise products that are added as '<left>_x_<right>' columns.
product_pairs = [
    ('Weight', 'Duration'),
    ('Height', 'Duration'),
    ('Weight', 'Height'),
    ('Weight', 'Intensity'),
    ('Height', 'Intensity'),
]

for frame in (train_df, test_df):
    # Heart rate relative to age.
    frame['Heart_Rate_Ratio'] = frame['Heart_Rate'] / frame['Age']

    for left, right in product_pairs:
        frame[f'{left}_x_{right}'] = frame[left] * frame[right]

    # 'Sex_Reversed' was only needed to build the interaction columns and
    # 'id' carries no signal; neither is kept as a model feature.
    frame.drop(columns=['Sex_Reversed', 'id'], inplace=True)

## Features

In [None]:
# Every remaining column except the target is used as a model feature.
features = list(train_df.columns)
features.remove('Calories')
print(features)

## Scaling Numeric Features

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn mean/variance on the training features only, then apply the same
# transformation to both frames. Every column in `features` is standardised,
# including the 0/1-encoded 'Sex' column.
scaler = StandardScaler()
scaler.fit(train_df[features])
for frame in (train_df, test_df):
    frame[features] = scaler.transform(frame[features])

In [None]:
# Print the first and last rows of each scaled frame as a sanity check.
for frame in (train_df, test_df):
    print(frame.head())
    print(frame.tail())

In [None]:
# Target and model inputs. Missing feature values are replaced by 0.
y_train = train_df['Calories']
X_train = train_df[features].fillna(0)
X_test = test_df[features].fillna(0)

# Functions

## RMSLE Scorer

In [None]:
def rmsle_scorer(y_true, y_pred):
    """Return the root mean squared logarithmic error of the predictions.

    Predictions below 0.001 are raised to 0.001 before scoring so that
    ``mean_squared_log_error`` never receives a negative prediction.
    """
    floored_pred = np.maximum(y_pred, 0.001)
    msle = mean_squared_log_error(y_true, floored_pred)
    return np.sqrt(msle)

## KFold CV

In [None]:
def run_kfold_cv(X, y, model, model_name, n_splits, random_state=42):
    """Run shuffled K-fold cross-validation and summarise the RMSLE.

    A fresh, unfitted copy of ``model`` is trained on every fold; the
    estimator passed in is never fitted. Negative validation predictions are
    replaced by 0.001 before scoring because RMSLE is undefined for them.

    Args:
        X: Feature table; rows are selected positionally with ``.iloc``.
        y: Target values aligned with ``X``; also indexed with ``.iloc``.
        model: Estimator exposing the scikit-learn ``fit``/``predict``/
            ``get_params`` interface. Used only as a template.
        model_name: Label used in log output and in the returned summary.
        n_splits: Number of folds.
        random_state: Seed for the fold shuffling.

    Returns:
        dict with the keys ``'Model'``, ``'N_Splits'``, ``'Average RMSLE'``,
        ``'Std RMSLE'``, ``'Overall OOF RMSLE'``, ``'Total CV Time (s)'`` and
        ``'Avg Fold Time (s)'``. Scores that could not be computed are NaN.
    """
    # Local import keeps this function self-contained; scikit-learn is
    # already a dependency of this notebook.
    from sklearn.base import clone

    print(f"\n--- Starting {n_splits}-Fold Cross-Validation for {model_name} ---")
    start_cv_time = time.time()

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    fold_rmsle_scores = []
    fold_times = []
    oof_predictions = np.zeros(X.shape[0])

    for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)):
        fold_start_time = time.time()

        X_train_fold, y_train_fold = X.iloc[train_idx], y.iloc[train_idx]
        X_val_fold, y_val_fold = X.iloc[val_idx], y.iloc[val_idx]

        # clone() builds an unfitted estimator with identical parameters and
        # also handles nested estimators, unlike rebuilding the object from
        # its class and get_params().
        model_fold = clone(model)
        model_fold.fit(X_train_fold, y_train_fold)

        val_preds = model_fold.predict(X_val_fold)
        val_preds[val_preds < 0] = 0.001

        # Out-of-fold predictions: every row is predicted exactly once by a
        # model that did not see it during training.
        oof_predictions[val_idx] = val_preds

        try:
            fold_rmsle = np.sqrt(mean_squared_log_error(y_val_fold, val_preds))
            fold_rmsle_scores.append(fold_rmsle)
        except ValueError as e:
            print(f"  Warning: Error calculating RMSLE for Fold {fold + 1} ({model_name}): {e}. Setting RMSLE to NaN.")
            fold_rmsle_scores.append(np.nan)

        fold_times.append(time.time() - fold_start_time)

    total_cv_time = time.time() - start_cv_time

    # Folds whose score could not be computed are excluded from the summary.
    valid_fold_rmsle_scores = [s for s in fold_rmsle_scores if not np.isnan(s)]

    mean_cv_rmsle = np.nan
    std_cv_rmsle = np.nan
    overall_oof_rmsle = np.nan

    print(f"\n--- {n_splits}-Fold CV Summary for {model_name} ---")
    if valid_fold_rmsle_scores:
        mean_cv_rmsle = np.mean(valid_fold_rmsle_scores)
        std_cv_rmsle = np.std(valid_fold_rmsle_scores)
        print(f"Average RMSLE: {mean_cv_rmsle:.4f} +/- {std_cv_rmsle:.4f}")
    else:
        print("RMSLE calculation failed for all folds.")

    # The pooled OOF score is only attempted when both the target and the
    # predictions are non-negative and at least one fold was scored.
    if y.min() >= 0 and oof_predictions.min() >= 0 and valid_fold_rmsle_scores:
        try:
            overall_oof_rmsle = np.sqrt(mean_squared_log_error(y, oof_predictions))
            print(f"Overall OOF RMSLE: {overall_oof_rmsle:.4f}")
        except ValueError as e:
            print(f"Error calculating Overall OOF RMSLE for {model_name}: {e}. Ensure target and predictions are non-negative.")

    return {
        'Model': model_name,
        'N_Splits': n_splits,
        'Average RMSLE': mean_cv_rmsle,
        'Std RMSLE': std_cv_rmsle,
        'Overall OOF RMSLE': overall_oof_rmsle,
        'Total CV Time (s)': total_cv_time,
        'Avg Fold Time (s)': np.mean(fold_times) if fold_times else np.nan
    }

# Hyperparameters

In [None]:
# Seed shared by the cross-validation runs and the Optuna objective.
RANDOM_STATE = 42

# Fold counts to evaluate. Only 5 is active; 10, 15, 30 and 50 were listed
# as disabled alternatives.
n_splits_list = [5]

## Optuna

In [None]:
# --- General Optuna Objective Function ---
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import train_test_split
def general_objective(trial, model_class, model_name_prefix):
    """Optuna objective: sample hyperparameters and score them on a hold-out set.

    The sampled model is trained on 80% of the module-level ``X_train`` /
    ``y_train`` and evaluated on the remaining 20%. The split is fixed by
    ``RANDOM_STATE`` so every trial is scored on the same rows. This is a
    single hold-out evaluation, not cross-validation.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        model_class: Estimator class to instantiate with the sampled
            parameters. Extra parameters are sampled for ``xgb.XGBRegressor``.
        model_name_prefix: Currently unused; kept so existing callers that
            pass a label keep working.

    Returns:
        The RMSLE on the hold-out set (lower is better).
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'random_state': RANDOM_STATE,
        'n_jobs': -1,
    }

    if model_class is xgb.XGBRegressor:
        params['objective'] = 'reg:squarederror'
        params['eval_metric'] = 'rmse'
        params['min_child_weight'] = trial.suggest_int('min_child_weight', 1, 10)
        params['colsample_bytree'] = trial.suggest_float('colsample_bytree', 0.6, 1.0)
        params['gamma'] = trial.suggest_float('gamma', 1e-8, 1.0, log=True)
        params['lambda'] = trial.suggest_float('lambda', 1e-8, 1.0, log=True)  # reg_lambda
        params['alpha'] = trial.suggest_float('alpha', 1e-8, 1.0, log=True)    # reg_alpha

    model = model_class(**params)

    # Use the shared seed instead of a second hard-coded 42.
    X_tr, X_val, y_tr, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=RANDOM_STATE
    )

    model.fit(X_tr, y_tr)

    # RMSLE is undefined for negative predictions, so clip them to 0.
    y_pred = np.clip(model.predict(X_val), 0, None)

    return np.sqrt(mean_squared_log_error(y_val, y_pred))

In [None]:
# # --- Optuna Optimization for XGBoost ---
# print("\n" + "="*80)
# print("Starting Optuna Hyperparameter Optimization for XGBoost")
# print("="*80)

# study_xgb = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE + 1))
# study_xgb.optimize(lambda trial: general_objective(trial, xgb.XGBRegressor, "XGBoost_Optuna"), n_trials=50, show_progress_bar=True)

# print("\nOptuna XGBoost optimization finished.")
# print("Number of finished trials: ", len(study_xgb.trials))
# print("Best trial (XGBoost):")
# trial_xgb = study_xgb.best_trial

# print("  Value (Avg RMSLE): ", trial_xgb.value)
# print("  Params: ")
# for key, value in trial_xgb.params.items():
#     print(f"    {key}: {value}")

# best_xgb_params = trial_xgb.params
# best_xgb_params['objective'] = 'reg:squarederror'
# best_xgb_params['eval_metric'] = 'rmse'
# best_xgb_params['random_state'] = RANDOM_STATE
# # best_xgb_params['tree_method'] = 'gpu_hist' # Ensure GPU is set for the final model if available
# # best_xgb_params['predictor'] = 'gpu_predictor' # Ensure GPU is set for the final model if available
# best_xgb_params['n_jobs'] = -1

# Models

In [None]:
# Fixed XGBoost configuration. The values look like the outcome of an earlier
# hyperparameter search (presumably the Optuna study that is commented out
# above - TODO confirm). No early stopping is configured and gamma is left at
# the library default.
xgb_params = dict(
    n_estimators=633,
    learning_rate=0.014465037740657534,
    max_depth=10,
    colsample_bytree=0.7126992343210137,
    subsample=0.834441652086676,
    min_child_weight=2,
    reg_alpha=1.9027210521909599e-07,
    reg_lambda=0.12835242746436062,
    random_state=42,
    n_jobs=-1,
)

# Display name -> unfitted estimator. Each entry is cross-validated and then
# refitted on the full training data.
models = {
    "XGBoost Regressor": xgb.XGBRegressor(**xgb_params),
}

# K-Fold CV, Final Training, and Saving Predictions to CSV

In [None]:
# scikit-learn is already a dependency of this notebook; clone() is the
# supported way to obtain an unfitted copy of an estimator.
from sklearn.base import clone

# One summary dict per (model, n_splits) combination, used for the final
# comparison table.
all_cv_results = []

for model_name, model_instance in models.items():
    print(f"\n{'='*80}\nRunning K-Fold Cross-Validation for: {model_name}\n{'='*80}")
    for n_splits_val in n_splits_list:
        results = run_kfold_cv(X_train, y_train, model_instance, model_name, n_splits_val, RANDOM_STATE)
        all_cv_results.append(results)

    # --- Train on the full X_train and predict on X_test ---
    print(f"\n--- Training {model_name} on full X_train and predicting on X_test ---")
    # clone() copies the parameters into a fresh estimator and also handles
    # nested estimators, unlike rebuilding the object from its class and
    # get_params().
    final_model = clone(model_instance)

    start_time_full_train = time.time()
    final_model.fit(X_train, y_train)
    end_time_full_train = time.time()
    print(f"Full training complete in {(end_time_full_train - start_time_full_train):.4f} seconds.")

    start_time_predict = time.time()
    predictions_test = final_model.predict(X_test)
    end_time_predict = time.time()
    print(f"Predictions made in {(end_time_predict - start_time_predict):.4f} seconds.")

    # Negative predictions are replaced by a small positive value before
    # they are written out.
    predictions_test[predictions_test < 0] = 0.001
    print(predictions_test)

    # NOTE(review): the prediction column is written as 'Predictions';
    # verify that name against the competition's sample_submission.csv
    # before uploading.
    submission_df = pd.DataFrame({'id': test_ids, 'Predictions': predictions_test})
    csv_filename = f'{model_name.replace(" ", "_")}_predictions.csv'
    submission_df.to_csv(csv_filename, index=False)
    print(f"Submission file '{csv_filename}' created successfully.")

    # Feature importances are reported only for estimators that expose them.
    if hasattr(final_model, 'feature_importances_'):
        print(f"Feature Importances for {model_name}:")
        feature_importances_df = pd.DataFrame({
            'Feature': X_train.columns,
            'Importance': final_model.feature_importances_
        }).sort_values(by='Importance', ascending=False)
        print(feature_importances_df.to_string(index=False))
    print(f"{'-'*80}")  # Separator after each model's full training/prediction

# Final Comparison Table

In [None]:
# Print a table comparing the cross-validation results collected above.
banner = "=" * 120
print("\n" + banner)
print("                                Cross-Validation Summary Across Different Models and Folds                                ")
print(banner)

# One row per (model, n_splits) run, ordered by model and then fold count.
results_df = (
    pd.DataFrame(all_cv_results)
    .sort_values(by=['Model', 'N_Splits'])
    .reset_index(drop=True)
)

# Render every numeric column with four decimals for readability.
four_decimals = '{:.4f}'.format
for metric_col in [
    'Average RMSLE',
    'Std RMSLE',
    'Overall OOF RMSLE',
    'Total CV Time (s)',
    'Avg Fold Time (s)',
]:
    results_df[metric_col] = results_df[metric_col].map(four_decimals)

print(results_df.to_string(index=False))
print(banner)


# NN

In [None]:
# Preview the first rows of the scaled training features.
X_train.head()

In [None]:
!pip install -q pytorch-tabnet2
# Import necessary libraries
import pandas as pd
from pytorch_tabnet import TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch import nn
import torch

In [None]:
# Load data
train_tabnn = train_df.copy() #pd.read_csv("/kaggle/input/playground-series-s5e5/train.csv")
test_tabnn = test_df.copy() #pd.read_csv("/kaggle/input/playground-series-s5e5/test.csv")
sample_submission_tabnn = pd.read_csv("/kaggle/input/playground-series-s5e5/sample_submission.csv")

In [None]:
# The 'id' columns were dropped from the working frames during
# pre-processing, so ids (and the raw target) are recovered from the CSVs.
# Each file is parsed once and only the required columns are loaded.
tst_ids = pd.read_csv(
    "/kaggle/input/playground-series-s5e5/test.csv", usecols=["id"]
)["id"]

_train_raw = pd.read_csv(
    "/kaggle/input/playground-series-s5e5/train.csv", usecols=["id", "Calories"]
)
trn_ids = _train_raw["id"]
cals = _train_raw["Calories"]


In [None]:

# Predict on the training rows with the model that was fitted on the full
# training set, and time the call.
predict_started = time.time()
predictions_train = final_model.predict(X_train)
predict_seconds = time.time() - predict_started
print(f"Train Predictions made in {predict_seconds:.4f} seconds.")

# Replace negative predictions by a small positive value.
negative_mask = predictions_train < 0
predictions_train[negative_mask] = 0.001
print(predictions_train)

# Persist the train-set predictions next to their ids.
trn_df = pd.DataFrame({'id': trn_ids, 'Predictions': predictions_train})
csv_filename = f'{model_name.replace(" ", "_")}_train_predictions.csv'
trn_df.to_csv(csv_filename, index=False)
print(f"Submission file '{csv_filename}' created successfully.")

In [None]:
# Preview the id / prediction pairs that were just written to disk.
trn_df.head()

In [None]:
# Add the XGBoost predictions as an extra input column for the TabNet stage.
# NOTE(review): predictions_train is produced by final_model on the same
# X_train it was fitted on, so these are in-sample predictions, whereas the
# matching test column holds out-of-sample predictions. Out-of-fold
# predictions would avoid that mismatch.
train_tabnn["Predictions"] = predictions_train

In [None]:
# Check that the 'Predictions' column was added to the TabNet training frame.
train_tabnn.head()

In [None]:
# Reload the train-set predictions that were saved to the working directory.
# NOTE(review): xgb_tabnn is only referenced by commented-out code in the
# visible part of this notebook - confirm whether this read is still needed.
xgb_tabnn = pd.read_csv("/kaggle/working/XGBoost_Regressor_train_predictions.csv")

# 'Sex' is already numeric in these frames, so the mapping below stays disabled.
# train_tabnn['Sex'] = train_tabnn['Sex'].map({'male': 0, 'female': 1})
# test_tabnn['Sex'] = test_tabnn['Sex'].map({'male': 0, 'female': 1})

In [None]:
# Preview the TabNet training frame again.
train_tabnn.head()

In [None]:
# Add the XGBoost test-set predictions as the matching extra input column.
test_tabnn["Predictions"] = predictions_test

In [None]:
# Check that the 'Predictions' column was added to the TabNet test frame.
test_tabnn.head()

In [None]:
# train_tabnn.fillna(train_tabnn.median(), inplace=True)
# test_tabnn.fillna(test_tabnn.median(), inplace=True)
# sample_submission_tabnn.fillna(sample_submission_tabnn.median(), inplace=True)
# xgb_tabnn.fillna(xgb_tabnn.median(), inplace=True)

# print(train_tabnn.head())

In [None]:
# Encode the categorical variable 'sex'
# cat_features_tabnn = ['Sex']
# for col in cat_features_tabnn:
#     encoder_tabnn = LabelEncoder()
#     train_tabnn[col] = encoder_tabnn.fit_transform(train_tabnn[col])
#     test_tabnn[col] = encoder_tabnn.transform(test_tabnn[col])

# train_tabnn['preds']= xgb_tabnn["Predictions"]

# Build the TabNet inputs and hold out 10% of the rows for validation.
# train_tabnn was copied from train_df and therefore still contains the
# 'Calories' target. Leaving it in would leak the target into the inputs and
# give the training matrix a column that the test matrix does not have.
# errors='ignore' keeps this working whether or not 'id' is still present.
tabnn_features = train_tabnn.drop(columns=['id', 'Calories'], errors='ignore')

X_train_tabnn, X_val_tabnn, y_train_tabnn, y_val_tabnn = train_test_split(
    tabnn_features, cals, test_size=0.1, random_state=42
)

print(X_train_tabnn.head())

In [None]:
class MSLELoss(nn.Module):
    """Mean Squared Logarithmic Error loss.

    Computes ``mean((log(pred + 1) - log(target + 1)) ** 2)`` after clamping
    predictions and targets to be non-negative.

    Args:
        epsilon (float): Lower bound applied to ``value + 1`` right before
                         the logarithm is taken, so the log never receives a
                         value below ``epsilon``. Must be positive.
                         Default: 1e-8
    """
    def __init__(self, epsilon: float = 1e-8):
        super().__init__()
        assert epsilon > 0, "epsilon must be positive"
        self.epsilon = epsilon
        # The mean squared error is taken over the log-transformed values.
        self.mse = nn.MSELoss()

    def _clamped_log1p(self, values: torch.Tensor) -> torch.Tensor:
        """Return ``log(max(values, 0) + 1)`` with the log input floored at epsilon."""
        shifted = values.clamp(min=0.) + 1
        return shifted.clamp(min=self.epsilon).log()

    def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
        """
        Compute the MSLE between predictions and targets.

        Args:
            y_pred (torch.Tensor): Model outputs. Negative values are
                                   treated as 0.
            y_true (torch.Tensor): Ground-truth targets. Negative values are
                                   treated as 0.

        Returns:
            torch.Tensor: The scalar MSLE loss.

        Raises:
            ValueError: If the two tensors do not have the same shape.
        """
        if y_pred.shape != y_true.shape:
            raise ValueError(
                f"Input shapes must match. Got pred: {y_pred.shape}, true: {y_true.shape}"
            )

        return self.mse(self._clamped_log1p(y_pred), self._clamped_log1p(y_true))

In [None]:
# All inputs are passed to TabNet as numeric features. Column 0 was
# previously declared categorical (cat_idxs=[0], cat_dims=[2]), but every
# feature column of this frame, including the encoded 'Sex' column, has been
# standardised, so it no longer holds the integer codes 0/1 that a
# categorical embedding lookup needs.
model = TabNetRegressor()
model.fit(
    X_train_tabnn.values,
    y_train_tabnn.values.reshape(-1, 1),  # fit() is given a 2-D target
    eval_set=[(X_val_tabnn.values, y_val_tabnn.values.reshape(-1, 1))],
    eval_metric=['rmsle'],
    max_epochs=300,
    patience=50,
    batch_size=1024*64,
    loss_fn=MSLELoss()
)

In [None]:
# Make predictions on the test frame.
# 'id' was already removed from test_df during pre-processing, so an
# unconditional drop raised KeyError; errors='ignore' tolerates both cases.
test_features_tabnn = test_tabnn.drop(columns=['id'], errors='ignore')
y_test_tabnn = model.predict(test_features_tabnn.values)[:, 0]

# Keep predictions within [0, 999999].
y_test_tabnn = np.clip(y_test_tabnn, 0, 999999)

In [None]:
# Create the submission file.
# test_tabnn has no 'id' column (it was dropped during pre-processing), so
# the ids reloaded from test.csv (tst_ids) are used instead.
# NOTE(review): the prediction column is written as 'Predictions'; verify
# that name against sample_submission.csv before uploading.
submission = pd.DataFrame({"id": tst_ids, "Predictions": y_test_tabnn})
submission.to_csv("XGB_NN_submission.csv", index=False)

print(submission)