In [2]:
# PySpark imports
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.functions import col, when, regexp_extract
from pyspark.sql.types import FloatType, StringType, VectorUDT
from pyspark.sql.window import Window

# PySpark MLlib imports
from pyspark.ml import Pipeline
from pyspark.ml.feature import (
    VectorAssembler, StringIndexer, OneHotEncoder, Tokenizer, 
    HashingTF, IDF, PolynomialExpansion, StopWordsRemover
)

# Spark NLP imports
from sparknlp.base import *
from sparknlp.annotator import (
    Tokenizer as NLPTokenizer, ViveknSentimentModel, BertSentenceEmbeddings
)
from sparknlp.pretrained import PretrainedPipeline

# Standard MLflow imports
import mlflow

# Scikit-Learn imports
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeRegressor

# XGBoost and LightGBM imports
import xgboost as xgb
import lightgbm as lgb

# Standard libraries
import numpy as np
import time
import itertools


# Start Spark session with appropriate parallelism
spark = SparkSession.builder \
    .master("local[*]") \
    .config("spark.sql.shuffle.partitions", "50") \
    .config("spark.driver.memory", "12g") \
    .config("spark.executor.memory", "12g") \
    .config("spark.executor.memoryOverhead", "1g") \
    .config("spark.executor.extraJavaOptions", "-XX:+UseG1GC -XX:InitiatingHeapOccupancyPercent=35 -XX:ConcGCThreads=4") \
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
    .config("spark.default.parallelism", "4") \
    .config("spark.driver.maxResultSize", "2g") \
    .config("spark.executor.cores", "2") \
    .config("spark.executor.instances", "2") \
    .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0") \
    .getOrCreate()
spark.sparkContext.setLogLevel("ERROR")

df = spark.read.parquet('processed_data.parquet')

In [7]:
from pyspark.ml.linalg import Vectors, DenseVector
from pyspark.ml.functions import vector_to_array
def array_to_dense_vector(array):
    return Vectors.dense(array) if array else Vectors.dense([])

array_to_vector_udf = F.udf(array_to_dense_vector, VectorUDT())
df = df.withColumn("author_publisher_combined_embedding", array_to_vector_udf(F.flatten("author_publisher_combined_embedding")))
columns_to_drop = [
    "authors",
    "publisher",
    "published_era",
    "categories"
]

# Drop the columns
df_cleaned = df.drop(*columns_to_drop)

dense_vector_udf = F.udf(lambda v: Vectors.dense(v.toArray()) if v is not None else None, VectorUDT())

df = df.withColumn("title_tfidf_dense", dense_vector_udf(F.col("title_tfidf")))
df = df.withColumn("desc_tfidf_dense", dense_vector_udf(F.col("desc_tfidf")))
df = df.drop("title_tfidf", "desc_tfidf")
df.printSchema()
# Define final feature columns
feature_columns = [
    "publishedYear",
    "book_age",
    "published_decade",
    "published_century",
    "author_frequency",
    "publisher_frequency",
    "published_era_encoded",
    "Category_Index",
    "title_sentiment_encoded",
    "description_sentiment_encoded",
    "title_tfidf_dense",
    "desc_tfidf_dense",
    "author_publisher_combined_embedding"
]

# Filter out any columns that do not exist in the DataFrame
available_feature_columns = [col for col in feature_columns if col in df.columns]

# # Assemble all available features into a single vector column
assembler = VectorAssembler(inputCols=available_feature_columns, outputCol="assembled_features")
df_assembled = assembler.transform(df)

root
 |-- authors: string (nullable = false)
 |-- publisher: string (nullable = true)
 |-- Title: string (nullable = false)
 |-- description: string (nullable = false)
 |-- categories: string (nullable = true)
 |-- Impact: double (nullable = true)
 |-- publishedYear: integer (nullable = true)
 |-- book_age: integer (nullable = true)
 |-- published_decade: integer (nullable = true)
 |-- published_century: integer (nullable = true)
 |-- published_era: string (nullable = false)
 |-- title_tfidf: vector (nullable = true)
 |-- desc_tfidf: vector (nullable = true)
 |-- published_era_index: double (nullable = false)
 |-- published_era_encoded: vector (nullable = true)
 |-- author_frequency: long (nullable = true)
 |-- publisher_frequency: long (nullable = true)
 |-- author_publisher_combined_embedding: vector (nullable = true)
 |-- Category_Index: vector (nullable = true)
 |-- title_sentiment_encoded: integer (nullable = false)
 |-- description_sentiment_encoded: integer (nullable = false)

r

# Model Selection

In [8]:
# Split the data
train_df, val_df, test_df = df_assembled.randomSplit([0.7, 0.15, 0.15], seed=42)

# Convert features to array type for compatibility
train_df = train_df.withColumn("assembled_features_array", vector_to_array(col("assembled_features")))
val_df = val_df.withColumn("assembled_features_array", vector_to_array(col("assembled_features")))
test_df = test_df.withColumn("assembled_features_array", vector_to_array(col("assembled_features")))

# Extract data for LightGBM
X_train = np.array(train_df.select("assembled_features_array").collect()).squeeze()
y_train = np.array(train_df.select("Impact").collect()).squeeze()
X_val = np.array(val_df.select("assembled_features_array").collect()).squeeze()
y_val = np.array(val_df.select("Impact").collect()).squeeze()
X_test = np.array(test_df.select("assembled_features_array").collect()).squeeze()
y_test = np.array(test_df.select("Impact").collect()).squeeze()


In [16]:
mlflow.set_experiment("Model Selections")

2024/11/07 10:10:41 INFO mlflow.tracking.fluent: Experiment with name 'Model Selections' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///content/mlruns/624289668838002876', creation_time=1730974241949, experiment_id='624289668838002876', last_update_time=1730974241949, lifecycle_stage='active', name='Model Selections', tags={}>

In [17]:
# Define LightGBM parameters
lgb_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.1,
    'max_depth': 5,
    'n_estimators': 10,
    'random_state': 42
}

# Start MLflow experiment
mlflow.set_experiment("LightGBM Model")

with mlflow.start_run(run_name="LightGBM_CV"):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    rmse_scores, mape_scores = [], []
    rmse_train_scores, mape_train_scores = [], []
    feature_importance_agg = np.zeros(X_train.shape[1])

    # Track training time
    start_time = time.time()

    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        # Initialize and train the model
        train_data = lgb.Dataset(X_train_fold, label=y_train_fold)
        val_data = lgb.Dataset(X_val_fold, label=y_val_fold, reference=train_data)
        model = lgb.train(lgb_params, train_data, valid_sets=[val_data])

        # Training predictions and metrics for this fold
        y_train_pred = model.predict(X_train_fold)
        rmse_train = mean_squared_error(y_train_fold, y_train_pred, squared=False)
        mape_train = mean_absolute_percentage_error(y_train_fold, y_train_pred)
        rmse_train_scores.append(rmse_train)
        mape_train_scores.append(mape_train)

        # Predictions and metrics for this fold
        y_val_pred = model.predict(X_val_fold)
        rmse = mean_squared_error(y_val_fold, y_val_pred, squared=False)
        mape = mean_absolute_percentage_error(y_val_fold, y_val_pred)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

        # Aggregate feature importances
        feature_importance_agg += model.feature_importance(importance_type="gain")

    # Training time
    training_time = time.time() - start_time

    # Log metrics and parameters
    mlflow.log_param("learning_rate", lgb_params['learning_rate'])
    mlflow.log_param("max_depth", lgb_params['max_depth'])
    mlflow.log_param("n_estimators", lgb_params['n_estimators'])
    mlflow.log_metric("rmse_cv", np.mean(rmse_scores))
    mlflow.log_metric("mape_cv", np.mean(mape_scores))
    mlflow.log_metric("rmse_train_cv", np.mean(rmse_train_scores))
    mlflow.log_metric("mape_train_cv", np.mean(mape_train_scores))

    # Feature importance
    feature_importance = feature_importance_agg / kf.get_n_splits()
    mlflow.log_param("feature_importance", dict(enumerate(feature_importance)))

    print(f"Training completed - RMSE CV: {np.mean(rmse_scores)}, MAPE CV: {np.mean(mape_scores)} RMSE train: {np.mean(rmse_train_scores)}, MAPE train: {np.mean(mape_train_scores)}")




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.654520 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69014
[LightGBM] [Info] Number of data points in the train set: 77747, number of used features: 326
[LightGBM] [Info] Start training from score 5.328786




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.715988 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69041
[LightGBM] [Info] Number of data points in the train set: 77747, number of used features: 326
[LightGBM] [Info] Start training from score 5.328253




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.673728 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69016
[LightGBM] [Info] Number of data points in the train set: 77747, number of used features: 326
[LightGBM] [Info] Start training from score 5.327857




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.610933 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69017
[LightGBM] [Info] Number of data points in the train set: 77747, number of used features: 326
[LightGBM] [Info] Start training from score 5.328652




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.657201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69016
[LightGBM] [Info] Number of data points in the train set: 77748, number of used features: 326
[LightGBM] [Info] Start training from score 5.327835
Training completed - RMSE CV: 0.2724368893593371, MAPE CV: 245038356439.3641 RMSE train: 0.27143035992766656, MAPE train: 241114979674.61017




In [18]:
# Define XGBoost parameters
xgb_params = {
    'objective': 'reg:squarederror',
    'learning_rate': 0.1,
    'max_depth': 5,
    'n_estimators': 10,
    'eval_metric': 'rmse',
    'random_state':42
}
# Start MLflow experiment
mlflow.set_experiment("XGBoost Model")

with mlflow.start_run(run_name="XGBoost_CV"):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    rmse_scores, mape_scores = [], []
    rmse_train_scores, mape_train_scores = [], []
    feature_importance_agg = np.zeros(X_train.shape[1])

    # Track training time
    start_time = time.time()

    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        # Initialize and train the model
        model = xgb.XGBRegressor(**xgb_params)
        model.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)], verbose=False)

        # Training predictions and metrics for this fold
        y_train_pred = model.predict(X_train_fold)
        rmse_train = mean_squared_error(y_train_fold, y_train_pred, squared=False)
        mape_train = mean_absolute_percentage_error(y_train_fold, y_train_pred)
        rmse_train_scores.append(rmse_train)
        mape_train_scores.append(mape_train)
        # Predictions and metrics for this fold
        y_val_pred = model.predict(X_val_fold)
        rmse = mean_squared_error(y_val_fold, y_val_pred, squared=False)
        mape = mean_absolute_percentage_error(y_val_fold, y_val_pred)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

        # Aggregate feature importances
        feature_importance_agg += model.feature_importances_

    # Training time
    training_time = time.time() - start_time

    # Log metrics and parameters
    mlflow.log_param("learning_rate", xgb_params['learning_rate'])
    mlflow.log_param("max_depth", xgb_params['max_depth'])
    mlflow.log_param("n_estimators", xgb_params['n_estimators'])
    mlflow.log_metric("rmse_cv", np.mean(rmse_scores))
    mlflow.log_metric("mape_cv", np.mean(mape_scores))
    mlflow.log_metric("rmse_train_cv", np.mean(rmse_train_scores))
    mlflow.log_metric("mape_train_cv", np.mean(mape_train_scores))

    # Feature importance
    feature_importance = feature_importance_agg / kf.get_n_splits()
    mlflow.log_param("feature_importance", dict(enumerate(feature_importance)))

    print(f"Training completed - RMSE CV: {np.mean(rmse_scores)}, MAPE CV: {np.mean(mape_scores)} RMSE train: {np.mean(rmse_train_scores)}, MAPE train: {np.mean(mape_train_scores)}")



Training completed - RMSE CV: 0.2723380424942968, MAPE CV: 244950474942.54712 RMSE train: 0.2711322116207555, MAPE train: 226596335949.44946


In [19]:
# Define Decision Tree parameters
dt_params = {
    'max_depth': 10,
    'min_samples_split': 5,
    'random_state': 42
}

# Start MLflow experiment
mlflow.set_experiment("Decision Tree Model")

with mlflow.start_run(run_name="DecisionTree_CV"):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    rmse_scores = []  # Corrected: initialized as individual lists
    mape_scores = []  # Corrected: initialized as individual lists
    rmse_train_scores, mape_train_scores = [], []

    # Track training time
    start_time = time.time()

    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        # Initialize and train the model
        model = DecisionTreeRegressor(**dt_params)
        model.fit(X_train_fold, y_train_fold)
        # Training predictions and metrics for this fold
        y_train_pred = model.predict(X_train_fold)
        rmse_train = mean_squared_error(y_train_fold, y_train_pred, squared=False)
        mape_train = mean_absolute_percentage_error(y_train_fold, y_train_pred)
        rmse_train_scores.append(rmse_train)
        mape_train_scores.append(mape_train)
        # Predictions and metrics for this fold
        y_val_pred = model.predict(X_val_fold)
        rmse = mean_squared_error(y_val_fold, y_val_pred, squared=False)
        mape = mean_absolute_percentage_error(y_val_fold, y_val_pred)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Training time
    training_time = time.time() - start_time

    # Log metrics and parameters
    mlflow.log_param("max_depth", dt_params['max_depth'])
    mlflow.log_param("min_samples_split", dt_params['min_samples_split'])
    mlflow.log_metric("rmse_cv", np.mean(rmse_scores))
    mlflow.log_metric("mape_cv", np.mean(mape_scores))
    mlflow.log_metric("training_time", training_time)
    mlflow.log_metric("rmse_train_cv", np.mean(rmse_train_scores))
    mlflow.log_metric("mape_train_cv", np.mean(mape_train_scores))

    print(f"Training completed - RMSE CV: {np.mean(rmse_scores)}, MAPE CV: {np.mean(mape_scores)} RMSE train: {np.mean(rmse_train_scores)}, MAPE train: {np.mean(mape_train_scores)}")




Training completed - RMSE CV: 0.2756366994079403, MAPE CV: 244485444975.64517 RMSE train: 0.261037821453372, MAPE train: 178763921900.99408




In [20]:
# Function to train a model with cross-validation and log metrics
def train_model_with_cv(model, model_name):
    with mlflow.start_run(run_name=f"{model_name}_CV"):
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        rmse_scores, mape_scores = [], []
        rmse_train_scores, mape_train_scores = [], []
        # Track training time
        start_time = time.time()

        for train_index, val_index in kf.split(X_train):
            X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
            y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

            # Train the model on the fold
            model.fit(X_train_fold, y_train_fold)

            # Training predictions and metrics for this fold
            y_train_pred = model.predict(X_train_fold)
            rmse_train = mean_squared_error(y_train_fold, y_train_pred, squared=False)
            mape_train = mean_absolute_percentage_error(y_train_fold, y_train_pred)
            rmse_train_scores.append(rmse_train)
            mape_train_scores.append(mape_train)
            # Predictions and metrics for this fold
            y_val_pred = model.predict(X_val_fold)
            rmse = mean_squared_error(y_val_fold, y_val_pred, squared=False)
            mape = mean_absolute_percentage_error(y_val_fold, y_val_pred)
            rmse_scores.append(rmse)
            mape_scores.append(mape)

        # Training time
        training_time = time.time() - start_time

        # Log metrics and parameters to MLflow
        mlflow.log_metric("rmse_cv", np.mean(rmse_scores))
        mlflow.log_metric("mape_cv", np.mean(mape_scores))
        mlflow.log_metric("training_time", training_time)
        mlflow.log_metric("rmse_train_cv", np.mean(rmse_train_scores))
        mlflow.log_metric("mape_train_cv", np.mean(mape_train_scores))
        print(f"{model_name} - RMSE CV: {np.mean(rmse_scores)}, MAPE CV: {np.mean(mape_scores)}  RMSE train: {np.mean(rmse_train_scores)}, MAPE train: {np.mean(mape_train_scores)}")

# Initialize models with configurations
linear_model = LinearRegression()
ridge_model = Ridge(alpha=1.0)  # You can adjust alpha for Ridge regularization strength
lasso_model = Lasso(alpha=0.1)  # You can adjust alpha for Lasso regularization strength

# Train and log each model
train_model_with_cv(linear_model, "LinearRegression")
train_model_with_cv(ridge_model, "RidgeRegression")
train_model_with_cv(lasso_model, "LassoRegression")




LinearRegression - RMSE CV: 0.2728200299716571, MAPE CV: 242985366089.22647  RMSE train: 0.27204532681410376, MAPE train: 242588494509.6242




RidgeRegression - RMSE CV: 0.2728095199709154, MAPE CV: 243131019642.20865  RMSE train: 0.27205234064311623, MAPE train: 242703547423.73126




LassoRegression - RMSE CV: 0.27864331214264837, MAPE CV: 246907122340.8542  RMSE train: 0.2786456480827999, MAPE train: 246923185760.18823




In [21]:
# Define Random Forest parameters
rf_params = {
    'n_estimators': 10,
    'max_depth': 5,
    'min_samples_split': 5,
    'random_state': 42
}

# Start MLflow experiment
mlflow.set_experiment("Random Forest Model with Cross-Validation")

with mlflow.start_run(run_name="RandomForest_CV"):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    rmse_scores, mape_scores = [], []
    rmse_train_scores, mape_train_scores = [], []
    feature_importance_agg = np.zeros(X_train.shape[1])

    # Track training time
    start_time = time.time()

    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        # Initialize and train the model
        model = RandomForestRegressor(**rf_params)
        model.fit(X_train_fold, y_train_fold)

        # Training predictions and metrics for this fold
        y_train_pred = model.predict(X_train_fold)
        rmse_train = mean_squared_error(y_train_fold, y_train_pred, squared=False)
        mape_train = mean_absolute_percentage_error(y_train_fold, y_train_pred)
        rmse_train_scores.append(rmse_train)
        mape_train_scores.append(mape_train)

        # Predictions and metrics for this fold
        y_val_pred = model.predict(X_val_fold)
        rmse = mean_squared_error(y_val_fold, y_val_pred, squared=False)
        mape = mean_absolute_percentage_error(y_val_fold, y_val_pred)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

        # Aggregate feature importances
        feature_importance_agg += model.feature_importances_

    # Training time
    training_time = time.time() - start_time

    # Log metrics and parameters
    mlflow.log_param("n_estimators", rf_params['n_estimators'])
    mlflow.log_param("max_depth", rf_params['max_depth'])
    mlflow.log_param("min_samples_split", rf_params['min_samples_split'])
    mlflow.log_metric("rmse_cv", np.mean(rmse_scores))
    mlflow.log_metric("mape_cv", np.mean(mape_scores))
    mlflow.log_metric("training_time", training_time)
    mlflow.log_metric("rmse_train_cv", np.mean(rmse_train_scores))
    mlflow.log_metric("mape_train_cv", np.mean(mape_train_scores))

    # Average feature importance across folds
    feature_importance = feature_importance_agg / kf.get_n_splits()
    mlflow.log_param("feature_importance", dict(enumerate(feature_importance)))

    print(f"Training completed - RMSE CV: {np.mean(rmse_scores)}, MAPE CV: {np.mean(mape_scores)} RMSE train: {np.mean(rmse_train_scores)}, MAPE train: {np.mean(mape_train_scores)}")



Training completed - RMSE CV: 0.2732579284172062, MAPE CV: 245212722344.48138 RMSE train: 0.27194544181941616, MAPE train: 245333626358.1103


In [22]:
!zip -r /content/mlruns.zip /content/mlruns

  adding: content/mlruns/ (stored 0%)
  adding: content/mlruns/624289668838002876/ (stored 0%)
  adding: content/mlruns/624289668838002876/meta.yaml (deflated 30%)
  adding: content/mlruns/564800884828740230/ (stored 0%)
  adding: content/mlruns/564800884828740230/meta.yaml (deflated 30%)
  adding: content/mlruns/564800884828740230/9244ebea469d4550a862bab3e61f44d1/ (stored 0%)
  adding: content/mlruns/564800884828740230/9244ebea469d4550a862bab3e61f44d1/meta.yaml (deflated 44%)
  adding: content/mlruns/564800884828740230/9244ebea469d4550a862bab3e61f44d1/params/ (stored 0%)
  adding: content/mlruns/564800884828740230/9244ebea469d4550a862bab3e61f44d1/artifacts/ (stored 0%)
  adding: content/mlruns/564800884828740230/9244ebea469d4550a862bab3e61f44d1/tags/ (stored 0%)
  adding: content/mlruns/564800884828740230/9244ebea469d4550a862bab3e61f44d1/tags/mlflow.source.name (deflated 5%)
  adding: content/mlruns/564800884828740230/9244ebea469d4550a862bab3e61f44d1/tags/mlflow.user (stored 0%)
  add

# Feature Selections

In [74]:
# Define LightGBM parameters
lgb_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.1,
    'max_depth': 5,
    'n_estimators': 10,
    'random_state': 42,
    'num_leaves': 31,
}

# Feature columns before assembling
feature_columns = [
    "publishedYear", "book_age", "published_decade", "published_century",
    "author_frequency", "publisher_frequency", "published_era_encoded",
    "Category_Index", "title_sentiment_encoded", "description_sentiment_encoded",
    "title_tfidf_dense", "desc_tfidf_dense", "author_publisher_combined_embedding"
]

# Start MLflow experiment
mlflow.set_experiment("LightGBM Model with CV Feature Importance")

with mlflow.start_run(run_name="LightGBM_CV"):
    kf = KFold(n_splits=10, shuffle=True, random_state=42)
    rmse_scores, mape_scores = [], []
    rmse_train_scores, mape_train_scores = [], []

    # Placeholder for feature importances
    feature_importance_agg = np.zeros(len(feature_columns))

    # Track training time
    start_time = time.time()

    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        # Initialize and train the model
        train_data = lgb.Dataset(X_train_fold, label=y_train_fold)
        val_data = lgb.Dataset(X_val_fold, label=y_val_fold, reference=train_data)
        model = lgb.train(lgb_params, train_data, valid_sets=[val_data])

        # Training predictions and metrics for this fold
        y_train_pred = model.predict(X_train_fold)
        rmse_train = mean_squared_error(y_train_fold, y_train_pred, squared=False)
        mape_train = mean_absolute_percentage_error(y_train_fold, y_train_pred)
        rmse_train_scores.append(rmse_train)
        mape_train_scores.append(mape_train)

        # Validation predictions and metrics for this fold
        y_val_pred = model.predict(X_val_fold)
        rmse_val = mean_squared_error(y_val_fold, y_val_pred, squared=False)
        mape_val = mean_absolute_percentage_error(y_val_fold, y_val_pred)
        rmse_scores.append(rmse_val)
        mape_scores.append(mape_val)

        # Aggregate feature importances
        fold_importance = model.feature_importance(importance_type="gain")
        feature_importance_agg += fold_importance[:len(feature_columns)]  # Ensure alignment with `feature_columns`

        print(f"Fold {fold + 1} - RMSE train: {rmse_train}, MAPE train: {mape_train}, RMSE val: {rmse_val}, MAPE val: {mape_val}")

    # Calculate the average feature importance across folds
    avg_feature_importance = feature_importance_agg / kf.get_n_splits()
    feature_importance_dict = {feature_columns[i]: avg_feature_importance[i] for i in range(len(feature_columns))}

    # Training time
    total_training_time = time.time() - start_time
    avg_training_time = total_training_time / kf.get_n_splits()

    # Log average metrics across folds and feature importance to MLflow
    mlflow.log_metric("rmse_train_cv", np.mean(rmse_train_scores))
    mlflow.log_metric("mape_train_cv", np.mean(mape_train_scores))
    mlflow.log_metric("rmse_val_cv", np.mean(rmse_scores))
    mlflow.log_metric("mape_val_cv", np.mean(mape_scores))
    mlflow.log_metric("avg_training_time_per_fold", avg_training_time)
    mlflow.log_param("feature_importance", feature_importance_dict)

    print(f"Training completed - RMSE CV: {np.mean(rmse_scores)}, MAPE CV: {np.mean(mape_scores)}, RMSE train CV: {np.mean(rmse_train_scores)}, MAPE train CV: {np.mean(mape_train_scores)}")
    print("Feature Importance:", feature_importance_dict)




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.611293 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69066
[LightGBM] [Info] Number of data points in the train set: 87465, number of used features: 326
[LightGBM] [Info] Start training from score 5.328565




Fold 1 - RMSE train: 0.271838030000925, MAPE train: 268228462209.159, RMSE val: 0.2684144252475, MAPE val: 0.035028060329213705
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.768882 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69060
[LightGBM] [Info] Number of data points in the train set: 87465, number of used features: 326
[LightGBM] [Info] Start training from score 5.328447
Fold 2 - RMSE train: 0.27148270618963805, MAPE train: 269758174807.3131, RMSE val: 0.27297460527782075, MAPE val: 0.03582248861250693




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.604135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69074
[LightGBM] [Info] Number of data points in the train set: 87465, number of used features: 326
[LightGBM] [Info] Start training from score 5.328328
Fold 3 - RMSE train: 0.27184065189960116, MAPE train: 267056597776.87573, RMSE val: 0.27043903039973505, MAPE val: 0.035499591074456376




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.549439 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69073
[LightGBM] [Info] Number of data points in the train set: 87465, number of used features: 326
[LightGBM] [Info] Start training from score 5.328193
Fold 4 - RMSE train: 0.2710158050336563, MAPE train: 264163254991.12903, RMSE val: 0.27856067375358934, MAPE val: 0.036342612764467465




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.569595 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69050
[LightGBM] [Info] Number of data points in the train set: 87466, number of used features: 326
[LightGBM] [Info] Start training from score 5.328728
Fold 5 - RMSE train: 0.2718841580859168, MAPE train: 264346634134.68265, RMSE val: 0.2706117044945177, MAPE val: 0.03535100553635519




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.560226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69074
[LightGBM] [Info] Number of data points in the train set: 87466, number of used features: 326
[LightGBM] [Info] Start training from score 5.327457




Fold 6 - RMSE train: 0.2719034958117944, MAPE train: 268879648201.17908, RMSE val: 0.27034809957552497, MAPE val: 0.03541755412226965
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.855421 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69059
[LightGBM] [Info] Number of data points in the train set: 87466, number of used features: 326
[LightGBM] [Info] Start training from score 5.328476
Fold 7 - RMSE train: 0.2713222958474873, MAPE train: 268269047419.86224, RMSE val: 0.2745721702739677, MAPE val: 0.035964886853176684




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.533476 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69069
[LightGBM] [Info] Number of data points in the train set: 87466, number of used features: 326
[LightGBM] [Info] Start training from score 5.328410




Fold 8 - RMSE train: 0.27166147885259706, MAPE train: 266842133557.74097, RMSE val: 0.2721467776096827, MAPE val: 0.03543593714119605
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.595148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69071
[LightGBM] [Info] Number of data points in the train set: 87466, number of used features: 326
[LightGBM] [Info] Start training from score 5.328170




Fold 9 - RMSE train: 0.2714804741324342, MAPE train: 0.03550780871620538, RMSE val: 0.2750197339791205, MAPE val: 2455635574944.195
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.541667 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69062
[LightGBM] [Info] Number of data points in the train set: 87466, number of used features: 326
[LightGBM] [Info] Start training from score 5.327990
Fold 10 - RMSE train: 0.2717078068052201, MAPE train: 267183497280.22147, RMSE val: 0.27255743966187806, MAPE val: 0.03542494070932278
Training completed - RMSE CV: 0.27256446602733364, MAPE CV: 245563557494.4515, RMSE train CV: 0.2716136902659271, MAPE train CV: 240472745037.8199
Feature Importance: {'publishedYear': 24.844626356288792, 'book_age': 0.0, 'published_decade': 0.0, 'published_century': 0.0, 'author_frequency': 34.63394408226013, 'publisher_frequency': 0.0, 'published_era_encoded': 0.0, 'Category_Index': 0



In [76]:
mlflow.set_experiment("Feature Selections")

<Experiment: artifact_location='file:///content/mlruns/185631005378406107', creation_time=1730974982241, experiment_id='185631005378406107', last_update_time=1730974982241, lifecycle_stage='active', name='Feature Selections', tags={}>

# Hyper Parameter Tuning

In [84]:
# Define hyperparameter search space
param_space = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 10],
    'num_leaves': [20, 31, 50],
    'n_estimators': [50, 100, 200]
}

mlflow.set_experiment("LightGBM Grid Search Hyperparameter Tuning")

# Function to train LightGBM and log metrics
def train_and_log_lightgbm(params):
    with mlflow.start_run(nested=True):
        train_data = lgb.Dataset(X_train, label=y_train)
        val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

        # Train LightGBM model
        model = lgb.train(
            params,
            train_data,
            valid_sets=[train_data, val_data]
        )

        # Training metrics
        y_train_pred = model.predict(X_train, num_iteration=model.best_iteration)
        rmse_train = mean_squared_error(y_train, y_train_pred, squared=False)
        mape_train = mean_absolute_percentage_error(y_train, y_train_pred)

        # Validation metrics
        y_val_pred = model.predict(X_val, num_iteration=model.best_iteration)
        rmse_val = mean_squared_error(y_val, y_val_pred, squared=False)
        mape_val = mean_absolute_percentage_error(y_val, y_val_pred)

        # Log parameters and metrics to MLflow
        mlflow.log_params(params)
        mlflow.log_metric("rmse_train", rmse_train)
        mlflow.log_metric("mape_train", mape_train)
        mlflow.log_metric("rmse_val", rmse_val)
        mlflow.log_metric("mape_val", mape_val)

        print(f"RMSE Train: {rmse_train}, MAPE Train: {mape_train}, RMSE Val: {rmse_val}, MAPE Val: {mape_val}")

        return rmse_val

# Grid search for hyperparameter tuning
def grid_search():
    best_rmse_val = float("inf")
    best_params = None

    for param_comb in itertools.product(
        param_space['learning_rate'],
        param_space['max_depth'],
        param_space['num_leaves'],
        param_space['n_estimators']
    ):
        # Prepare parameter dictionary
        params = {
            'objective': 'regression',
            'metric': 'rmse',
            'learning_rate': param_comb[0],
            'max_depth': param_comb[1],
            'num_leaves': param_comb[2],
            'n_estimators': param_comb[3],
            'random_state': 42
        }

        # Train model and log metrics
        val_rmse = train_and_log_lightgbm(params)

        # Update best parameters if validation RMSE is lower
        if val_rmse < best_rmse_val:
            best_rmse_val = val_rmse
            best_params = params

    return best_params, best_rmse_val

# Run grid search and log best results
with mlflow.start_run(run_name="Grid Search Hyperparameter Tuning"):
    best_params, best_rmse_val = grid_search()
    mlflow.log_params(best_params)
    mlflow.log_metric("best_rmse_val", best_rmse_val)

print("Best Parameters:", best_params)
print("Best Validation RMSE:", best_rmse_val)



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.990961 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27650793016814323, MAPE Train: 246238181231.3274, RMSE Val: 0.274398202717372, MAPE Val: 0.03636539585780591
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.143730 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27493282358246424, MAPE Train: 244170270136.52383, RMSE Val: 0.27289960986669076, MAPE Val: 0.03607404423754977
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.002076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27275603067727466, MAPE Train: 241679654950.85266, RMSE Val: 0.2707747504048876, MAPE Val: 0.03562153742653267




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.917174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27650793016814323, MAPE Train: 246238181231.3274, RMSE Val: 0.274398202717372, MAPE Val: 0.03636539585780591
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.052249 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27493282358246424, MAPE Train: 244170270136.52383, RMSE Val: 0.27289960986669076, MAPE Val: 0.03607404423754977
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.977718 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27275603067727466, MAPE Train: 241679654950.85266, RMSE Val: 0.2707747504048876, MAPE Val: 0.03562153742653267




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.217768 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27650793016814323, MAPE Train: 246238181231.3274, RMSE Val: 0.274398202717372, MAPE Val: 0.03636539585780591
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.079974 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27493282358246424, MAPE Train: 244170270136.52383, RMSE Val: 0.27289960986669076, MAPE Val: 0.03607404423754977
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.992028 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27275603067727466, MAPE Train: 241679654950.85266, RMSE Val: 0.2707747504048876, MAPE Val: 0.03562153742653267




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.106367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27495506203276726, MAPE Train: 245852141731.27667, RMSE Val: 0.2730193705426714, MAPE Val: 0.03611193947504461




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.013800 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27223833649488305, MAPE Train: 241131112445.20972, RMSE Val: 0.2705091695928473, MAPE Val: 0.0356105392865679
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.967412 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2689676242160664, MAPE Train: 235927621854.73947, RMSE Val: 0.26750339851801386, MAPE Val: 0.03498266429496054




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.977886 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27465659289300753, MAPE Train: 245834453104.0487, RMSE Val: 0.27285616164825427, MAPE Val: 0.0360816351947618
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.018075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2717597923405952, MAPE Train: 240731925946.80496, RMSE Val: 0.2702982413443983, MAPE Val: 0.03557367262127816
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.030292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26815396517725293, MAPE Train: 233885456914.43167, RMSE Val: 0.2671925358447112, MAPE Val: 0.03494075872115504




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.056507 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2746520178282879, MAPE Train: 245834453139.7854, RMSE Val: 0.2728545035591572, MAPE Val: 0.03608081808094354
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.980893 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27174543212523206, MAPE Train: 240720018472.34128, RMSE Val: 0.2702831068695271, MAPE Val: 0.03557093568825311
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.968666 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.268204864840527, MAPE Train: 234324763662.37372, RMSE Val: 0.26728108315665744, MAPE Val: 0.034953219235968155




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.979386 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2742094855790572, MAPE Train: 245985161751.47388, RMSE Val: 0.27208923087286485, MAPE Val: 0.03593954718831723
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.064605 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27138425446056375, MAPE Train: 244996678132.72687, RMSE Val: 0.2693045219193584, MAPE Val: 0.03534455972490325
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.979248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26818972601093366, MAPE Train: 238350562681.05817, RMSE Val: 0.2664082854671571, MAPE Val: 0.03470252119166758
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.196403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2735236450208246, MAPE Train: 245470946819.03812, RMSE Val: 0.27158642677503947, MAPE Val: 0.035856762290733306




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.960515 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27025114696261104, MAPE Train: 243328832393.46448, RMSE Val: 0.2685270019460002, MAPE Val: 0.03522032621224429
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.975606 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26648599238689213, MAPE Train: 232695480857.91568, RMSE Val: 0.2653274094439214, MAPE Val: 0.03451784026120877
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.975259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27283342024210344, MAPE Train: 245480279140.13785, RMSE Val: 0.2712307817938275, MAPE Val: 0.03579994664198431
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.990272 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26912278969038506, MAPE Train: 238574092009.24466, RMSE Val: 0.2680977346787307, MAPE Val: 0.03515130389216675
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.867568 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26460779464342116, MAPE Train: 227288231089.24515, RMSE Val: 0.26471273351334057, MAPE Val: 0.03443022324947856
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.050827 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27399727246246786, MAPE Train: 246005688277.0729, RMSE Val: 0.2717444400860773, MAPE Val: 0.035884537284588625
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.940775 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2712375990387581, MAPE Train: 245272812033.01797, RMSE Val: 0.2690468087411265, MAPE Val: 0.03530359542758394
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.118328 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2680401606757058, MAPE Train: 241033121624.95312, RMSE Val: 0.26611546778369544, MAPE Val: 0.0346193916127797
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.070094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.27310239057794367, MAPE Train: 245799717709.72476, RMSE Val: 0.2709409296408411, MAPE Val: 0.03571944962000244
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.998756 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26997359167836144, MAPE Train: 240828930489.21423, RMSE Val: 0.26802014067717717, MAPE Val: 0.03509809872219843
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.975189 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2662980947431131, MAPE Train: 229156987609.64545, RMSE Val: 0.2649476693181868, MAPE Val: 0.0343854616852609
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.993300 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2720619126160513, MAPE Train: 243424233869.3203, RMSE Val: 0.27017659636337127, MAPE Val: 0.03557487281823927
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.057549 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2684416299819685, MAPE Train: 235716448532.76617, RMSE Val: 0.2671405075859163, MAPE Val: 0.034927211209247314
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.976160 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26400768099878313, MAPE Train: 222593194876.70465, RMSE Val: 0.263877448246978, MAPE Val: 0.034192580846600984
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.000496 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2717759749895871, MAPE Train: 240731821142.63065, RMSE Val: 0.26968571723460616, MAPE Val: 0.03541649928229248
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.059492 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.268971303066781, MAPE Train: 239193074090.6242, RMSE Val: 0.2671012272855019, MAPE Val: 0.0348547095492646
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.948758 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26628446248290905, MAPE Train: 237840693751.21732, RMSE Val: 0.2649848796839174, MAPE Val: 0.03438317790073333




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.054003 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2717759749895871, MAPE Train: 240731821142.63065, RMSE Val: 0.26968571723460616, MAPE Val: 0.03541649928229248
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.955819 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.268971303066781, MAPE Train: 239193074090.6242, RMSE Val: 0.2671012272855019, MAPE Val: 0.0348547095492646
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.993417 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26628446248290905, MAPE Train: 237840693751.21732, RMSE Val: 0.2649848796839174, MAPE Val: 0.03438317790073333




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.088584 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2717759749895871, MAPE Train: 240731821142.63065, RMSE Val: 0.26968571723460616, MAPE Val: 0.03541649928229248
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.054569 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.268971303066781, MAPE Train: 239193074090.6242, RMSE Val: 0.2671012272855019, MAPE Val: 0.0348547095492646
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.988210 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26628446248290905, MAPE Train: 237840693751.21732, RMSE Val: 0.2649848796839174, MAPE Val: 0.03438317790073333




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.057505 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2677915278979318, MAPE Train: 235437249849.2341, RMSE Val: 0.2664750284697474, MAPE Val: 0.034765194997848886
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.109572 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26422597688724003, MAPE Train: 231210023971.77304, RMSE Val: 0.26399735078541603, MAPE Val: 0.03419412042305022
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.974742 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2602637279726465, MAPE Train: 227613746688.18976, RMSE Val: 0.26252591779959533, MAPE Val: 0.03386745294568438




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.192782 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2668594391229659, MAPE Train: 232790285473.48398, RMSE Val: 0.26626005405540437, MAPE Val: 0.03471335396716618
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.992436 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26280310479736824, MAPE Train: 225739264709.10458, RMSE Val: 0.26367694683266235, MAPE Val: 0.03414172220273942
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.999364 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.258374780811233, MAPE Train: 222446722654.76266, RMSE Val: 0.2622531123587967, MAPE Val: 0.0338283222910523
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.970422 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26677597749183285, MAPE Train: 233230108314.67685, RMSE Val: 0.266075060203334, MAPE Val: 0.03469727922140778
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.970472 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2629158243303631, MAPE Train: 228722704798.4122, RMSE Val: 0.26368777889604317, MAPE Val: 0.03413454302954353
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.040105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2585231814974243, MAPE Train: 225464585510.53885, RMSE Val: 0.26217322480950606, MAPE Val: 0.033825017788278386
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.960850 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26714197280191715, MAPE Train: 236121328757.06122, RMSE Val: 0.2654909628647991, MAPE Val: 0.03448186881326233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.026517 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.263873861541798, MAPE Train: 226790994947.05835, RMSE Val: 0.2634297708233972, MAPE Val: 0.034039201411325624
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.136018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2599934248741285, MAPE Train: 222411735635.30582, RMSE Val: 0.2622758514972598, MAPE Val: 0.033836069290166707
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.968346 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26527547784403116, MAPE Train: 231816385187.60992, RMSE Val: 0.26442919863618325, MAPE Val: 0.03432067801389683
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.038662 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2612118238265596, MAPE Train: 224302629038.19354, RMSE Val: 0.26249855713264775, MAPE Val: 0.03388516409125192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.025679 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25593183609130615, MAPE Train: 216053473105.51962, RMSE Val: 0.2614649176967421, MAPE Val: 0.03369455140640283
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.908614 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2630590065566937, MAPE Train: 223347235929.95853, RMSE Val: 0.26382085298612085, MAPE Val: 0.0342275105676294
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.993962 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2577549589116902, MAPE Train: 215925544924.6709, RMSE Val: 0.26183446326000037, MAPE Val: 0.03378741280969202
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.981818 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25050217602805813, MAPE Train: 210226334535.80676, RMSE Val: 0.2608138844684909, MAPE Val: 0.03361634475634168
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.158156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26696441610545596, MAPE Train: 240638872965.44202, RMSE Val: 0.2652369671023465, MAPE Val: 0.03440945486098127
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.988537 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2636621194488757, MAPE Train: 229053328948.89645, RMSE Val: 0.26331618032918275, MAPE Val: 0.033997469460077166
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.099120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25975684210143357, MAPE Train: 221635354000.02676, RMSE Val: 0.2622561301682129, MAPE Val: 0.03381293155165305
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.984489 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26504138994155496, MAPE Train: 224490556973.13425, RMSE Val: 0.2640339141083397, MAPE Val: 0.03417542087885316
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.174076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26099968809209145, MAPE Train: 213772573114.70163, RMSE Val: 0.26234867028403763, MAPE Val: 0.03381995794174083




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.135664 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25577163940882386, MAPE Train: 208467304548.44064, RMSE Val: 0.2615082888235576, MAPE Val: 0.03369191688679296
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.961505 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26249288160645456, MAPE Train: 217402765784.03864, RMSE Val: 0.2632049038812187, MAPE Val: 0.03402093914193457
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.371953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2570469345735455, MAPE Train: 206650151539.477, RMSE Val: 0.2617760059629203, MAPE Val: 0.0337404383090981
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.986190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.24956647368665866, MAPE Train: 200181053566.1262, RMSE Val: 0.261064410016018, MAPE Val: 0.03363158396163742
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.974872 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26902368790992015, MAPE Train: 238994765720.3777, RMSE Val: 0.26723637373988945, MAPE Val: 0.03487384799075039
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.992744 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2662473101316356, MAPE Train: 237557057450.13535, RMSE Val: 0.26491207048519994, MAPE Val: 0.03435267708564503
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.961936 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2633367593372636, MAPE Train: 234270481654.8631, RMSE Val: 0.26350998370983697, MAPE Val: 0.03405092718486207




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.974706 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26902368790992015, MAPE Train: 238994765720.3777, RMSE Val: 0.26723637373988945, MAPE Val: 0.03487384799075039
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.054287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2662473101316356, MAPE Train: 237557057450.13535, RMSE Val: 0.26491207048519994, MAPE Val: 0.03435267708564503
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.025086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2633367593372636, MAPE Train: 234270481654.8631, RMSE Val: 0.26350998370983697, MAPE Val: 0.03405092718486207




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.006930 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26902368790992015, MAPE Train: 238994765720.3777, RMSE Val: 0.26723637373988945, MAPE Val: 0.03487384799075039
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.014314 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2662473101316356, MAPE Train: 237557057450.13535, RMSE Val: 0.26491207048519994, MAPE Val: 0.03435267708564503
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.036019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2633367593372636, MAPE Train: 234270481654.8631, RMSE Val: 0.26350998370983697, MAPE Val: 0.03405092718486207




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.149264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26408935492625984, MAPE Train: 228135105488.81177, RMSE Val: 0.26380862812899514, MAPE Val: 0.03412294939800271
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.016877 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26018627070978123, MAPE Train: 222186298706.69653, RMSE Val: 0.26236020263829296, MAPE Val: 0.033834116516048604




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.997834 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2546951501442612, MAPE Train: 216162545545.96225, RMSE Val: 0.2615566164873892, MAPE Val: 0.03371267796300582




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.989252 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2629912014914224, MAPE Train: 222973677040.7155, RMSE Val: 0.26379765375531705, MAPE Val: 0.03415970283070524
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.182128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2582467181616902, MAPE Train: 219158350861.16583, RMSE Val: 0.2621864663380829, MAPE Val: 0.03381025779759103
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.996392 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25165663257732845, MAPE Train: 211713726727.09155, RMSE Val: 0.2612193528182895, MAPE Val: 0.03366493184944451
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.959608 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2626722395333828, MAPE Train: 225082664633.26724, RMSE Val: 0.2634964497648841, MAPE Val: 0.034087109935299086




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.996958 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2580200374879288, MAPE Train: 213681310604.33652, RMSE Val: 0.26199584678172655, MAPE Val: 0.03377755184381942
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.989042 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2519216009571756, MAPE Train: 206838454541.96655, RMSE Val: 0.2612724506475267, MAPE Val: 0.03367628514465572
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.961351 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26386260794364647, MAPE Train: 229803849196.22748, RMSE Val: 0.26345087757884833, MAPE Val: 0.03402993399576914
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.999516 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2600426066089111, MAPE Train: 223546138060.11316, RMSE Val: 0.2624410747790574, MAPE Val: 0.03385316894029453
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.014528 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2540373180250706, MAPE Train: 215365204880.19156, RMSE Val: 0.2616922612998708, MAPE Val: 0.03376570500978452




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.927822 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2611409677811758, MAPE Train: 218209963835.94748, RMSE Val: 0.26254880746506803, MAPE Val: 0.03387902433739859
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.988560 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25598432259653353, MAPE Train: 212742212420.25925, RMSE Val: 0.26161213643887826, MAPE Val: 0.03371296747106064
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.052612 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2478566229853096, MAPE Train: 203626651370.3883, RMSE Val: 0.2612424909145796, MAPE Val: 0.03369711937710896
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.091552 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25789568399789703, MAPE Train: 217791486858.0929, RMSE Val: 0.261943237311521, MAPE Val: 0.03380454235110437
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.004676 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2506169816741046, MAPE Train: 209050636339.81467, RMSE Val: 0.26112611445607686, MAPE Val: 0.033666608393819616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.972015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.23958493613707377, MAPE Train: 193980336122.95422, RMSE Val: 0.2608946114103289, MAPE Val: 0.03367274548891602
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.988705 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.263740661544958, MAPE Train: 224539590265.17126, RMSE Val: 0.26353508543120124, MAPE Val: 0.03404003287853303




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.055101 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2597377129008143, MAPE Train: 217038657014.17093, RMSE Val: 0.2623683301880432, MAPE Val: 0.03384468873514718
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.065587 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2536588769260271, MAPE Train: 210063468511.43863, RMSE Val: 0.26165533230114857, MAPE Val: 0.033744065253557574




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.965909 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26089507306741183, MAPE Train: 208307439858.5741, RMSE Val: 0.2624534833103562, MAPE Val: 0.033846135650598866
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.950814 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2556211828869119, MAPE Train: 201778786729.50647, RMSE Val: 0.2615189448676188, MAPE Val: 0.03370291425267777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.956425 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.24723190171274848, MAPE Train: 196208730457.9725, RMSE Val: 0.26106473293412036, MAPE Val: 0.03365976674645888
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.008043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2571101963027607, MAPE Train: 202298861502.96472, RMSE Val: 0.2617716689626759, MAPE Val: 0.03373554569494819
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.994655 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2496427314239646, MAPE Train: 199272803349.74796, RMSE Val: 0.26121423260687465, MAPE Val: 0.03366777732139966
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.048775 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.23804668317782263, MAPE Train: 189696419161.20667, RMSE Val: 0.2610510759404285, MAPE Val: 0.03364740453268234
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.979433 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26621845718489623, MAPE Train: 236785040724.96826, RMSE Val: 0.26508251104009023, MAPE Val: 0.03438902629923637
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.954705 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.263426749304999, MAPE Train: 234851666332.52847, RMSE Val: 0.26364317421617234, MAPE Val: 0.03407754369736715
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.016857 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2595556832597162, MAPE Train: 225449070731.53012, RMSE Val: 0.2627729309321343, MAPE Val: 0.033934694280754366




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.133248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26621845718489623, MAPE Train: 236785040724.96826, RMSE Val: 0.26508251104009023, MAPE Val: 0.03438902629923637
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.992473 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.263426749304999, MAPE Train: 234851666332.52847, RMSE Val: 0.26364317421617234, MAPE Val: 0.03407754369736715
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.992090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2595556832597162, MAPE Train: 225449070731.53012, RMSE Val: 0.2627729309321343, MAPE Val: 0.033934694280754366




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.083169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26621845718489623, MAPE Train: 236785040724.96826, RMSE Val: 0.26508251104009023, MAPE Val: 0.03438902629923637
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.113705 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.263426749304999, MAPE Train: 234851666332.52847, RMSE Val: 0.26364317421617234, MAPE Val: 0.03407754369736715
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.989108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2595556832597162, MAPE Train: 225449070731.53012, RMSE Val: 0.2627729309321343, MAPE Val: 0.033934694280754366




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.044008 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2603427998783565, MAPE Train: 230371349017.2284, RMSE Val: 0.262803996892927, MAPE Val: 0.03390652602209388
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.121858 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25491603488005654, MAPE Train: 218133822439.07483, RMSE Val: 0.2620489235222258, MAPE Val: 0.03381539515261804




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.025181 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.24643698485301696, MAPE Train: 202290396252.59598, RMSE Val: 0.262199706170287, MAPE Val: 0.03389330056871397




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.100456 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25821449617734277, MAPE Train: 222227001675.53516, RMSE Val: 0.2623470687047835, MAPE Val: 0.033824080447863406
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.090096 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2518863670480678, MAPE Train: 214122577034.9668, RMSE Val: 0.2617105523851052, MAPE Val: 0.033750987176398364
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.986872 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2423155790020979, MAPE Train: 202402860988.34482, RMSE Val: 0.26216587856888807, MAPE Val: 0.0338764760170082
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.042303 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2582744432164464, MAPE Train: 225404281948.5868, RMSE Val: 0.2622494513186708, MAPE Val: 0.03382047753896859
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.022289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2520481515001078, MAPE Train: 212934394492.2495, RMSE Val: 0.2615759015420512, MAPE Val: 0.03373002938597987
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.951073 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.24226303968989152, MAPE Train: 197156922649.2659, RMSE Val: 0.26183201552742413, MAPE Val: 0.03385994605534134
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.012648 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26005060653622536, MAPE Train: 216090943750.36307, RMSE Val: 0.2626658870301008, MAPE Val: 0.03386145834540968
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.032188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25459441937302835, MAPE Train: 215989922619.82504, RMSE Val: 0.26218685343612347, MAPE Val: 0.03384624250980256
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.114948 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2451980929971749, MAPE Train: 196847123865.6897, RMSE Val: 0.2620685535336929, MAPE Val: 0.0338800535640142




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.955691 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2560329283971171, MAPE Train: 215922344528.90274, RMSE Val: 0.2623013963009268, MAPE Val: 0.033856768258743924
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.039370 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.24820271617639406, MAPE Train: 206292474029.24515, RMSE Val: 0.2623063602255086, MAPE Val: 0.033914326587666986
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.047168 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2355806052436976, MAPE Train: 192580221789.0392, RMSE Val: 0.2626696463747254, MAPE Val: 0.03407034777427791




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.009613 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.251176087405643, MAPE Train: 220055887069.2552, RMSE Val: 0.2615512726639587, MAPE Val: 0.033812045661540044
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.989534 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2407851659989465, MAPE Train: 197080137369.13266, RMSE Val: 0.26183210414388414, MAPE Val: 0.033885462591601925




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.950536 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.22257100690258183, MAPE Train: 173462557368.9965, RMSE Val: 0.2628006184430201, MAPE Val: 0.034153286374205276
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.989267 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.26021643673519723, MAPE Train: 230744723034.12857, RMSE Val: 0.26288084847023857, MAPE Val: 0.03391254664088395
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.974340 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25416281680598646, MAPE Train: 221273935971.49225, RMSE Val: 0.2623603744473596, MAPE Val: 0.03385020968529271
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.110094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.24449362256822574, MAPE Train: 207137491268.76996, RMSE Val: 0.26253461839250036, MAPE Val: 0.033942283536717986




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.982544 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.25594848237593126, MAPE Train: 195113408619.6783, RMSE Val: 0.2621951841288056, MAPE Val: 0.03382253357652015
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.971695 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.24777476151337002, MAPE Train: 184545307129.3638, RMSE Val: 0.26226266579779023, MAPE Val: 0.03389600550852138
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.941014 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.23451296111747708, MAPE Train: 173963464678.3682, RMSE Val: 0.2626278796963487, MAPE Val: 0.03405733246037198




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.973137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2500078450624036, MAPE Train: 197388308188.2813, RMSE Val: 0.262277043597179, MAPE Val: 0.03384010468512948
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.006251 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.23820872297283305, MAPE Train: 190867986456.79572, RMSE Val: 0.2627000982070891, MAPE Val: 0.034026932386635414
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.979713 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276




RMSE Train: 0.2194107726138, MAPE Train: 177613238535.9358, RMSE Val: 0.2637395367297886, MAPE Val: 0.03431611957828058
Best Parameters: {'objective': 'regression', 'metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 7, 'num_leaves': 50, 'n_estimators': 200, 'random_state': 42}
Best Validation RMSE: 0.2608138844684909




In [85]:
best_params

{'objective': 'regression',
 'metric': 'rmse',
 'learning_rate': 0.05,
 'max_depth': 7,
 'num_leaves': 50,
 'n_estimators': 200,
 'random_state': 42}

In [86]:
import joblib
# Start MLflow experiment
mlflow.set_experiment("Final LightGBM Model Training")

with mlflow.start_run(run_name="Final Model Training"):
    # Track training time
    start_time = time.time()

    # Train final model with best parameters
    train_data = lgb.Dataset(X_train, label=y_train)
    val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

    model = lgb.train(
        best_params,
        train_data,
        valid_sets=[train_data, val_data]
    )

    training_time = time.time() - start_time

    # Predictions on train, validation, and test sets
    y_train_pred = model.predict(X_train, num_iteration=model.best_iteration)
    y_val_pred = model.predict(X_val, num_iteration=model.best_iteration)
    y_test_pred = model.predict(X_test, num_iteration=model.best_iteration)

    # Compute metrics
    rmse_train = mean_squared_error(y_train, y_train_pred, squared=False)
    mape_train = mean_absolute_percentage_error(y_train, y_train_pred)

    rmse_val = mean_squared_error(y_val, y_val_pred, squared=False)
    mape_val = mean_absolute_percentage_error(y_val, y_val_pred)

    rmse_test = mean_squared_error(y_test, y_test_pred, squared=False)
    mape_test = mean_absolute_percentage_error(y_test, y_test_pred)

    # Log parameters and metrics to MLflow
    mlflow.log_params(best_params)
    mlflow.log_metric("rmse_train", rmse_train)
    mlflow.log_metric("mape_train", mape_train)
    mlflow.log_metric("rmse_val", rmse_val)
    mlflow.log_metric("mape_val", mape_val)
    mlflow.log_metric("rmse_test", rmse_test)
    mlflow.log_metric("mape_test", mape_test)
    mlflow.log_metric("training_time", training_time)

    # Save model to MLflow
    model_path = "final_lightgbm_model.pkl"
    joblib.dump(model, model_path)
    mlflow.log_artifact(model_path)

    print(f"Training completed - RMSE Train: {rmse_train}, MAPE Train: {mape_train}, RMSE Val: {rmse_val}, MAPE Val: {mape_val}, RMSE Test: {rmse_test}, MAPE Test: {mape_test}")

# Load this model later with joblib.load("final_lightgbm_model.pkl")

2024/11/07 13:06:43 INFO mlflow.tracking.fluent: Experiment with name 'Final LightGBM Model Training' does not exist. Creating a new experiment.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.916344 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69104
[LightGBM] [Info] Number of data points in the train set: 97184, number of used features: 326
[LightGBM] [Info] Start training from score 5.328276
Training completed - RMSE Train: 0.25050217602805813, MAPE Train: 210226334535.80676, RMSE Val: 0.2608138844684909, MAPE Val: 0.03361634475634168, RMSE Test: 0.2672899711129429, MAPE Test: 0.03409688706102758


