In [1]:
# --- Imports ---
import pandas as pd
import numpy as np
import os
from surprise import Dataset, Reader, SVD, dump
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import lightgbm as lgb
from lightgbm import early_stopping, log_evaluation
from sklearn.linear_model import LinearRegression
from tqdm import tqdm

In [2]:
# --- Setup paths ---
current_dir = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))

file_path = os.path.join(current_dir, 'data', 'processed', 'hotelrec_2013_2017_cleaned_encoded.csv.gz')
cluster_path = os.path.join(current_dir, 'data', 'processed', 'hotel_clusters.csv')
model_save_path = os.path.join(current_dir, 'models')
os.makedirs(model_save_path, exist_ok=True)

In [3]:
# --- Load hotel clusters ---
hotel_clusters = pd.read_csv(cluster_path)

In [None]:
# --- Initialize storage for meta-model stacking --- [~12 min]
all_pred_svd = []
all_pred_sentiment = []
all_pred_hybrid = []
all_true = []

chunk_size = 500_000  # safer for memory

for chunk_idx, chunk in enumerate(pd.read_csv(file_path, chunksize=chunk_size)):
    print(f"\nProcessing Chunk {chunk_idx+1}...")

    # --- Merge hotel clusters ---
    chunk = chunk.merge(hotel_clusters[['hotel_name_id', 'hotel_cluster']], on='hotel_name_id', how='left')

    # --- Fill missing values ---
    structured_features = ['sleep quality', 'value', 'rooms', 'service', 'cleanliness', 'location']
    chunk[structured_features] = chunk[structured_features].fillna(chunk[structured_features].mean())

    # --- Prepare LightGBM Features ---
    feature_cols = ['hotel_name_id', 'author_id', 'sentiment_score'] + structured_features + ['hotel_cluster']
    X = chunk[feature_cols]
    y = chunk['rating']

    # --- Prepare SVD Data (separately) ---
    svd_df = chunk[['author', 'hotel_id', 'rating']]

    # --- Train-Test Split for LightGBM ---
    X_train_full, X_test_full, y_train_full, y_test_full = train_test_split(X, y, test_size=0.2, random_state=42)

    # --- Train-Test Split for SVD ---
    svd_train_df, svd_test_df = train_test_split(svd_df, test_size=0.2, random_state=42)

    # --- Train Sentiment-Aware LightGBM ---
    X_train_sentiment = X_train_full[['hotel_name_id', 'author_id', 'sentiment_score']]
    X_test_sentiment = X_test_full[['hotel_name_id', 'author_id', 'sentiment_score']]

    model_sentiment = lgb.LGBMRegressor(
        objective='regression',
        n_estimators=500,
        learning_rate=0.05,
        num_leaves=31,
        random_state=42
    )
    model_sentiment.fit(
        X_train_sentiment, y_train_full,
        eval_set=[(X_test_sentiment, y_test_full)],
        callbacks=[early_stopping(stopping_rounds=20), log_evaluation(50)]
    )

    preds_sentiment = model_sentiment.predict(X_test_sentiment)

    # --- Train Hybrid LightGBM (sentiment + subratings) ---
    model_hybrid = lgb.LGBMRegressor(
        objective='regression',
        n_estimators=500,
        learning_rate=0.05,
        num_leaves=31,
        random_state=42
    )
    model_hybrid.fit(
        X_train_full, y_train_full,
        eval_set=[(X_test_full, y_test_full)],
        callbacks=[early_stopping(stopping_rounds=20), log_evaluation(50)]
    )

    preds_hybrid = model_hybrid.predict(X_test_full)

    # --- Train SVD Model ---
    reader = Reader(rating_scale=(1, 5))
    surprise_data = Dataset.load_from_df(svd_train_df[['author', 'hotel_id', 'rating']], reader)
    trainset = surprise_data.build_full_trainset()

    model_svd = SVD(n_factors=150, n_epochs=20, lr_all=0.005, reg_all=0.02)
    model_svd.fit(trainset)

    # --- Predict batch for SVD ---
    def svd_predict_batch(model, df_batch):
        preds = []
        for idx, row in tqdm(df_batch.iterrows(), total=len(df_batch), desc="SVD Predicting"):
            preds.append(model.predict(row['author'], row['hotel_id']).est)
        return np.array(preds)

    preds_svd = svd_predict_batch(model_svd, svd_test_df)

    # --- Store predictions ---
    all_pred_svd.append(preds_svd)
    all_pred_sentiment.append(preds_sentiment)
    all_pred_hybrid.append(preds_hybrid)
    all_true.append(y_test_full.values)

    print(f"Chunk {chunk_idx+1} done.")


Processing Chunk 1...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001768 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.208140
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.55034
[100]	valid_0's l2: 0.539952
[150]	valid_0's l2: 0.536647
[200]	valid_0's l2: 0.535035
[250]	valid_0's l2: 0.534251
[300]	valid_0's l2: 0.534021
[350]	valid_0's l2: 0.533707
[400]	valid_0's l2: 0.533453
[450]	valid_0's l2: 0.533295
Early stopping, best iteration is:
[459]	valid_0's l2: 0.533213
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003952 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40535.54it/s]


Chunk 1 done.

Processing Chunk 2...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000501 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.158680
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.583786
[100]	valid_0's l2: 0.573056
[150]	valid_0's l2: 0.570345
[200]	valid_0's l2: 0.569462
[250]	valid_0's l2: 0.568887
[300]	valid_0's l2: 0.568518
[350]	valid_0's l2: 0.568354
[400]	valid_0's l2: 0.568158
Early stopping, best iteration is:
[416]	valid_0's l2: 0.568113
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002555 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38982.39it/s]


Chunk 2 done.

Processing Chunk 3...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000760 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.148167
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.578446
[100]	valid_0's l2: 0.566602
[150]	valid_0's l2: 0.564088
[200]	valid_0's l2: 0.563158
[250]	valid_0's l2: 0.562689
[300]	valid_0's l2: 0.562353
[350]	valid_0's l2: 0.562195
Early stopping, best iteration is:
[362]	valid_0's l2: 0.562117
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003654 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Ligh

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38527.02it/s]


Chunk 3 done.

Processing Chunk 4...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000473 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.155455
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.583172
[100]	valid_0's l2: 0.571218
[150]	valid_0's l2: 0.568401
[200]	valid_0's l2: 0.56732
[250]	valid_0's l2: 0.566806
[300]	valid_0's l2: 0.566573
[350]	valid_0's l2: 0.566417
[400]	valid_0's l2: 0.566312
[450]	valid_0's l2: 0.566208
Early stopping, best iteration is:
[451]	valid_0's l2: 0.566207
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002173 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memor

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 37861.69it/s]


Chunk 4 done.

Processing Chunk 5...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000440 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.132313
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.587475
[100]	valid_0's l2: 0.576576
[150]	valid_0's l2: 0.573747
[200]	valid_0's l2: 0.572841
[250]	valid_0's l2: 0.5723
[300]	valid_0's l2: 0.571919
[350]	valid_0's l2: 0.571614
[400]	valid_0's l2: 0.571462
Early stopping, best iteration is:
[429]	valid_0's l2: 0.571369
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002503 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 37553.74it/s]


Chunk 5 done.

Processing Chunk 6...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000448 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.062910
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.618201
[100]	valid_0's l2: 0.605385
[150]	valid_0's l2: 0.602338
[200]	valid_0's l2: 0.60133
[250]	valid_0's l2: 0.600755
[300]	valid_0's l2: 0.600387
[350]	valid_0's l2: 0.600019
[400]	valid_0's l2: 0.599849
[450]	valid_0's l2: 0.599692
[500]	valid_0's l2: 0.599552
Did not meet early stopping. Best iteration is:
[493]	valid_0's l2: 0.599531
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002934 seconds.
You can set `force_row_wise=

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 37878.64it/s]


Chunk 6 done.

Processing Chunk 7...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000498 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 764
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.255133
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.529644
[100]	valid_0's l2: 0.513262
[150]	valid_0's l2: 0.508237
[200]	valid_0's l2: 0.506205
[250]	valid_0's l2: 0.505196
[300]	valid_0's l2: 0.504621
[350]	valid_0's l2: 0.503993
[400]	valid_0's l2: 0.503638
[450]	valid_0's l2: 0.503369
[500]	valid_0's l2: 0.503079
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.503079
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003755 seconds.
You can set `force_row_wise

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38828.65it/s]


Chunk 7 done.

Processing Chunk 8...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000509 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 759
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.197578
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.546519
[100]	valid_0's l2: 0.525263
[150]	valid_0's l2: 0.516779
[200]	valid_0's l2: 0.513798
[250]	valid_0's l2: 0.512162
[300]	valid_0's l2: 0.510874
[350]	valid_0's l2: 0.509894
[400]	valid_0's l2: 0.509512
[450]	valid_0's l2: 0.509038
[500]	valid_0's l2: 0.508819
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.508819
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001718 seconds.
You can set `force_row_wise

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39088.55it/s]


Chunk 8 done.

Processing Chunk 9...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000433 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 745
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.254105
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.517427
[100]	valid_0's l2: 0.496185
[150]	valid_0's l2: 0.489237
[200]	valid_0's l2: 0.485373
[250]	valid_0's l2: 0.483569
[300]	valid_0's l2: 0.482565
[350]	valid_0's l2: 0.482035
[400]	valid_0's l2: 0.481487
[450]	valid_0's l2: 0.481187
[500]	valid_0's l2: 0.480867
Did not meet early stopping. Best iteration is:
[499]	valid_0's l2: 0.480864
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002325 seconds.
You can set `force_row_wise

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39501.64it/s]


Chunk 9 done.

Processing Chunk 10...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000543 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 762
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.189203
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.551573
[100]	valid_0's l2: 0.536028
[150]	valid_0's l2: 0.530209
[200]	valid_0's l2: 0.527808
[250]	valid_0's l2: 0.526328
[300]	valid_0's l2: 0.525562
[350]	valid_0's l2: 0.525091
[400]	valid_0's l2: 0.524786
[450]	valid_0's l2: 0.524537
[500]	valid_0's l2: 0.524463
Did not meet early stopping. Best iteration is:
[499]	valid_0's l2: 0.524461
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002716 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39571.06it/s]


Chunk 10 done.

Processing Chunk 11...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000422 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.224188
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.541172
[100]	valid_0's l2: 0.528928
[150]	valid_0's l2: 0.524947
[200]	valid_0's l2: 0.523043
[250]	valid_0's l2: 0.521934
[300]	valid_0's l2: 0.521325
[350]	valid_0's l2: 0.520941
[400]	valid_0's l2: 0.520535
[450]	valid_0's l2: 0.520378
[500]	valid_0's l2: 0.520232
Did not meet early stopping. Best iteration is:
[497]	valid_0's l2: 0.520226
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002622 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 37564.54it/s]


Chunk 11 done.

Processing Chunk 12...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000462 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 764
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.201778
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.549491
[100]	valid_0's l2: 0.535029
[150]	valid_0's l2: 0.530104
[200]	valid_0's l2: 0.527947
[250]	valid_0's l2: 0.526841
[300]	valid_0's l2: 0.526182
[350]	valid_0's l2: 0.525774
[400]	valid_0's l2: 0.525338
[450]	valid_0's l2: 0.525147
[500]	valid_0's l2: 0.524921
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.524921
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002409 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39469.91it/s]


Chunk 12 done.

Processing Chunk 13...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000688 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.156355
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.584082
[100]	valid_0's l2: 0.57461
[150]	valid_0's l2: 0.572447
[200]	valid_0's l2: 0.571639
[250]	valid_0's l2: 0.571255
[300]	valid_0's l2: 0.571036
[350]	valid_0's l2: 0.57082
[400]	valid_0's l2: 0.570734
[450]	valid_0's l2: 0.570641
Early stopping, best iteration is:
[446]	valid_0's l2: 0.570633
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002708 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memo

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39674.51it/s]


Chunk 13 done.

Processing Chunk 14...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000490 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.145535
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.58893
[100]	valid_0's l2: 0.58007
[150]	valid_0's l2: 0.578579
[200]	valid_0's l2: 0.578168
[250]	valid_0's l2: 0.577804
[300]	valid_0's l2: 0.577527
[350]	valid_0's l2: 0.577425
Early stopping, best iteration is:
[341]	valid_0's l2: 0.577391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002217 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Ligh

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39597.24it/s]


Chunk 14 done.

Processing Chunk 15...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000468 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.203525
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.567013
[100]	valid_0's l2: 0.555959
[150]	valid_0's l2: 0.552513
[200]	valid_0's l2: 0.551004
[250]	valid_0's l2: 0.550187
[300]	valid_0's l2: 0.549658
[350]	valid_0's l2: 0.549316
[400]	valid_0's l2: 0.549153
[450]	valid_0's l2: 0.549105
[500]	valid_0's l2: 0.548939
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.548939
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005805 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 37905.33it/s]


Chunk 15 done.

Processing Chunk 16...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000460 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.191915
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.563209
[100]	valid_0's l2: 0.552815
[150]	valid_0's l2: 0.548782
[200]	valid_0's l2: 0.547218
[250]	valid_0's l2: 0.546462
[300]	valid_0's l2: 0.546039
[350]	valid_0's l2: 0.545542
[400]	valid_0's l2: 0.545219
[450]	valid_0's l2: 0.545061
[500]	valid_0's l2: 0.544943
Did not meet early stopping. Best iteration is:
[492]	valid_0's l2: 0.54492
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003177 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39570.57it/s]


Chunk 16 done.

Processing Chunk 17...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000556 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.144127
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.594237
[100]	valid_0's l2: 0.585586
[150]	valid_0's l2: 0.583395
[200]	valid_0's l2: 0.582691
[250]	valid_0's l2: 0.582298
[300]	valid_0's l2: 0.582116
[350]	valid_0's l2: 0.582021
Early stopping, best iteration is:
[351]	valid_0's l2: 0.582014
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003370 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Li

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39174.67it/s]


Chunk 17 done.

Processing Chunk 18...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000495 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.126993
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.596041
[100]	valid_0's l2: 0.587022
[150]	valid_0's l2: 0.585252
[200]	valid_0's l2: 0.584464
[250]	valid_0's l2: 0.584125
[300]	valid_0's l2: 0.583868
[350]	valid_0's l2: 0.583807
[400]	valid_0's l2: 0.583705
Early stopping, best iteration is:
[416]	valid_0's l2: 0.583695
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002698 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can s

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39222.62it/s]


Chunk 18 done.

Processing Chunk 19...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000502 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.148128
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.58644
[100]	valid_0's l2: 0.576894
[150]	valid_0's l2: 0.575072
[200]	valid_0's l2: 0.574392
[250]	valid_0's l2: 0.574099
[300]	valid_0's l2: 0.573905
[350]	valid_0's l2: 0.573715
[400]	valid_0's l2: 0.573553
Early stopping, best iteration is:
[411]	valid_0's l2: 0.57348
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002095 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39769.21it/s]


Chunk 19 done.

Processing Chunk 20...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000574 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.132453
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.596136
[100]	valid_0's l2: 0.587119
[150]	valid_0's l2: 0.585392
[200]	valid_0's l2: 0.584849
[250]	valid_0's l2: 0.584516
[300]	valid_0's l2: 0.584311
[350]	valid_0's l2: 0.584252
Early stopping, best iteration is:
[332]	valid_0's l2: 0.584209
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002762 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Li

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40067.65it/s]


Chunk 20 done.

Processing Chunk 21...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000419 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.136520
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.602506
[100]	valid_0's l2: 0.593689
[150]	valid_0's l2: 0.5917
[200]	valid_0's l2: 0.590881
[250]	valid_0's l2: 0.59041
[300]	valid_0's l2: 0.590232
[350]	valid_0's l2: 0.590185
Early stopping, best iteration is:
[331]	valid_0's l2: 0.590166
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003140 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Light

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39766.99it/s]


Chunk 21 done.

Processing Chunk 22...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000417 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.126015
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.596722
[100]	valid_0's l2: 0.587467
[150]	valid_0's l2: 0.585889
[200]	valid_0's l2: 0.585138
[250]	valid_0's l2: 0.58482
[300]	valid_0's l2: 0.584638
[350]	valid_0's l2: 0.584564
Early stopping, best iteration is:
[342]	valid_0's l2: 0.584551
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002866 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Lig

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39840.42it/s]


Chunk 22 done.

Processing Chunk 23...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000451 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.132998
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.596769
[100]	valid_0's l2: 0.587097
[150]	valid_0's l2: 0.585087
[200]	valid_0's l2: 0.584427
[250]	valid_0's l2: 0.584159
[300]	valid_0's l2: 0.583995
[350]	valid_0's l2: 0.58384
Early stopping, best iteration is:
[361]	valid_0's l2: 0.58378
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003465 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Ligh

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40197.12it/s]


Chunk 23 done.

Processing Chunk 24...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001481 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.112805
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.615095
[100]	valid_0's l2: 0.606106
[150]	valid_0's l2: 0.604437
[200]	valid_0's l2: 0.603974
[250]	valid_0's l2: 0.603545
[300]	valid_0's l2: 0.603374
[350]	valid_0's l2: 0.603201
[400]	valid_0's l2: 0.60313
Early stopping, best iteration is:
[387]	valid_0's l2: 0.603094
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002272 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 812
[Light

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39589.49it/s]


Chunk 24 done.

Processing Chunk 25...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000480 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.023923
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.644962
[100]	valid_0's l2: 0.636217
[150]	valid_0's l2: 0.634912
[200]	valid_0's l2: 0.634646
[250]	valid_0's l2: 0.63449
[300]	valid_0's l2: 0.634401
Early stopping, best iteration is:
[288]	valid_0's l2: 0.634392
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002609 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 812


SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38847.07it/s]


Chunk 25 done.

Processing Chunk 26...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000494 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.046430
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.636934
[100]	valid_0's l2: 0.627766
[150]	valid_0's l2: 0.626121
[200]	valid_0's l2: 0.625739
[250]	valid_0's l2: 0.625471
[300]	valid_0's l2: 0.625361
Early stopping, best iteration is:
[320]	valid_0's l2: 0.625286
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002323 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 812

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39393.30it/s]


Chunk 26 done.

Processing Chunk 27...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000551 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.142098
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.588605
[100]	valid_0's l2: 0.577352
[150]	valid_0's l2: 0.574242
[200]	valid_0's l2: 0.573131
[250]	valid_0's l2: 0.572581
[300]	valid_0's l2: 0.572296
[350]	valid_0's l2: 0.572033
[400]	valid_0's l2: 0.571846
[450]	valid_0's l2: 0.57173
Early stopping, best iteration is:
[432]	valid_0's l2: 0.571729
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002003 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if mem

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38390.07it/s]


Chunk 27 done.

Processing Chunk 28...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000577 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.098470
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.618632
[100]	valid_0's l2: 0.610648
[150]	valid_0's l2: 0.60881
[200]	valid_0's l2: 0.608259
[250]	valid_0's l2: 0.608053
[300]	valid_0's l2: 0.607935
[350]	valid_0's l2: 0.607883
Early stopping, best iteration is:
[343]	valid_0's l2: 0.607878
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002770 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Lig

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 37833.52it/s]


Chunk 28 done.

Processing Chunk 29...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000483 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.123045
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.601924
[100]	valid_0's l2: 0.59088
[150]	valid_0's l2: 0.588341
[200]	valid_0's l2: 0.587705
[250]	valid_0's l2: 0.58749
[300]	valid_0's l2: 0.587336
[350]	valid_0's l2: 0.587223
[400]	valid_0's l2: 0.587151
[450]	valid_0's l2: 0.587072
Early stopping, best iteration is:
[448]	valid_0's l2: 0.587065
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002450 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memo

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39016.00it/s]


Chunk 29 done.

Processing Chunk 30...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000545 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.162497
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.579598
[100]	valid_0's l2: 0.569795
[150]	valid_0's l2: 0.567168
[200]	valid_0's l2: 0.566166
[250]	valid_0's l2: 0.565566
[300]	valid_0's l2: 0.565243
[350]	valid_0's l2: 0.565105
Early stopping, best iteration is:
[376]	valid_0's l2: 0.564934
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002349 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Li

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40357.43it/s]


Chunk 30 done.

Processing Chunk 31...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.105058
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.612674
[100]	valid_0's l2: 0.602483
[150]	valid_0's l2: 0.600776
[200]	valid_0's l2: 0.600281
[250]	valid_0's l2: 0.600116
[300]	valid_0's l2: 0.599946
[350]	valid_0's l2: 0.599792
[400]	valid_0's l2: 0.599757
Early stopping, best iteration is:
[412]	valid_0's l2: 0.599725
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002353 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 812
[Ligh

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39145.69it/s]


Chunk 31 done.

Processing Chunk 32...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000537 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.092367
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.618121
[100]	valid_0's l2: 0.607453
[150]	valid_0's l2: 0.605779
[200]	valid_0's l2: 0.605474
[250]	valid_0's l2: 0.605294
[300]	valid_0's l2: 0.605175
[350]	valid_0's l2: 0.605148
Early stopping, best iteration is:
[338]	valid_0's l2: 0.6051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001861 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Ligh

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39717.44it/s]


Chunk 32 done.

Processing Chunk 33...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000610 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.027490
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.641769
[100]	valid_0's l2: 0.631888
[150]	valid_0's l2: 0.630186
[200]	valid_0's l2: 0.629686
[250]	valid_0's l2: 0.629362
[300]	valid_0's l2: 0.629248
Early stopping, best iteration is:
[295]	valid_0's l2: 0.629229
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 812

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38266.20it/s]


Chunk 33 done.

Processing Chunk 34...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000428 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.017895
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.647774
[100]	valid_0's l2: 0.63678
[150]	valid_0's l2: 0.634587
[200]	valid_0's l2: 0.633852
[250]	valid_0's l2: 0.633457
[300]	valid_0's l2: 0.633052
[350]	valid_0's l2: 0.632901
Early stopping, best iteration is:
[373]	valid_0's l2: 0.632774
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002114 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Lig

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38680.59it/s]


Chunk 34 done.

Processing Chunk 35...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000498 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.172758
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.56981
[100]	valid_0's l2: 0.557391
[150]	valid_0's l2: 0.553588
[200]	valid_0's l2: 0.552268
[250]	valid_0's l2: 0.551407
[300]	valid_0's l2: 0.550868
[350]	valid_0's l2: 0.550313
[400]	valid_0's l2: 0.550039
[450]	valid_0's l2: 0.549753
[500]	valid_0's l2: 0.54957
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.54957
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002719 seconds.
You can set `force_row_wise=

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39012.27it/s]


Chunk 35 done.

Processing Chunk 36...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000479 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.173830
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.569305
[100]	valid_0's l2: 0.556185
[150]	valid_0's l2: 0.552125
[200]	valid_0's l2: 0.550899
[250]	valid_0's l2: 0.550372
[300]	valid_0's l2: 0.54997
[350]	valid_0's l2: 0.549658
[400]	valid_0's l2: 0.549433
[450]	valid_0's l2: 0.549321
Early stopping, best iteration is:
[478]	valid_0's l2: 0.549263
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002914 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if mem

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39520.35it/s]


Chunk 36 done.

Processing Chunk 37...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000495 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.209575
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.548948
[100]	valid_0's l2: 0.538062
[150]	valid_0's l2: 0.535126
[200]	valid_0's l2: 0.534049
[250]	valid_0's l2: 0.533416
[300]	valid_0's l2: 0.532948
[350]	valid_0's l2: 0.532582
[400]	valid_0's l2: 0.532385
[450]	valid_0's l2: 0.532198
[500]	valid_0's l2: 0.532035
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.532035
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002162 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39527.51it/s]


Chunk 37 done.

Processing Chunk 38...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000503 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.204533
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.553443
[100]	valid_0's l2: 0.542377
[150]	valid_0's l2: 0.539388
[200]	valid_0's l2: 0.53843
[250]	valid_0's l2: 0.537848
[300]	valid_0's l2: 0.537405
[350]	valid_0's l2: 0.537184
[400]	valid_0's l2: 0.53697
[450]	valid_0's l2: 0.536844
[500]	valid_0's l2: 0.536691
Did not meet early stopping. Best iteration is:
[499]	valid_0's l2: 0.536686
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002533 seconds.
You can set `force_row_wise

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39772.02it/s]


Chunk 38 done.

Processing Chunk 39...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000505 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.187803
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.571888
[100]	valid_0's l2: 0.559811
[150]	valid_0's l2: 0.555986
[200]	valid_0's l2: 0.55462
[250]	valid_0's l2: 0.553916
[300]	valid_0's l2: 0.553467
[350]	valid_0's l2: 0.552959
[400]	valid_0's l2: 0.552598
[450]	valid_0's l2: 0.552319
[500]	valid_0's l2: 0.552168
Did not meet early stopping. Best iteration is:
[487]	valid_0's l2: 0.552159
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002789 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38970.55it/s]


Chunk 39 done.

Processing Chunk 40...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000567 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.239377
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.541746
[100]	valid_0's l2: 0.529141
[150]	valid_0's l2: 0.525873
[200]	valid_0's l2: 0.524455
[250]	valid_0's l2: 0.523514
[300]	valid_0's l2: 0.523023
[350]	valid_0's l2: 0.522712
[400]	valid_0's l2: 0.522372
[450]	valid_0's l2: 0.522265
[500]	valid_0's l2: 0.522048
Did not meet early stopping. Best iteration is:
[491]	valid_0's l2: 0.522035
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002043 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39779.36it/s]


Chunk 40 done.

Processing Chunk 41...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000472 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.182710
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.565181
[100]	valid_0's l2: 0.552317
[150]	valid_0's l2: 0.548471
[200]	valid_0's l2: 0.547017
[250]	valid_0's l2: 0.546293
[300]	valid_0's l2: 0.545865
[350]	valid_0's l2: 0.545438
[400]	valid_0's l2: 0.545325
[450]	valid_0's l2: 0.545217
[500]	valid_0's l2: 0.545067
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.545067
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002606 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38467.00it/s]


Chunk 41 done.

Processing Chunk 42...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000522 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.151510
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.578987
[100]	valid_0's l2: 0.564623
[150]	valid_0's l2: 0.560905
[200]	valid_0's l2: 0.559213
[250]	valid_0's l2: 0.558491
[300]	valid_0's l2: 0.558007
[350]	valid_0's l2: 0.557528
[400]	valid_0's l2: 0.557229
[450]	valid_0's l2: 0.557084
[500]	valid_0's l2: 0.556948
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.556948
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002591 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39547.83it/s]


Chunk 42 done.

Processing Chunk 43...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000460 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.131197
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.58361
[100]	valid_0's l2: 0.570292
[150]	valid_0's l2: 0.566654
[200]	valid_0's l2: 0.565051
[250]	valid_0's l2: 0.564322
[300]	valid_0's l2: 0.563964
[350]	valid_0's l2: 0.563733
[400]	valid_0's l2: 0.563625
[450]	valid_0's l2: 0.563522
Early stopping, best iteration is:
[470]	valid_0's l2: 0.563464
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002446 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if mem

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39568.92it/s]


Chunk 43 done.

Processing Chunk 44...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000422 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.166155
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.570026
[100]	valid_0's l2: 0.55782
[150]	valid_0's l2: 0.554798
[200]	valid_0's l2: 0.553602
[250]	valid_0's l2: 0.552872
[300]	valid_0's l2: 0.552365
[350]	valid_0's l2: 0.552225
[400]	valid_0's l2: 0.55223
Early stopping, best iteration is:
[388]	valid_0's l2: 0.552201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001778 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39504.25it/s]


Chunk 44 done.

Processing Chunk 45...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000498 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.164095
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.573001
[100]	valid_0's l2: 0.562425
[150]	valid_0's l2: 0.559374
[200]	valid_0's l2: 0.558602
[250]	valid_0's l2: 0.558165
[300]	valid_0's l2: 0.557872
[350]	valid_0's l2: 0.557665
[400]	valid_0's l2: 0.557472
[450]	valid_0's l2: 0.557294
[500]	valid_0's l2: 0.557213
Did not meet early stopping. Best iteration is:
[496]	valid_0's l2: 0.55721
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002342 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38813.96it/s]


Chunk 45 done.

Processing Chunk 46...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000574 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.137833
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.57567
[100]	valid_0's l2: 0.562889
[150]	valid_0's l2: 0.559569
[200]	valid_0's l2: 0.558241
[250]	valid_0's l2: 0.557527
[300]	valid_0's l2: 0.557133
[350]	valid_0's l2: 0.556858
[400]	valid_0's l2: 0.556599
[450]	valid_0's l2: 0.556476
[500]	valid_0's l2: 0.556305
Did not meet early stopping. Best iteration is:
[493]	valid_0's l2: 0.556287
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002232 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 37945.54it/s]


Chunk 46 done.

Processing Chunk 47...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000419 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.146365
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.577488
[100]	valid_0's l2: 0.565237
[150]	valid_0's l2: 0.560882
[200]	valid_0's l2: 0.559193
[250]	valid_0's l2: 0.558348
[300]	valid_0's l2: 0.557874
[350]	valid_0's l2: 0.557553
[400]	valid_0's l2: 0.557175
[450]	valid_0's l2: 0.557001
[500]	valid_0's l2: 0.556859
Did not meet early stopping. Best iteration is:
[493]	valid_0's l2: 0.556851
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002766 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39070.71it/s]


Chunk 47 done.

Processing Chunk 48...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000420 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.156218
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.575421
[100]	valid_0's l2: 0.561845
[150]	valid_0's l2: 0.557625
[200]	valid_0's l2: 0.556133
[250]	valid_0's l2: 0.555189
[300]	valid_0's l2: 0.554716
[350]	valid_0's l2: 0.554427
[400]	valid_0's l2: 0.554188
[450]	valid_0's l2: 0.553962
[500]	valid_0's l2: 0.553862
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.553862
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001996 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39188.19it/s]


Chunk 48 done.

Processing Chunk 49...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000489 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 758
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.224757
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.538633
[100]	valid_0's l2: 0.523711
[150]	valid_0's l2: 0.517955
[200]	valid_0's l2: 0.515737
[250]	valid_0's l2: 0.514487
[300]	valid_0's l2: 0.513733
[350]	valid_0's l2: 0.513338
[400]	valid_0's l2: 0.512989
[450]	valid_0's l2: 0.512876
Early stopping, best iteration is:
[473]	valid_0's l2: 0.512796
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002504 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if me

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40140.32it/s]


Chunk 49 done.

Processing Chunk 50...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 758
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.233473
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.529383
[100]	valid_0's l2: 0.513139
[150]	valid_0's l2: 0.506649
[200]	valid_0's l2: 0.504357
[250]	valid_0's l2: 0.503106
[300]	valid_0's l2: 0.502504
[350]	valid_0's l2: 0.501838
[400]	valid_0's l2: 0.501482
[450]	valid_0's l2: 0.501149
[500]	valid_0's l2: 0.500782
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.500782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002307 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40047.54it/s]


Chunk 50 done.

Processing Chunk 51...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000447 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 744
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.257178
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.520733
[100]	valid_0's l2: 0.499842
[150]	valid_0's l2: 0.491571
[200]	valid_0's l2: 0.488734
[250]	valid_0's l2: 0.487081
[300]	valid_0's l2: 0.485744
[350]	valid_0's l2: 0.484982
[400]	valid_0's l2: 0.484626
[450]	valid_0's l2: 0.484255
[500]	valid_0's l2: 0.4841
Did not meet early stopping. Best iteration is:
[499]	valid_0's l2: 0.484099
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003130 seconds.
You can set `force_row_wise

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40505.84it/s]


Chunk 51 done.

Processing Chunk 52...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000452 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 764
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.233562
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.529956
[100]	valid_0's l2: 0.506623
[150]	valid_0's l2: 0.498953
[200]	valid_0's l2: 0.495734
[250]	valid_0's l2: 0.494111
[300]	valid_0's l2: 0.492794
[350]	valid_0's l2: 0.492025
[400]	valid_0's l2: 0.49151
[450]	valid_0's l2: 0.491173
[500]	valid_0's l2: 0.490832
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.490832
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003349 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40467.01it/s]


Chunk 52 done.

Processing Chunk 53...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000448 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 749
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.195520
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.544897
[100]	valid_0's l2: 0.524935
[150]	valid_0's l2: 0.518623
[200]	valid_0's l2: 0.515866
[250]	valid_0's l2: 0.514177
[300]	valid_0's l2: 0.513121
[350]	valid_0's l2: 0.512528
[400]	valid_0's l2: 0.512082
[450]	valid_0's l2: 0.511716
[500]	valid_0's l2: 0.511597
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.511597
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002496 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39773.59it/s]


Chunk 53 done.

Processing Chunk 54...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000434 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 763
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.267993
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.512731
[100]	valid_0's l2: 0.492146
[150]	valid_0's l2: 0.485446
[200]	valid_0's l2: 0.482686
[250]	valid_0's l2: 0.481374
[300]	valid_0's l2: 0.480487
[350]	valid_0's l2: 0.479897
[400]	valid_0's l2: 0.479512
[450]	valid_0's l2: 0.479145
Early stopping, best iteration is:
[469]	valid_0's l2: 0.479095
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002004 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if me

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39911.41it/s]


Chunk 54 done.

Processing Chunk 55...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000443 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 742
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.237278
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.528322
[100]	valid_0's l2: 0.50682
[150]	valid_0's l2: 0.502309
[200]	valid_0's l2: 0.499648
[250]	valid_0's l2: 0.497988
[300]	valid_0's l2: 0.496848
[350]	valid_0's l2: 0.495925
[400]	valid_0's l2: 0.49541
[450]	valid_0's l2: 0.495042
[500]	valid_0's l2: 0.494689
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.494689
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002284 seconds.
You can set `force_row_wise

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40544.24it/s]


Chunk 55 done.

Processing Chunk 56...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000536 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 740
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.265045
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.517648
[100]	valid_0's l2: 0.499301
[150]	valid_0's l2: 0.49317
[200]	valid_0's l2: 0.489973
[250]	valid_0's l2: 0.488477
[300]	valid_0's l2: 0.487452
[350]	valid_0's l2: 0.487007
[400]	valid_0's l2: 0.486638
[450]	valid_0's l2: 0.486477
[500]	valid_0's l2: 0.486213
Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 0.486213
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002211 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40507.60it/s]


Chunk 56 done.

Processing Chunk 57...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000412 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 732
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.249557
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.51876
[100]	valid_0's l2: 0.499584
[150]	valid_0's l2: 0.492489
[200]	valid_0's l2: 0.489468
[250]	valid_0's l2: 0.487787
[300]	valid_0's l2: 0.486901
[350]	valid_0's l2: 0.486139
[400]	valid_0's l2: 0.485661
[450]	valid_0's l2: 0.485392
[500]	valid_0's l2: 0.485142
Did not meet early stopping. Best iteration is:
[498]	valid_0's l2: 0.485142
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002352 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40260.91it/s]


Chunk 57 done.

Processing Chunk 58...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000433 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.179412
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.564468
[100]	valid_0's l2: 0.548532
[150]	valid_0's l2: 0.543491
[200]	valid_0's l2: 0.541876
[250]	valid_0's l2: 0.540665
[300]	valid_0's l2: 0.539942
[350]	valid_0's l2: 0.539431
[400]	valid_0's l2: 0.539085
[450]	valid_0's l2: 0.538793
[500]	valid_0's l2: 0.538662
Did not meet early stopping. Best iteration is:
[499]	valid_0's l2: 0.538661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001950 seconds.
You can set `force_row_wi

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40319.44it/s]


Chunk 58 done.

Processing Chunk 59...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000466 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.164813
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.569244
[100]	valid_0's l2: 0.556727
[150]	valid_0's l2: 0.553255
[200]	valid_0's l2: 0.55181
[250]	valid_0's l2: 0.550947
[300]	valid_0's l2: 0.55048
[350]	valid_0's l2: 0.550088
[400]	valid_0's l2: 0.549901
[450]	valid_0's l2: 0.549658
[500]	valid_0's l2: 0.549522
Did not meet early stopping. Best iteration is:
[497]	valid_0's l2: 0.549517
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002567 seconds.
You can set `force_row_wise

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39668.00it/s]


Chunk 59 done.

Processing Chunk 60...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000467 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 762
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.216645
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.539785
[100]	valid_0's l2: 0.523937
[150]	valid_0's l2: 0.51871
[200]	valid_0's l2: 0.516189
[250]	valid_0's l2: 0.515153
[300]	valid_0's l2: 0.514482
[350]	valid_0's l2: 0.513909
[400]	valid_0's l2: 0.513511
[450]	valid_0's l2: 0.513098
[500]	valid_0's l2: 0.512824
Did not meet early stopping. Best iteration is:
[499]	valid_0's l2: 0.512824
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003082 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38921.97it/s]


Chunk 60 done.

Processing Chunk 61...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000529 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.187608
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.558551
[100]	valid_0's l2: 0.546166
[150]	valid_0's l2: 0.542478
[200]	valid_0's l2: 0.540591
[250]	valid_0's l2: 0.539701
[300]	valid_0's l2: 0.539205
[350]	valid_0's l2: 0.538851
[400]	valid_0's l2: 0.538483
[450]	valid_0's l2: 0.53837
[500]	valid_0's l2: 0.538279
Did not meet early stopping. Best iteration is:
[496]	valid_0's l2: 0.538264
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003594 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 38587.34it/s]


Chunk 61 done.

Processing Chunk 62...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000426 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.151928
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.57898
[100]	valid_0's l2: 0.567326
[150]	valid_0's l2: 0.564354
[200]	valid_0's l2: 0.563198
[250]	valid_0's l2: 0.562434
[300]	valid_0's l2: 0.562067
[350]	valid_0's l2: 0.561779
[400]	valid_0's l2: 0.561577
[450]	valid_0's l2: 0.561414
[500]	valid_0's l2: 0.561317
Did not meet early stopping. Best iteration is:
[491]	valid_0's l2: 0.561307
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003235 seconds.
You can set `force_row_wis

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39130.20it/s]


Chunk 62 done.

Processing Chunk 63...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000424 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.150127
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.581039
[100]	valid_0's l2: 0.56876
[150]	valid_0's l2: 0.565833
[200]	valid_0's l2: 0.564692
[250]	valid_0's l2: 0.564053
[300]	valid_0's l2: 0.563615
[350]	valid_0's l2: 0.563371
[400]	valid_0's l2: 0.563026
[450]	valid_0's l2: 0.562909
Early stopping, best iteration is:
[465]	valid_0's l2: 0.562885
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003052 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if mem

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 40508.99it/s]


Chunk 63 done.

Processing Chunk 64...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000443 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.148087
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.581396
[100]	valid_0's l2: 0.571156
[150]	valid_0's l2: 0.569245
[200]	valid_0's l2: 0.568406
[250]	valid_0's l2: 0.567707
[300]	valid_0's l2: 0.567341
[350]	valid_0's l2: 0.56715
[400]	valid_0's l2: 0.567023
[450]	valid_0's l2: 0.566954
Early stopping, best iteration is:
[456]	valid_0's l2: 0.566934
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002711 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if mem

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39565.56it/s]


Chunk 64 done.

Processing Chunk 65...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000591 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 400000, number of used features: 3
[LightGBM] [Info] Start training from score 4.149095
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.585484
[100]	valid_0's l2: 0.575502
[150]	valid_0's l2: 0.572994
[200]	valid_0's l2: 0.57231
[250]	valid_0's l2: 0.571828
[300]	valid_0's l2: 0.571492
[350]	valid_0's l2: 0.571288
[400]	valid_0's l2: 0.57108
[450]	valid_0's l2: 0.571001
[500]	valid_0's l2: 0.570946
Did not meet early stopping. Best iteration is:
[497]	valid_0's l2: 0.570941
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002255 seconds.
You can set `force_row_wise

SVD Predicting: 100%|██████████| 100000/100000 [00:02<00:00, 39434.11it/s]


Chunk 65 done.

Processing Chunk 66...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000336 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 314009, number of used features: 3
[LightGBM] [Info] Start training from score 4.126837
Training until validation scores don't improve for 20 rounds
[50]	valid_0's l2: 0.585086
[100]	valid_0's l2: 0.573155
[150]	valid_0's l2: 0.570195
[200]	valid_0's l2: 0.568801
[250]	valid_0's l2: 0.56816
[300]	valid_0's l2: 0.567616
[350]	valid_0's l2: 0.567302
[400]	valid_0's l2: 0.567097
[450]	valid_0's l2: 0.567031
Early stopping, best iteration is:
[464]	valid_0's l2: 0.566986
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001878 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if mem

SVD Predicting: 100%|██████████| 78503/78503 [00:02<00:00, 38987.68it/s]

Chunk 66 done.





In [6]:
# --- Stack all predictions together ---
pred_svd_all = np.concatenate(all_pred_svd)
pred_sentiment_all = np.concatenate(all_pred_sentiment)
pred_hybrid_all = np.concatenate(all_pred_hybrid)
y_true_all = np.concatenate(all_true)

In [7]:
stacked_X = np.vstack([pred_svd_all, pred_sentiment_all, pred_hybrid_all]).T
stacked_y = y_true_all

In [8]:
# --- Final Train-Test Split for Meta-Model ---
X_train_stack, X_test_stack, y_train_stack, y_test_stack = train_test_split(stacked_X, stacked_y, test_size=0.2, random_state=42)

In [9]:
# --- Train Meta-Model (Linear Regression) ---
meta_model = LinearRegression()
meta_model.fit(X_train_stack, y_train_stack)

In [10]:
# --- Predict and Evaluate ---
y_pred_stack = meta_model.predict(X_test_stack)

rmse = mean_squared_error(y_test_stack, y_pred_stack, squared=False)
mae = mean_absolute_error(y_test_stack, y_pred_stack)

In [11]:
print("\n=== Final Stacked Ensemble Model Results ===")
print(f"RMSE: {rmse:.4f}")
print(f"MAE:  {mae:.4f}")


=== Final Stacked Ensemble Model Results ===
RMSE: 0.5418
MAE:  0.3754


In [14]:
# --- Save Meta-Model ---
joblib.dump(meta_model, os.path.join(model_save_path, 'meta_model.pkl'))

print("Saved final stacked meta-model.")

Saved final stacked meta-model.
