In [1]:
# --- Imports ---
import os

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from surprise import Dataset, Reader, SVD, dump
from surprise.model_selection import train_test_split, cross_validate
from tqdm import tqdm

In [2]:
# Absolute path two directory levels above the kernel's working directory.
# The data and model paths in later cells are joined onto this value.
current_dir = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)
current_dir

'/root/cmpe256/cmpe256_hotel_recommendation_system'

In [6]:
# Path to the processed ratings file (a .csv.gz) under <current_dir>/data/processed.
file_path = os.path.join(
    os.path.join(current_dir, 'data', 'processed'),
    'hotelrec_2013_2017_cleaned_encoded.csv.gz',
)
file_path

'/root/cmpe256/cmpe256_hotel_recommendation_system/data/processed/hotelrec_2013_2017_cleaned_encoded.csv.gz'

In [7]:
# Parameters
# Surprise reader configured for ratings on a 1-to-5 scale.
reader = Reader(rating_scale=(1, 5))
# Number of rows handled per chunk (passed to read_csv / used for slicing).
chunk_size = 1_000_000

In [8]:
# Despite its name, `total_rows` is the number of CHUNKS in the file: one is
# counted per chunk yielded by the reader. Only the first column is parsed,
# since the contents are irrelevant for counting.
total_rows = sum(
    1
    for _chunk in pd.read_csv(file_path, usecols=[0], chunksize=chunk_size)
)
total_rows

33

In [9]:
# Per-chunk mean cross-validation metrics; one value is appended to each list
# for every non-empty chunk. The full pass takes roughly 40 minutes.
all_rmse = []
all_mae = []
all_fcp = []

# Only these columns are consumed below, so the CSV reader skips all others.
rating_columns = ['author', 'hotel_id', 'rating']

# --- Processing with Progress Bar ---
# `total_rows` holds the number of chunks (not rows); it sizes the progress bar.
chunk_reader = pd.read_csv(file_path, usecols=rating_columns, chunksize=chunk_size)
for chunk_idx, chunk in enumerate(tqdm(chunk_reader, total=total_rows, desc="Processing chunks")):
    print(f"\nProcessing chunk {chunk_idx+1}/{total_rows}")

    if chunk.empty:
        continue

    # Prepare data for Surprise; columns are passed in user, item, rating order
    data = Dataset.load_from_df(chunk[rating_columns], reader)

    # Define SVD model (a fresh, unfitted instance for every chunk)
    model = SVD(
        n_factors=150,
        n_epochs=30,
        lr_all=0.003,
        reg_all=0.05
    )

    # Cross-validate
    results = cross_validate(model, data, measures=['RMSE', 'MAE', 'FCP'], cv=3, verbose=True)

    # Store metrics (mean over the 3 folds)
    all_rmse.append(results['test_rmse'].mean())
    all_mae.append(results['test_mae'].mean())
    all_fcp.append(results['test_fcp'].mean())

    # Save the model only from the first chunk
    if chunk_idx == 0:
        # cross_validate only ever fits on per-fold training splits, so train
        # on every rating in the chunk before persisting the baseline model.
        model.fit(data.build_full_trainset())

        model_save_path = os.path.join(current_dir, 'models', 'svd_model')
        # dump.dump does not create missing directories.
        os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
        dump.dump(model_save_path, algo=model)
        print(f"Saved baseline SVD model from chunk {chunk_idx+1}")

Processing chunks:   0%|          | 0/33 [00:00<?, ?it/s]


Processing chunk 1/33
Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9459  0.9449  0.9464  0.9457  0.0006  
MAE (testset)     0.7115  0.7109  0.7111  0.7112  0.0003  
FCP (testset)     0.6112  0.6081  0.6007  0.6066  0.0044  
Fit time          9.72    9.74    9.54    9.67    0.09    
Test time         1.18    0.95    0.73    0.95    0.18    


Processing chunks:   3%|▎         | 1/33 [01:13<39:04, 73.27s/it]

Saved baseline SVD model from chunk 1

Processing chunk 2/33


Processing chunks:   6%|▌         | 2/33 [02:27<38:13, 73.97s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9631  0.9599  0.9593  0.9607  0.0017  
MAE (testset)     0.7189  0.7168  0.7169  0.7175  0.0010  
FCP (testset)     0.5813  0.5798  0.5730  0.5780  0.0036  
Fit time          10.02   10.27   10.24   10.18   0.11    
Test time         1.19    0.94    1.15    1.09    0.11    

Processing chunk 3/33


Processing chunks:   9%|▉         | 3/33 [04:19<45:42, 91.43s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9931  0.9941  0.9902  0.9925  0.0016  
MAE (testset)     0.7538  0.7544  0.7525  0.7536  0.0008  
FCP (testset)     0.5644  0.5634  0.5655  0.5644  0.0009  
Fit time          9.71    9.70    9.89    9.77    0.08    
Test time         1.22    0.95    1.16    1.11    0.12    

Processing chunk 4/33


Processing chunks:  12%|█▏        | 4/33 [05:14<37:03, 76.69s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9283  0.9265  0.9255  0.9268  0.0011  
MAE (testset)     0.7024  0.7015  0.7006  0.7015  0.0008  
FCP (testset)     0.6234  0.6294  0.6120  0.6216  0.0072  
Fit time          9.51    9.35    9.27    9.38    0.10    
Test time         1.26    0.87    0.92    1.02    0.17    

Processing chunk 5/33


Processing chunks:  15%|█▌        | 5/33 [06:04<31:20, 67.15s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9337  0.9358  0.9351  0.9349  0.0009  
MAE (testset)     0.7119  0.7131  0.7152  0.7134  0.0014  
FCP (testset)     0.6005  0.6024  0.6074  0.6034  0.0029  
Fit time          9.60    9.63    9.38    9.54    0.11    
Test time         1.02    0.94    0.98    0.98    0.03    

Processing chunk 6/33


Processing chunks:  18%|█▊        | 6/33 [07:05<29:21, 65.22s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9362  0.9370  0.9357  0.9363  0.0006  
MAE (testset)     0.7109  0.7126  0.7118  0.7118  0.0007  
FCP (testset)     0.6189  0.6148  0.6044  0.6127  0.0061  
Fit time          9.67    9.52    9.51    9.57    0.07    
Test time         0.96    0.91    0.96    0.94    0.02    

Processing chunk 7/33


Processing chunks:  21%|██        | 7/33 [07:58<26:27, 61.06s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9717  0.9725  0.9714  0.9719  0.0004  
MAE (testset)     0.7221  0.7220  0.7216  0.7219  0.0002  
FCP (testset)     0.5414  0.5466  0.5417  0.5432  0.0024  
Fit time          10.93   10.74   11.41   11.03   0.28    
Test time         1.28    0.94    1.07    1.10    0.14    

Processing chunk 8/33


Processing chunks:  24%|██▍       | 8/33 [09:02<25:55, 62.23s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9476  0.9486  0.9504  0.9489  0.0011  
MAE (testset)     0.7126  0.7140  0.7146  0.7138  0.0008  
FCP (testset)     0.6069  0.6009  0.5837  0.5972  0.0098  
Fit time          10.37   10.74   10.66   10.59   0.16    
Test time         1.26    1.14    1.03    1.14    0.09    

Processing chunk 9/33


Processing chunks:  27%|██▋       | 9/33 [09:59<24:11, 60.48s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9774  0.9763  0.9781  0.9773  0.0007  
MAE (testset)     0.7268  0.7261  0.7270  0.7266  0.0003  
FCP (testset)     0.5129  0.5129  0.5258  0.5172  0.0061  
Fit time          11.86   11.55   11.73   11.71   0.13    
Test time         1.35    1.00    1.03    1.13    0.16    

Processing chunk 10/33


Processing chunks:  30%|███       | 10/33 [10:55<22:35, 58.95s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9814  0.9816  0.9776  0.9802  0.0018  
MAE (testset)     0.7291  0.7289  0.7266  0.7282  0.0011  
FCP (testset)     0.5308  0.5280  0.5143  0.5243  0.0072  
Fit time          11.29   11.12   11.40   11.27   0.12    
Test time         1.38    0.95    1.19    1.17    0.17    

Processing chunk 11/33


Processing chunks:  33%|███▎      | 11/33 [11:51<21:18, 58.13s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9853  0.9842  0.9846  0.9847  0.0005  
MAE (testset)     0.7347  0.7351  0.7344  0.7347  0.0003  
FCP (testset)     0.5115  0.5149  0.5056  0.5107  0.0039  
Fit time          11.22   11.26   11.51   11.33   0.13    
Test time         1.28    0.95    1.19    1.14    0.14    

Processing chunk 12/33


Processing chunks:  36%|███▋      | 12/33 [12:46<20:01, 57.23s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9905  0.9926  0.9928  0.9920  0.0010  
MAE (testset)     0.7403  0.7424  0.7421  0.7416  0.0009  
FCP (testset)     0.5174  0.5127  0.5201  0.5167  0.0031  
Fit time          10.99   10.79   11.13   10.97   0.14    
Test time         1.25    1.10    1.02    1.12    0.10    

Processing chunk 13/33


Processing chunks:  39%|███▉      | 13/33 [13:39<18:35, 55.79s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.0695  1.0700  1.0716  1.0703  0.0009  
MAE (testset)     0.8267  0.8273  0.8289  0.8277  0.0009  
FCP (testset)     0.5577  0.5580  0.5632  0.5596  0.0025  
Fit time          13.07   12.79   13.19   13.02   0.17    
Test time         1.43    1.07    1.42    1.30    0.17    

Processing chunk 14/33


Processing chunks:  42%|████▏     | 14/33 [14:34<17:39, 55.75s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.0013  0.9956  0.9965  0.9978  0.0025  
MAE (testset)     0.7534  0.7502  0.7501  0.7512  0.0016  
FCP (testset)     0.5645  0.5732  0.5548  0.5642  0.0075  
Fit time          11.89   11.51   12.03   11.81   0.22    
Test time         1.06    1.25    1.08    1.13    0.09    

Processing chunk 15/33


Processing chunks:  45%|████▌     | 15/33 [15:34<17:07, 57.09s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9741  0.9791  0.9772  0.9768  0.0020  
MAE (testset)     0.7277  0.7317  0.7309  0.7301  0.0017  
FCP (testset)     0.5472  0.5327  0.5380  0.5393  0.0060  
Fit time          11.68   10.53   10.66   10.96   0.51    
Test time         1.32    1.14    0.94    1.13    0.16    

Processing chunk 16/33


Processing chunks:  48%|████▊     | 16/33 [16:23<15:27, 54.56s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.0161  1.0158  1.0167  1.0162  0.0003  
MAE (testset)     0.7662  0.7670  0.7664  0.7665  0.0003  
FCP (testset)     0.5157  0.5336  0.5231  0.5241  0.0074  
Fit time          12.44   11.77   12.00   12.07   0.28    
Test time         1.33    0.94    1.01    1.10    0.17    

Processing chunk 17/33


Processing chunks:  52%|█████▏    | 17/33 [17:17<14:30, 54.38s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.0841  1.0844  1.0853  1.0846  0.0005  
MAE (testset)     0.8368  0.8371  0.8374  0.8371  0.0002  
FCP (testset)     0.5488  0.5506  0.5741  0.5578  0.0115  
Fit time          12.28   11.97   12.46   12.24   0.20    
Test time         1.12    1.34    1.09    1.18    0.11    

Processing chunk 18/33


Processing chunks:  55%|█████▍    | 18/33 [19:37<19:59, 79.96s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9539  0.9538  0.9564  0.9547  0.0012  
MAE (testset)     0.7254  0.7254  0.7266  0.7258  0.0006  
FCP (testset)     0.6239  0.6291  0.6215  0.6249  0.0032  
Fit time          9.92    9.38    9.31    9.54    0.27    
Test time         1.16    1.05    0.92    1.05    0.10    

Processing chunk 19/33


Processing chunks:  58%|█████▊    | 19/33 [20:47<17:58, 77.00s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9388  0.9409  0.9431  0.9409  0.0018  
MAE (testset)     0.7068  0.7067  0.7094  0.7077  0.0013  
FCP (testset)     0.6044  0.6158  0.5949  0.6050  0.0085  
Fit time          9.74    8.90    9.57    9.41    0.36    
Test time         1.13    0.81    0.92    0.96    0.13    

Processing chunk 20/33


Processing chunks:  61%|██████    | 20/33 [21:56<16:09, 74.59s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9344  0.9338  0.9349  0.9344  0.0005  
MAE (testset)     0.7024  0.7020  0.7031  0.7025  0.0005  
FCP (testset)     0.6241  0.6182  0.6224  0.6215  0.0025  
Fit time          9.73    9.50    10.03   9.75    0.22    
Test time         1.02    0.69    1.17    0.96    0.20    

Processing chunk 21/33


Processing chunks:  64%|██████▎   | 21/33 [23:29<16:04, 80.35s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9562  0.9550  0.9571  0.9561  0.0009  
MAE (testset)     0.7232  0.7235  0.7232  0.7233  0.0001  
FCP (testset)     0.5998  0.6063  0.6009  0.6023  0.0029  
Fit time          9.92    9.82    9.87    9.87    0.04    
Test time         1.01    0.98    1.21    1.07    0.10    

Processing chunk 22/33


Processing chunks:  67%|██████▋   | 22/33 [25:08<15:44, 85.84s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9632  0.9656  0.9629  0.9639  0.0012  
MAE (testset)     0.7267  0.7277  0.7253  0.7265  0.0010  
FCP (testset)     0.5827  0.5714  0.5895  0.5812  0.0075  
Fit time          10.21   9.65    10.27   10.04   0.28    
Test time         1.27    0.92    1.01    1.06    0.15    

Processing chunk 23/33


Processing chunks:  70%|██████▉   | 23/33 [26:44<14:47, 88.76s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9629  0.9638  0.9659  0.9642  0.0013  
MAE (testset)     0.7293  0.7300  0.7320  0.7304  0.0012  
FCP (testset)     0.6097  0.6066  0.6123  0.6095  0.0024  
Fit time          10.23   9.99    10.51   10.24   0.21    
Test time         1.26    0.93    1.09    1.09    0.13    

Processing chunk 24/33


Processing chunks:  73%|███████▎  | 24/33 [28:25<13:53, 92.56s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9701  0.9688  0.9704  0.9698  0.0007  
MAE (testset)     0.7383  0.7377  0.7386  0.7382  0.0004  
FCP (testset)     0.5875  0.5872  0.5730  0.5826  0.0068  
Fit time          10.24   9.78    9.66    9.89    0.25    
Test time         1.03    0.90    0.98    0.97    0.05    

Processing chunk 25/33


Processing chunks:  76%|███████▌  | 25/33 [29:23<10:56, 82.05s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9308  0.9356  0.9335  0.9333  0.0019  
MAE (testset)     0.7126  0.7162  0.7135  0.7141  0.0015  
FCP (testset)     0.6143  0.6213  0.6191  0.6183  0.0029  
Fit time          9.86    9.31    9.51    9.56    0.23    
Test time         1.03    1.05    1.13    1.07    0.04    

Processing chunk 26/33


Processing chunks:  79%|███████▉  | 26/33 [30:15<08:32, 73.19s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9358  0.9350  0.9342  0.9350  0.0007  
MAE (testset)     0.7168  0.7161  0.7163  0.7164  0.0003  
FCP (testset)     0.6096  0.6141  0.6183  0.6140  0.0035  
Fit time          10.02   9.93    9.95    9.97    0.04    
Test time         1.24    0.97    0.98    1.07    0.13    

Processing chunk 27/33


Processing chunks:  82%|████████▏ | 27/33 [31:04<06:35, 65.91s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9439  0.9423  0.9415  0.9425  0.0010  
MAE (testset)     0.7242  0.7229  0.7223  0.7231  0.0008  
FCP (testset)     0.5601  0.5595  0.5532  0.5576  0.0031  
Fit time          9.94    9.79    9.81    9.84    0.07    
Test time         1.02    0.91    0.72    0.88    0.12    

Processing chunk 28/33


Processing chunks:  85%|████████▍ | 28/33 [31:51<05:01, 60.34s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9316  0.9303  0.9318  0.9312  0.0007  
MAE (testset)     0.7111  0.7106  0.7121  0.7113  0.0006  
FCP (testset)     0.6151  0.6211  0.5905  0.6089  0.0133  
Fit time          9.70    9.49    9.61    9.60    0.09    
Test time         1.20    1.06    0.91    1.06    0.12    

Processing chunk 29/33


Processing chunks:  88%|████████▊ | 29/33 [32:51<04:00, 60.19s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9446  0.9449  0.9436  0.9444  0.0006  
MAE (testset)     0.7188  0.7185  0.7177  0.7184  0.0005  
FCP (testset)     0.6368  0.6263  0.6311  0.6314  0.0043  
Fit time          9.65    9.81    10.04   9.83    0.16    
Test time         1.09    0.93    0.82    0.95    0.11    

Processing chunk 30/33


Processing chunks:  91%|█████████ | 30/33 [34:03<03:10, 63.56s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9462  0.9448  0.9444  0.9451  0.0007  
MAE (testset)     0.7139  0.7130  0.7118  0.7129  0.0008  
FCP (testset)     0.6079  0.6109  0.6118  0.6102  0.0017  
Fit time          10.11   9.83    10.29   10.08   0.19    
Test time         1.21    0.94    1.07    1.07    0.11    

Processing chunk 31/33


Processing chunks:  94%|█████████▍| 31/33 [35:22<02:16, 68.30s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9517  0.9538  0.9541  0.9532  0.0011  
MAE (testset)     0.7150  0.7169  0.7171  0.7163  0.0010  
FCP (testset)     0.5957  0.5950  0.5897  0.5935  0.0027  
Fit time          10.46   10.36   10.29   10.37   0.07    
Test time         1.29    0.98    1.20    1.16    0.13    

Processing chunk 32/33


Processing chunks:  97%|█████████▋| 32/33 [36:47<01:13, 73.41s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9588  0.9615  0.9618  0.9607  0.0014  
MAE (testset)     0.7170  0.7186  0.7185  0.7180  0.0007  
FCP (testset)     0.5484  0.5611  0.5510  0.5535  0.0055  
Fit time          11.29   10.34   11.34   10.99   0.46    
Test time         1.31    1.16    1.09    1.19    0.09    

Processing chunk 33/33


Processing chunks: 100%|██████████| 33/33 [38:07<00:00, 69.31s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9655  0.9662  0.9668  0.9662  0.0005  
MAE (testset)     0.7253  0.7256  0.7263  0.7257  0.0005  
FCP (testset)     0.5718  0.5826  0.5631  0.5725  0.0080  
Fit time          9.71    9.54    9.57    9.61    0.08    
Test time         1.14    0.87    1.11    1.04    0.12    





In [10]:
# --- Final Output ---
# Each figure is the plain (unweighted) mean of the per-chunk fold averages
# collected in all_rmse / all_mae / all_fcp.
print("=== Final Results ===")
print("Average RMSE: {:.4f}".format(sum(all_rmse) / len(all_rmse)))
print("Average MAE:  {:.4f}".format(sum(all_mae) / len(all_mae)))
print("Average FCP:  {:.4f}".format(sum(all_fcp) / len(all_fcp)))

=== Final Results ===
Average RMSE: 0.9665
Average MAE:  0.7303
Average FCP:  0.5796


In [None]:
# Setup
# NOTE(review): `df` is not created by any cell visible in this notebook; it
# must already hold the full ratings table before this cell runs (presumably
# loaded from `file_path` -- confirm).
# Ceiling division: `len(df) // chunk_size + 1` would schedule one extra, empty
# chunk whenever the row count is an exact multiple of chunk_size.
num_chunks = (len(df) + chunk_size - 1) // chunk_size

# Store results (per-chunk mean cross-validation metrics)
all_rmse = []
all_mae = []
all_fcp = []

In [None]:
# --- Processing ---    [~45 min]
# Evaluates SVD with 3-fold cross-validation on consecutive row slices of the
# in-memory frame `df`, appending each slice's mean metrics to the result lists.
# NOTE(review): `df` and `num_chunks` must be defined by earlier cells.
for i in tqdm(range(num_chunks), desc="Processing chunks"):
    print(f"\nProcessing chunk {i+1}/{num_chunks}")
    df_chunk = df.iloc[i*chunk_size:(i+1)*chunk_size]

    if df_chunk.empty:
        continue

    # Prepare data for Surprise; columns are passed in user, item, rating order
    data = Dataset.load_from_df(df_chunk[['author', 'hotel_id', 'rating']], reader)

    # Define model (a fresh, unfitted instance for every chunk)
    model = SVD(
        n_factors=150,
        n_epochs=30,
        lr_all=0.003,
        reg_all=0.05
    )

    # Cross-validate
    results = cross_validate(model, data, measures=['RMSE', 'MAE', 'FCP'], cv=3, verbose=True)

    # Store average metrics (mean over the 3 folds)
    all_rmse.append(results['test_rmse'].mean())
    all_mae.append(results['test_mae'].mean())
    all_fcp.append(results['test_fcp'].mean())

    if i == 0:
        # cross_validate only ever fits on per-fold training splits, so train
        # on every rating in the chunk before persisting the baseline model.
        model.fit(data.build_full_trainset())

        model_save_path = os.path.join(current_dir, 'models', 'svd_model')
        # dump.dump does not create missing directories.
        os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
        dump.dump(model_save_path, algo=model)
        print(f"Saved baseline SVD model from chunk {i+1}")

Processing chunks:   0%|          | 0/33 [00:00<?, ?it/s]


Processing chunk 1/33


Processing chunks:   3%|▎         | 1/33 [01:29<47:31, 89.12s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9454  0.9469  0.9449  0.9457  0.0008  
MAE (testset)     0.7103  0.7114  0.7100  0.7106  0.0006  
FCP (testset)     0.6065  0.5995  0.6126  0.6062  0.0054  
Fit time          11.42   11.31   11.75   11.49   0.18    
Test time         1.24    1.04    1.23    1.17    0.09    

Processing chunk 2/33


Processing chunks:   6%|▌         | 2/33 [03:03<47:30, 91.96s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9601  0.9592  0.9620  0.9604  0.0012  
MAE (testset)     0.7177  0.7174  0.7181  0.7177  0.0003  
FCP (testset)     0.5739  0.5656  0.5760  0.5719  0.0045  
Fit time          12.20   12.68   12.48   12.45   0.20    
Test time         1.35    1.19    1.28    1.27    0.07    

Processing chunk 3/33


Processing chunks:   9%|▉         | 3/33 [05:36<1:00:04, 120.15s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9914  0.9934  0.9923  0.9924  0.0008  
MAE (testset)     0.7530  0.7541  0.7523  0.7531  0.0008  
FCP (testset)     0.5492  0.5649  0.5710  0.5617  0.0092  
Fit time          12.66   12.51   12.54   12.57   0.07    
Test time         1.22    1.12    1.12    1.15    0.05    

Processing chunk 4/33


Processing chunks:  12%|█▏        | 4/33 [06:42<47:46, 98.85s/it]   

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9267  0.9278  0.9259  0.9268  0.0008  
MAE (testset)     0.7017  0.7019  0.7011  0.7016  0.0003  
FCP (testset)     0.5985  0.6146  0.6239  0.6123  0.0105  
Fit time          11.86   11.14   11.72   11.58   0.31    
Test time         1.72    1.12    1.07    1.30    0.29    

Processing chunk 5/33


Processing chunks:  15%|█▌        | 5/33 [07:43<39:37, 84.92s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9324  0.9361  0.9359  0.9348  0.0017  
MAE (testset)     0.7125  0.7142  0.7141  0.7136  0.0008  
FCP (testset)     0.5950  0.6025  0.6017  0.5997  0.0034  
Fit time          10.41   11.29   12.02   11.24   0.66    
Test time         1.39    1.33    1.34    1.35    0.03    

Processing chunk 6/33


Processing chunks:  18%|█▊        | 6/33 [09:05<37:46, 83.96s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9362  0.9374  0.9353  0.9363  0.0009  
MAE (testset)     0.7128  0.7125  0.7109  0.7121  0.0008  
FCP (testset)     0.6142  0.6168  0.6181  0.6164  0.0016  
Fit time          12.83   13.30   11.86   12.66   0.60    
Test time         1.84    1.25    1.30    1.46    0.27    

Processing chunk 7/33


Processing chunks:  21%|██        | 7/33 [10:09<33:38, 77.65s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9698  0.9742  0.9717  0.9719  0.0018  
MAE (testset)     0.7211  0.7236  0.7217  0.7221  0.0011  
FCP (testset)     0.5434  0.5480  0.5483  0.5466  0.0023  
Fit time          13.47   13.88   14.07   13.81   0.25    
Test time         1.67    1.34    1.21    1.41    0.19    

Processing chunk 8/33


Processing chunks:  24%|██▍       | 8/33 [11:26<32:12, 77.32s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9489  0.9474  0.9508  0.9491  0.0014  
MAE (testset)     0.7135  0.7134  0.7147  0.7138  0.0006  
FCP (testset)     0.5951  0.5981  0.6001  0.5977  0.0021  
Fit time          12.97   12.42   11.91   12.43   0.43    
Test time         1.30    1.29    1.45    1.34    0.07    

Processing chunk 9/33


Processing chunks:  27%|██▋       | 9/33 [12:29<29:04, 72.67s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9797  0.9768  0.9769  0.9778  0.0013  
MAE (testset)     0.7280  0.7265  0.7262  0.7269  0.0008  
FCP (testset)     0.5273  0.5127  0.5266  0.5222  0.0067  
Fit time          12.93   12.52   13.07   12.84   0.23    
Test time         1.44    1.31    1.31    1.35    0.06    

Processing chunk 10/33


Processing chunks:  30%|███       | 10/33 [13:33<26:56, 70.29s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9809  0.9782  0.9816  0.9802  0.0015  
MAE (testset)     0.7290  0.7270  0.7291  0.7284  0.0010  
FCP (testset)     0.5291  0.5243  0.5196  0.5243  0.0039  
Fit time          13.45   13.00   13.81   13.42   0.33    
Test time         1.54    1.26    1.21    1.34    0.14    

Processing chunk 11/33


Processing chunks:  33%|███▎      | 11/33 [14:41<25:30, 69.57s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9859  0.9853  0.9832  0.9848  0.0012  
MAE (testset)     0.7356  0.7351  0.7336  0.7348  0.0008  
FCP (testset)     0.5197  0.5132  0.5050  0.5126  0.0060  
Fit time          13.71   12.97   14.28   13.65   0.54    
Test time         1.46    1.40    1.14    1.33    0.14    

Processing chunk 12/33


Processing chunks:  36%|███▋      | 12/33 [15:51<24:24, 69.72s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9921  0.9915  0.9929  0.9922  0.0006  
MAE (testset)     0.7418  0.7416  0.7421  0.7418  0.0002  
FCP (testset)     0.5026  0.5158  0.5126  0.5103  0.0056  
Fit time          14.60   13.45   13.68   13.91   0.49    
Test time         1.27    1.28    1.24    1.27    0.02    

Processing chunk 13/33


Processing chunks:  39%|███▉      | 13/33 [16:52<22:16, 66.82s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.0694  1.0728  1.0698  1.0707  0.0015  
MAE (testset)     0.8274  0.8290  0.8270  0.8278  0.0009  
FCP (testset)     0.5596  0.5651  0.5465  0.5570  0.0078  
Fit time          15.61   15.17   14.99   15.25   0.26    
Test time         1.76    1.31    1.55    1.54    0.18    

Processing chunk 14/33


Processing chunks:  42%|████▏     | 14/33 [17:56<20:54, 66.04s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9979  0.9992  0.9962  0.9978  0.0012  
MAE (testset)     0.7514  0.7520  0.7500  0.7511  0.0008  
FCP (testset)     0.5662  0.5652  0.5642  0.5652  0.0008  
Fit time          13.80   13.15   13.98   13.64   0.35    
Test time         1.32    1.19    1.38    1.30    0.08    

Processing chunk 15/33


Processing chunks:  45%|████▌     | 15/33 [19:06<20:12, 67.37s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9790  0.9774  0.9745  0.9770  0.0019  
MAE (testset)     0.7312  0.7306  0.7293  0.7304  0.0008  
FCP (testset)     0.5209  0.5306  0.5411  0.5308  0.0082  
Fit time          12.92   12.99   12.84   12.92   0.06    
Test time         1.51    1.45    1.28    1.41    0.10    

Processing chunk 16/33


Processing chunks:  48%|████▊     | 16/33 [20:07<18:30, 65.33s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.0182  1.0139  1.0162  1.0161  0.0017  
MAE (testset)     0.7677  0.7655  0.7665  0.7666  0.0009  
FCP (testset)     0.5242  0.5240  0.5099  0.5193  0.0067  
Fit time          15.18   15.10   16.06   15.45   0.44    
Test time         1.14    1.43    1.40    1.32    0.13    

Processing chunk 17/33


Processing chunks:  52%|█████▏    | 17/33 [21:13<17:27, 65.45s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.0851  1.0848  1.0848  1.0849  0.0002  
MAE (testset)     0.8371  0.8369  0.8378  0.8372  0.0004  
FCP (testset)     0.5642  0.5569  0.5600  0.5604  0.0030  
Fit time          15.16   15.83   14.40   15.13   0.58    
Test time         1.24    1.44    1.42    1.37    0.09    

Processing chunk 18/33


Processing chunks:  55%|█████▍    | 18/33 [22:59<19:26, 77.76s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9544  0.9543  0.9557  0.9548  0.0006  
MAE (testset)     0.7253  0.7257  0.7267  0.7259  0.0006  
FCP (testset)     0.6199  0.6196  0.6214  0.6203  0.0008  
Fit time          12.18   12.45   11.77   12.13   0.28    
Test time         1.05    0.94    1.44    1.14    0.21    

Processing chunk 19/33


Processing chunks:  58%|█████▊    | 19/33 [24:29<19:00, 81.47s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9404  0.9422  0.9404  0.9410  0.0009  
MAE (testset)     0.7076  0.7086  0.7062  0.7075  0.0010  
FCP (testset)     0.6080  0.6188  0.6052  0.6107  0.0059  
Fit time          12.76   11.65   11.44   11.95   0.58    
Test time         1.21    1.12    1.16    1.16    0.04    

Processing chunk 20/33


Processing chunks:  61%|██████    | 20/33 [25:56<18:01, 83.22s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9346  0.9336  0.9348  0.9343  0.0005  
MAE (testset)     0.7023  0.7018  0.7036  0.7026  0.0008  
FCP (testset)     0.6223  0.6121  0.6198  0.6180  0.0043  
Fit time          11.58   12.40   12.99   12.32   0.58    
Test time         1.53    1.21    1.12    1.29    0.18    

Processing chunk 21/33


Processing chunks:  64%|██████▎   | 21/33 [28:02<19:11, 95.92s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9549  0.9556  0.9579  0.9562  0.0013  
MAE (testset)     0.7234  0.7230  0.7244  0.7236  0.0006  
FCP (testset)     0.6070  0.6034  0.5984  0.6029  0.0035  
Fit time          13.06   11.91   11.86   12.28   0.56    
Test time         1.52    1.08    1.25    1.29    0.18    

Processing chunk 22/33


Processing chunks:  67%|██████▋   | 22/33 [30:12<19:26, 106.07s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9651  0.9654  0.9616  0.9640  0.0017  
MAE (testset)     0.7261  0.7266  0.7255  0.7261  0.0004  
FCP (testset)     0.5688  0.5938  0.5839  0.5821  0.0103  
Fit time          13.42   12.39   12.68   12.83   0.43    
Test time         1.53    1.14    1.17    1.28    0.18    

Processing chunk 23/33


Processing chunks:  70%|██████▉   | 23/33 [32:11<18:20, 110.05s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9629  0.9629  0.9674  0.9644  0.0021  
MAE (testset)     0.7301  0.7294  0.7320  0.7305  0.0011  
FCP (testset)     0.5969  0.6145  0.6151  0.6088  0.0085  
Fit time          11.55   11.99   12.74   12.09   0.49    
Test time         1.30    1.31    1.13    1.25    0.08    

Processing chunk 24/33


Processing chunks:  73%|███████▎  | 24/33 [34:18<17:16, 115.16s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9698  0.9694  0.9699  0.9697  0.0002  
MAE (testset)     0.7375  0.7382  0.7386  0.7381  0.0004  
FCP (testset)     0.5767  0.5776  0.5900  0.5814  0.0061  
Fit time          12.68   11.23   11.28   11.73   0.67    
Test time         1.10    1.19    1.19    1.16    0.04    

Processing chunk 25/33


Processing chunks:  76%|███████▌  | 25/33 [35:30<13:36, 102.08s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9349  0.9316  0.9334  0.9333  0.0014  
MAE (testset)     0.7148  0.7135  0.7140  0.7141  0.0005  
FCP (testset)     0.6181  0.6197  0.6049  0.6142  0.0066  
Fit time          12.55   11.48   12.21   12.08   0.45    
Test time         1.33    1.01    1.06    1.13    0.14    

Processing chunk 26/33


Processing chunks:  79%|███████▉  | 26/33 [36:30<10:26, 89.43s/it] 

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9322  0.9359  0.9374  0.9352  0.0022  
MAE (testset)     0.7151  0.7175  0.7185  0.7170  0.0014  
FCP (testset)     0.6207  0.6000  0.6231  0.6146  0.0104  
Fit time          11.72   11.62   11.56   11.63   0.07    
Test time         1.44    1.09    0.99    1.17    0.19    

Processing chunk 27/33


Processing chunks:  82%|████████▏ | 27/33 [37:25<07:55, 79.27s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9409  0.9422  0.9444  0.9425  0.0014  
MAE (testset)     0.7225  0.7240  0.7240  0.7235  0.0007  
FCP (testset)     0.5568  0.5527  0.5507  0.5534  0.0025  
Fit time          11.77   11.32   11.28   11.45   0.22    
Test time         1.30    1.01    1.52    1.28    0.21    

Processing chunk 28/33


Processing chunks:  85%|████████▍ | 28/33 [38:25<06:07, 73.58s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9314  0.9290  0.9338  0.9314  0.0020  
MAE (testset)     0.7122  0.7104  0.7134  0.7120  0.0012  
FCP (testset)     0.6251  0.6088  0.6210  0.6183  0.0069  
Fit time          12.09   12.43   12.03   12.19   0.18    
Test time         1.40    1.41    1.51    1.44    0.05    

Processing chunk 29/33


Processing chunks:  88%|████████▊ | 29/33 [39:37<04:51, 72.98s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9467  0.9432  0.9433  0.9444  0.0016  
MAE (testset)     0.7186  0.7175  0.7169  0.7177  0.0007  
FCP (testset)     0.6335  0.6326  0.6321  0.6327  0.0006  
Fit time          12.54   11.40   11.61   11.85   0.49    
Test time         1.17    1.14    1.01    1.11    0.07    

Processing chunk 30/33


Processing chunks:  91%|█████████ | 30/33 [41:06<03:53, 77.78s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9473  0.9434  0.9443  0.9450  0.0017  
MAE (testset)     0.7143  0.7121  0.7113  0.7126  0.0013  
FCP (testset)     0.6083  0.5964  0.6109  0.6052  0.0063  
Fit time          12.44   12.48   11.41   12.11   0.50    
Test time         1.37    1.48    1.15    1.33    0.14    

Processing chunk 31/33


Processing chunks:  94%|█████████▍| 31/33 [42:36<02:42, 81.34s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9559  0.9497  0.9536  0.9531  0.0025  
MAE (testset)     0.7177  0.7139  0.7168  0.7161  0.0016  
FCP (testset)     0.5929  0.5927  0.5881  0.5912  0.0022  
Fit time          11.37   11.44   12.03   11.61   0.30    
Test time         1.18    1.28    1.32    1.26    0.06    

Processing chunk 32/33


Processing chunks:  97%|█████████▋| 32/33 [44:13<01:26, 86.09s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9611  0.9627  0.9581  0.9606  0.0019  
MAE (testset)     0.7184  0.7186  0.7171  0.7180  0.0007  
FCP (testset)     0.5665  0.5716  0.5647  0.5676  0.0029  
Fit time          12.20   11.55   12.35   12.03   0.35    
Test time         1.41    1.31    1.39    1.37    0.04    

Processing chunk 33/33


Processing chunks: 100%|██████████| 33/33 [45:43<00:00, 83.14s/it]

Evaluating RMSE, MAE, FCP of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9644  0.9665  0.9680  0.9663  0.0015  
MAE (testset)     0.7246  0.7257  0.7272  0.7258  0.0011  
FCP (testset)     0.5643  0.5808  0.5690  0.5714  0.0069  
Fit time          10.93   10.51   10.48   10.64   0.21    
Test time         1.16    1.00    0.97    1.04    0.08    





In [15]:
# --- Final Output ---
# Summarise the per-chunk cross-validation metrics gathered by the chunk loop.
# Each list holds one value per processed chunk (the mean over that chunk's
# CV folds), so the figures below are unweighted means across chunks.
# NOTE(review): every chunk counts equally regardless of how many rows it
# contained — confirm that an unweighted average is the intended summary.
print("\n=== Results ===")

# Each prefix carries its own trailing spacing so the printed lines keep
# exactly the same layout as before.
metric_summaries = (
    ("Average RMSE across chunks: ", all_rmse),
    ("Average MAE across chunks: ", all_mae),
    ("Average FCP:  ", all_fcp),
)

for prefix, chunk_values in metric_summaries:
    if not chunk_values:
        # The chunk loop skips empty chunks, so a list can legitimately be
        # empty; report that instead of failing with ZeroDivisionError.
        print(f"{prefix}n/a (no chunk metrics collected)")
        continue
    print(f"{prefix}{sum(chunk_values) / len(chunk_values):.4f}")


=== Results ===
Average RMSE across chunks: 0.9636
Average MAE across chunks: 0.7279
Average FCP:  0.5790
