In [None]:
import os
os.chdir('/content/drive/MyDrive/workspace/Learning_Equality/SVM_DPR_paraphrase-multilingual-mpnet-base-v2-finetuned')

In [None]:
!pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
!pip install cuml-cu11 --extra-index-url=https://pypi.ngc.nvidia.com

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/, https://pypi.ngc.nvidia.com
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/, https://pypi.ngc.nvidia.com


In [None]:
!pip install -U transformers
!pip install sentencepiece
!pip install -U fuzzywuzzy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import lightgbm
import cupy as cp
from cuml.svm import SVC
import cuml
from cuml.preprocessing import MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from pathlib import Path
import pickle
import tqdm
import numpy as np
import cudf
import pandas as pd
import gc
import cupy

In [None]:
class CFG:
    DATA_DIR = Path('./train_data')

In [None]:
train_df = cudf.read_csv(CFG.DATA_DIR / 'train_df.csv', index_col=0)
train_idx_dict = pickle.load(open(CFG.DATA_DIR / 'train_idx.pkl',mode='rb'))
topics_embedding_cudf = cudf.DataFrame(np.load(CFG.DATA_DIR / 'topics_embedding.npy'))
contents_embedding_cudf = cudf.DataFrame(np.load(CFG.DATA_DIR / 'contents_embedding.npy'))

In [None]:
train_to_idx = cp.array(train_idx_dict["train_to_idx"])
train_co_idx = cp.array(train_idx_dict["train_co_idx"])

In [None]:
SAVE_DIR = Path('./svm_model')
for i in range(5):
    SAVE_DIR.mkdir(parents=True, exist_ok=True)
    model_save_path = SAVE_DIR / f'fold_{i}.pkl'
    tr_idx = train_df[train_df.fold!=i].index.to_numpy()
    tr_y = (train_df.label.iloc[tr_idx] * 1).to_numpy()
    val_idx = train_df[train_df.fold==i].index.to_numpy()
    val_y = (train_df.label.iloc[val_idx] * 1)
    rus = RandomUnderSampler(random_state=42)
    tr_idx, tr_y = rus.fit_resample(tr_idx.reshape(-1, 1), tr_y)
    tr_idx = cupy.array(tr_idx)
    val_idx = cupy.array(val_idx)
    print(f'train_num:{len(tr_y)} validation_num:{len(val_y)}')
    tr_idx, tr_y = cupy.array(tr_idx), cupy.array(tr_y)
    tr_idx = tr_idx.reshape(-1)
    tr_X_topic =  topics_embedding_cudf.iloc[train_to_idx[tr_idx]].values
    tr_X_content =  contents_embedding_cudf.iloc[train_co_idx[tr_idx]].values
    tr_X = cp.hstack((tr_X_topic, tr_X_content))
    del tr_X_topic, tr_X_content
    gc.collect()
    if Path(model_save_path).exists():
        print(f'====loading fold:{i}====')
        with open(model_save_path, 'rb') as f:
            pipline = pickle.load(f)
        scaler = pipline['scaler']
        pca = pipline['pca']
        model = pipline['model']
    else:
        print(f'====training fold:{i}====')
        scaler = MinMaxScaler()
        pca = cuml.PCA(n_components=500)
        tr_X = pca.fit_transform(tr_X)
        tr_X = scaler.fit_transform(tr_X)
        model = SVC(kernel='rbf', C=10, gamma=1, verbose=True, probability=True, max_iter=1000)
        model.fit(tr_X, tr_y)
        del tr_X
        gc.collect()
        with open(model_save_path, 'wb') as f:
          pickle.dump({'pca':pca,
                       'scaler':scaler, 
                       'model':model}, f)
    for val_idx_split in tqdm.tqdm(cp.array_split(val_idx, 30), total=30):
      val_X_topic =  topics_embedding_cudf.iloc[train_to_idx[val_idx_split]].values
      val_X_content =  contents_embedding_cudf.iloc[train_co_idx[val_idx_split]].values
      val_X = cp.hstack((val_X_topic, val_X_content))
      val_X = pca.transform(val_X)
      del val_X_topic, val_X_content
      gc.collect()
      val_X = cp.array(scaler.transform(val_X.get()))
      val_pred = model.predict_proba(val_X)
      train_df.loc[val_idx_split, 'predictions_proba'] = val_pred.get()[:,1]
      del val_X
      gc.collect()

train_num:401270 validation_num:12177402
====loading fold:0====


100%|██████████| 30/30 [42:48<00:00, 85.63s/it]


train_num:399110 validation_num:12189909
====training fold:1====
[D] [11:13:42.466183] /project/cpp/src/svm/workingset.cuh:124 Creating working set with 1024 elements
[D] [11:13:42.468708] /project/cpp/src/svm/kernelcache.cuh:133 Allocating kernel tile, size: 1247 MiB
[D] [11:13:42.470184] /project/cpp/src/svm/kernelcache.cuh:138 Allocating x_ws, size: 500 KiB
[D] [11:14:03.536092] /project/cpp/src/svm/smosolver.cuh:185 SMO iteration 500, diff 0.924466
[D] [11:14:24.684750] /project/cpp/src/svm/smosolver.cuh:185 SMO iteration 1000, diff 0.134985
[D] [11:14:24.684869] /project/cpp/src/svm/smosolver.cuh:188 SMO solver finished after 1000 outer iterations, total inner iterations, and diff 0.134985
[D] [11:14:24.696453] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:14:28.485181] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/l

  0%|          | 0/30 [00:00<?, ?it/s]

[D] [11:17:43.143036] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:17:44.627149] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


  3%|▎         | 1/30 [01:20<39:03, 80.82s/it]

[D] [11:19:03.937172] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:19:05.387642] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


  7%|▋         | 2/30 [02:41<37:42, 80.80s/it]

[D] [11:20:24.739966] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:20:26.199688] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 10%|█         | 3/30 [04:02<36:21, 80.78s/it]

[D] [11:21:45.477933] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:21:46.938011] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 13%|█▎        | 4/30 [05:23<34:59, 80.77s/it]

[D] [11:23:06.234633] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:23:07.682093] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 17%|█▋        | 5/30 [06:43<33:38, 80.75s/it]

[D] [11:24:26.951574] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:24:28.398719] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 20%|██        | 6/30 [08:04<32:17, 80.74s/it]

[D] [11:25:47.683120] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:25:49.131816] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 23%|██▎       | 7/30 [09:25<30:56, 80.73s/it]

[D] [11:27:08.387172] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:27:09.832544] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 27%|██▋       | 8/30 [10:45<29:35, 80.72s/it]

[D] [11:28:29.099879] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:28:30.554488] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 30%|███       | 9/30 [12:06<28:15, 80.72s/it]

[D] [11:29:49.794972] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:29:51.246988] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 33%|███▎      | 10/30 [13:27<26:54, 80.71s/it]

[D] [11:31:10.513464] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:31:11.944897] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 37%|███▋      | 11/30 [14:48<25:33, 80.71s/it]

[D] [11:32:31.192346] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:32:32.647711] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 40%|████      | 12/30 [16:08<24:12, 80.71s/it]

[D] [11:33:51.923061] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:33:53.377423] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 43%|████▎     | 13/30 [17:29<22:51, 80.70s/it]

[D] [11:35:12.583160] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:35:14.033699] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 47%|████▋     | 14/30 [18:50<21:31, 80.70s/it]

[D] [11:36:33.295770] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:36:34.740689] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 50%|█████     | 15/30 [20:10<20:10, 80.70s/it]

[D] [11:37:53.963580] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:37:55.405594] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 53%|█████▎    | 16/30 [21:31<18:49, 80.69s/it]

[D] [11:39:14.651091] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:39:16.075171] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 57%|█████▋    | 17/30 [22:52<17:28, 80.65s/it]

[D] [11:40:35.196500] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:40:36.623823] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 60%|██████    | 18/30 [24:12<16:07, 80.65s/it]

[D] [11:41:55.861181] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:41:57.298149] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 63%|██████▎   | 19/30 [25:33<14:47, 80.65s/it]

[D] [11:43:16.478552] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:43:17.915062] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 67%|██████▋   | 20/30 [26:54<13:26, 80.65s/it]

[D] [11:44:37.144548] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:44:38.579242] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 70%|███████   | 21/30 [28:14<12:05, 80.65s/it]

[D] [11:45:57.784502] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:45:59.213634] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 73%|███████▎  | 22/30 [29:35<10:45, 80.64s/it]

[D] [11:47:18.437946] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:47:19.863369] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 77%|███████▋  | 23/30 [30:55<09:24, 80.64s/it]

[D] [11:48:39.056302] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:48:40.497492] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 80%|████████  | 24/30 [32:16<08:03, 80.64s/it]

[D] [11:49:59.720323] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:50:01.154161] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 83%|████████▎ | 25/30 [33:37<06:43, 80.65s/it]

[D] [11:51:20.377455] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:51:21.817149] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 87%|████████▋ | 26/30 [34:57<05:22, 80.66s/it]

[D] [11:52:41.069354] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:52:42.503990] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 90%|█████████ | 27/30 [36:18<04:02, 80.67s/it]

[D] [11:54:01.739887] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:54:03.178494] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 93%|█████████▎| 28/30 [37:39<02:41, 80.67s/it]

[D] [11:55:22.418051] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:55:23.851624] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 97%|█████████▋| 29/30 [38:59<01:20, 80.67s/it]

[D] [11:56:43.070907] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:56:44.514953] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


100%|██████████| 30/30 [40:20<00:00, 80.69s/it]


train_num:397928 validation_num:12184065
====training fold:2====
[D] [11:58:15.692530] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:58:15.898699] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:58:16.491556] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:58:16.691380] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [11:58:17.002468] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Exp

  0%|          | 0/30 [00:00<?, ?it/s]

[D] [12:02:21.162134] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:02:22.607807] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


  3%|▎         | 1/30 [01:19<38:13, 79.08s/it]

[D] [12:03:40.179807] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:03:41.601461] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


  7%|▋         | 2/30 [02:38<36:52, 79.03s/it]

[D] [12:04:59.185327] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:05:00.612620] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 10%|█         | 3/30 [03:57<35:33, 79.01s/it]

[D] [12:06:18.171239] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:06:19.591831] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 13%|█▎        | 4/30 [05:16<34:14, 79.00s/it]

[D] [12:07:37.162434] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:07:38.587956] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 17%|█▋        | 5/30 [06:35<32:55, 79.01s/it]

[D] [12:08:56.173597] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:08:57.596171] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 20%|██        | 6/30 [07:54<31:36, 79.00s/it]

[D] [12:10:15.175614] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:10:16.601925] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 23%|██▎       | 7/30 [09:13<30:16, 79.00s/it]

[D] [12:11:34.161104] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:11:35.588358] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 27%|██▋       | 8/30 [10:32<28:58, 79.00s/it]

[D] [12:12:53.164265] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:12:54.588375] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 30%|███       | 9/30 [11:51<27:38, 79.00s/it]

[D] [12:14:12.153381] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:14:13.586357] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 33%|███▎      | 10/30 [13:10<26:19, 78.99s/it]

[D] [12:15:31.137093] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:15:32.566193] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 37%|███▋      | 11/30 [14:29<25:00, 78.99s/it]

[D] [12:16:50.104033] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:16:51.529048] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 40%|████      | 12/30 [15:47<23:41, 78.99s/it]

[D] [12:18:09.094446] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:18:10.525598] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 43%|████▎     | 13/30 [17:07<22:22, 79.00s/it]

[D] [12:19:28.115265] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:19:29.522721] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 47%|████▋     | 14/30 [18:25<21:03, 78.99s/it]

[D] [12:20:47.094687] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:20:48.517278] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 50%|█████     | 15/30 [19:44<19:44, 78.99s/it]

[D] [12:22:06.072386] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:22:07.507284] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 53%|█████▎    | 16/30 [21:04<18:26, 79.00s/it]

[D] [12:23:25.107472] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:23:26.530168] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 57%|█████▋    | 17/30 [22:22<17:06, 79.00s/it]

[D] [12:24:44.093634] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:24:45.532807] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 60%|██████    | 18/30 [23:42<15:48, 79.01s/it]

[D] [12:26:03.120572] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:26:04.551281] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 63%|██████▎   | 19/30 [25:00<14:28, 79.00s/it]

[D] [12:27:22.100629] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:27:23.526117] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 67%|██████▋   | 20/30 [26:20<13:10, 79.01s/it]

[D] [12:28:41.129511] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:28:42.569353] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 70%|███████   | 21/30 [27:39<11:51, 79.01s/it]

[D] [12:30:00.159035] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:30:01.596637] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 73%|███████▎  | 22/30 [28:58<10:32, 79.02s/it]

[D] [12:31:19.178535] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:31:20.615939] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 77%|███████▋  | 23/30 [30:17<09:13, 79.02s/it]

[D] [12:32:38.195089] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:32:39.631162] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 80%|████████  | 24/30 [31:36<07:54, 79.02s/it]

[D] [12:33:57.225828] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:33:58.660818] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 83%|████████▎ | 25/30 [32:55<06:35, 79.01s/it]

[D] [12:35:16.228238] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:35:17.662959] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 87%|████████▋ | 26/30 [34:14<05:16, 79.02s/it]

[D] [12:36:35.244000] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:36:36.671111] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 90%|█████████ | 27/30 [35:33<03:57, 79.01s/it]

[D] [12:37:54.230538] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:37:55.669406] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 93%|█████████▎| 28/30 [36:52<02:38, 79.01s/it]

[D] [12:39:13.254494] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:39:14.678480] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 97%|█████████▋| 29/30 [38:11<01:19, 79.02s/it]

[D] [12:40:32.289917] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:40:33.731559] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


100%|██████████| 30/30 [39:30<00:00, 79.01s/it]


train_num:398790 validation_num:12183880
====training fold:3====
[D] [12:42:03.222684] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:42:03.439186] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:42:04.038744] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:42:04.241452] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:42:04.608361] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Exp

  0%|          | 0/30 [00:00<?, ?it/s]

[D] [12:46:05.662624] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:46:07.139942] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


  3%|▎         | 1/30 [01:25<41:06, 85.04s/it]

[D] [12:47:30.698637] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:47:32.156413] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


  7%|▋         | 2/30 [02:50<39:39, 85.00s/it]

[D] [12:48:55.669531] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:48:57.118628] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 10%|█         | 3/30 [04:14<38:14, 84.97s/it]

[D] [12:50:20.607531] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:50:22.064790] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 13%|█▎        | 4/30 [05:39<36:49, 84.97s/it]

[D] [12:51:45.563777] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:51:47.004465] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 17%|█▋        | 5/30 [07:04<35:23, 84.96s/it]

[D] [12:53:10.509410] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:53:11.962108] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 20%|██        | 6/30 [08:29<33:59, 84.96s/it]

[D] [12:54:35.479630] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:54:36.932737] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 23%|██▎       | 7/30 [09:54<32:34, 84.96s/it]

[D] [12:56:00.428793] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:56:01.879131] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 27%|██▋       | 8/30 [11:19<31:09, 84.96s/it]

[D] [12:57:25.384022] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:57:26.831551] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 30%|███       | 9/30 [12:44<29:44, 84.95s/it]

[D] [12:58:50.330286] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [12:58:51.768335] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 33%|███▎      | 10/30 [14:09<28:19, 84.96s/it]

[D] [13:00:15.287540] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:00:16.711374] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 37%|███▋      | 11/30 [15:34<26:53, 84.94s/it]

[D] [13:01:40.197979] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:01:41.612010] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 40%|████      | 12/30 [16:59<25:28, 84.93s/it]

[D] [13:03:05.101797] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:03:06.528812] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 43%|████▎     | 13/30 [18:24<24:03, 84.92s/it]

[D] [13:04:30.012012] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:04:31.438375] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 47%|████▋     | 14/30 [19:49<22:38, 84.93s/it]

[D] [13:05:54.944611] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:05:56.370141] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 50%|█████     | 15/30 [21:14<21:13, 84.92s/it]

[D] [13:07:19.863746] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:07:21.293580] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 53%|█████▎    | 16/30 [22:39<19:48, 84.92s/it]

[D] [13:08:44.781582] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:08:46.204322] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 57%|█████▋    | 17/30 [24:04<18:23, 84.92s/it]

[D] [13:10:09.700915] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:10:11.127442] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 60%|██████    | 18/30 [25:28<16:59, 84.92s/it]

[D] [13:11:34.604085] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:11:36.029237] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 63%|██████▎   | 19/30 [26:53<15:34, 84.91s/it]

[D] [13:12:59.500217] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:13:00.924834] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 67%|██████▋   | 20/30 [28:18<14:09, 84.92s/it]

[D] [13:14:24.432198] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:14:25.855036] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 70%|███████   | 21/30 [29:43<12:44, 84.92s/it]

[D] [13:15:49.360292] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:15:50.788091] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 73%|███████▎  | 22/30 [31:08<11:19, 84.92s/it]

[D] [13:17:14.287727] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:17:15.705255] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 77%|███████▋  | 23/30 [32:33<09:54, 84.92s/it]

[D] [13:18:39.198644] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:18:40.624203] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 80%|████████  | 24/30 [33:58<08:29, 84.92s/it]

[D] [13:20:04.129494] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:20:05.554504] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 83%|████████▎ | 25/30 [35:23<07:04, 84.92s/it]

[D] [13:21:29.038883] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:21:30.465521] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 87%|████████▋ | 26/30 [36:48<05:39, 84.92s/it]

[D] [13:22:53.953133] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:22:55.377648] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 90%|█████████ | 27/30 [38:13<04:14, 84.91s/it]

[D] [13:24:18.862176] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:24:20.276236] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 93%|█████████▎| 28/30 [39:38<02:49, 84.91s/it]

[D] [13:25:43.751371] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:25:45.171456] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 97%|█████████▋| 29/30 [41:02<01:24, 84.91s/it]

[D] [13:27:08.655035] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:27:10.098691] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


100%|██████████| 30/30 [42:27<00:00, 84.93s/it]


train_num:401150 validation_num:12177718
====training fold:4====
[D] [13:28:45.487694] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:28:45.689665] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:28:46.290341] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:28:46.496787] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:28:46.882158] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Exp

  0%|          | 0/30 [00:00<?, ?it/s]

[D] [13:32:49.961644] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:32:51.418805] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


  3%|▎         | 1/30 [01:20<39:06, 80.91s/it]

[D] [13:34:10.874516] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:34:12.303124] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


  7%|▋         | 2/30 [02:41<37:44, 80.88s/it]

[D] [13:35:31.737160] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:35:33.146638] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 10%|█         | 3/30 [04:02<36:23, 80.87s/it]

[D] [13:36:52.585716] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:36:54.011047] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 13%|█▎        | 4/30 [05:23<35:02, 80.85s/it]

[D] [13:38:13.421426] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:38:14.840430] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 17%|█▋        | 5/30 [06:44<33:41, 80.86s/it]

[D] [13:39:34.270921] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:39:35.684361] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 20%|██        | 6/30 [08:05<32:20, 80.84s/it]

[D] [13:40:55.084956] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:40:56.498672] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 23%|██▎       | 7/30 [09:25<30:59, 80.83s/it]

[D] [13:42:15.906444] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:42:17.328168] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 27%|██▋       | 8/30 [10:46<29:38, 80.83s/it]

[D] [13:43:36.748045] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:43:38.173953] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 30%|███       | 9/30 [12:07<28:17, 80.84s/it]

[D] [13:44:57.599593] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:44:59.027616] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 33%|███▎      | 10/30 [13:28<26:56, 80.84s/it]

[D] [13:46:18.419134] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:46:19.845973] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 37%|███▋      | 11/30 [14:49<25:36, 80.84s/it]

[D] [13:47:39.270662] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:47:40.706322] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 40%|████      | 12/30 [16:10<24:15, 80.85s/it]

[D] [13:49:00.124376] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:49:01.539674] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 43%|████▎     | 13/30 [17:31<22:54, 80.85s/it]

[D] [13:50:20.978344] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:50:22.406131] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 47%|████▋     | 14/30 [18:51<21:33, 80.84s/it]

[D] [13:51:41.813728] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:51:43.238004] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 50%|█████     | 15/30 [20:12<20:12, 80.85s/it]

[D] [13:53:02.689199] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:53:04.115554] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 53%|█████▎    | 16/30 [21:33<18:51, 80.84s/it]

[D] [13:54:23.488727] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:54:24.907948] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 57%|█████▋    | 17/30 [22:54<17:30, 80.82s/it]

[D] [13:55:44.269315] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:55:45.689653] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 60%|██████    | 18/30 [24:15<16:09, 80.82s/it]

[D] [13:57:05.107209] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:57:06.533084] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 63%|██████▎   | 19/30 [25:35<14:49, 80.83s/it]

[D] [13:58:25.954036] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:58:27.380955] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 67%|██████▋   | 20/30 [26:56<13:28, 80.84s/it]

[D] [13:59:46.811353] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [13:59:48.223736] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 70%|███████   | 21/30 [28:17<12:07, 80.85s/it]

[D] [14:01:07.683210] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:01:09.109301] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 73%|███████▎  | 22/30 [29:38<10:46, 80.83s/it]

[D] [14:02:28.461633] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:02:29.878205] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 77%|███████▋  | 23/30 [30:59<09:25, 80.81s/it]

[D] [14:03:49.229802] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:03:50.650494] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 80%|████████  | 24/30 [32:20<08:04, 80.82s/it]

[D] [14:05:10.067781] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:05:11.486871] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 83%|████████▎ | 25/30 [33:40<06:44, 80.81s/it]

[D] [14:06:30.867020] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:06:32.281569] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 87%|████████▋ | 26/30 [35:01<05:23, 80.82s/it]

[D] [14:07:51.702384] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:07:53.123824] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 90%|█████████ | 27/30 [36:22<04:02, 80.82s/it]

[D] [14:09:12.514623] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:09:13.945658] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 93%|█████████▎| 28/30 [37:43<02:41, 80.82s/it]

[D] [14:10:33.326432] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:10:34.764928] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


 97%|█████████▋| 29/30 [39:04<01:20, 80.84s/it]

[D] [14:11:54.225559] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.
[D] [14:11:55.652195] /project/python/_skbuild/linux-x86_64-3.8/cmake-build/cuml/common/logger.cxx:3188 Expected column ('F') major order, Converting data, this will result in additional memory utilization.


100%|██████████| 30/30 [40:25<00:00, 80.84s/it]


In [None]:
train_df.to_pandas().to_csv(SAVE_DIR / 'train_df.csv')

In [70]:
def postprocess(df, threshold: float=0.95, top_n: int = 5):
  df.loc[df.predictions_proba>=threshold,'pred'] = 1
  df.loc[df.predictions_proba<threshold,'pred'] = 0 
  result = []
  grouped_df = df.groupby('topic_id')
  for idx, df_ in tqdm.tqdm(grouped_df):
    if df_.pred.sum()==0:
      res_df = df_.iloc[:top_n]
    else:
      res_df = df_[df_.pred==1]
    result.append(res_df.loc[:,['topic_id', 'content_id']])
  result = pd.concat(result, axis=0)
  result = pd.DataFrame(result.groupby('topic_id').apply(lambda x:' '.join(x.content_id)))
  result =result.reset_index().rename(columns={0:'content_ids'})
  result['content_ids'] = result['content_ids'].apply(lambda x:' '.join(x.split(' ')))
  return result

In [71]:
correlations_df = pd.read_csv('../data/row/correlations.csv')

In [72]:
result = postprocess(train_df)

100%|██████████| 61512/61512 [00:57<00:00, 1067.34it/s]


In [73]:
target_df = correlations_df[correlations_df.topic_id.isin(result.topic_id)].reset_index(drop=True)

In [74]:
from sklearn.metrics import fbeta_score
import numpy as np

def calc_f2_score(true_ids, pred_ids):
  true_positives = len(set(true_ids)&set(pred_ids))
  false_positives = len(set(pred_ids)-set(true_ids))
  false_negatives = len(set(true_ids)-set(pred_ids))

  beta = 2
  f2_score = ((1+beta**2)*true_positives)/((1+beta**2)*true_positives + beta**2*false_negatives + false_positives)
  return f2_score

def calc_f2_score_mean(target_df, pred_df):
  shape = target_df.shape
  score = [calc_f2_score(target_df.loc[i, 'content_ids'].split(), pred_df.loc[i, 'content_ids'].split()) for i in range(shape[0])]
  target_df['f2_score'] = score
  return target_df['f2_score'].mean(), target_df

In [75]:
calc_f2_score_mean(target_df, result)

(0.35242676437162374,
              topic_id                                        content_ids  \
 0      t_0008768bdee6       c_34e1424229b4 c_7d1a964d66d5 c_aab93ee667f4   
 1      t_0008a1bd84ba                      c_7ff92a954a3d c_8790b074383e   
 2      t_000d1fb3f2f5  c_07f1d0eec4b2 c_15a6fb858696 c_175e9db3fc44 c...   
 3      t_000feba42136                      c_2bbc650030f4 c_304ee4f59410   
 4      t_00102869fbcb  c_005e793bd0c5 c_066737fa5146 c_08b2070f92e0 c...   
 ...               ...                                                ...   
 61507  t_fff830472691                      c_61fb63326e5d c_8f224e321c87   
 61508  t_fff9e5407d13  c_026db653a269 c_0fb048a6412c c_20de77522603 c...   
 61509  t_fffbe1d5d43c                      c_46f852a49c08 c_6659207b25d5   
 61510  t_fffe14f1be1e                                     c_cece166bad6a   
 61511  t_fffe811a6da9                                     c_92b8fad372ee   
 
        f2_score  
 0      0.272727  
 1      0.0000

In [78]:
result

Unnamed: 0,topic_id,content_ids
0,t_0008768bdee6,c_563674a55312 c_179302899cfc c_7a656c9cd793 c...
1,t_0008a1bd84ba,c_70cee4e16c16 c_d9113d4c24bc c_cd596a5911d5 c...
2,t_000d1fb3f2f5,c_d7b213393f30 c_371c295721d5 c_55e7244b3fea c...
3,t_000feba42136,c_d59718802908 c_a07f9be390b4 c_5cb131146cbe c...
4,t_00102869fbcb,c_2da287c406c2 c_8559c7cb6343 c_e974407bc1d3 c...
...,...,...
61507,t_fff830472691,c_61fb63326e5d c_0bf565d988de c_773cd43e2ff7
61508,t_fff9e5407d13,c_0fb048a6412c c_15d01758cca7 c_5a80e03b571a c...
61509,t_fffbe1d5d43c,c_0dd3eab0f444
61510,t_fffe14f1be1e,c_47a8fdc3f590 c_f15928e0f771
