# Загрузка данных с кагла

Перед этим необходимо скопировать ключ со своего акка

In [16]:
!pip install -q kaggle

from google.colab import files
files.upload()


!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle competitions download -c hse-rec-sys-challenge-2024

!unzip hse-rec-sys-challenge-2024.zip

Saving kaggle.json to kaggle (1).json
hse-rec-sys-challenge-2024.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  hse-rec-sys-challenge-2024.zip
replace events.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: events.csv              
replace item_features.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: item_features.csv       
replace submission_sample.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: submission_sample.csv   
replace user_features.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: user_features.csv       


Ставим пакеты

In [17]:
!pip install lightgbm
!pip install catboost
!pip install implicit
!pip install optuna



В нашем случае будем использовать ALS - как baseline

Catboost и LGBM - наши основные модели

Код разделил на несколько ячеек где реализовал все необходимые функции для работы. Какого-то строгого порядка нет, точнее он был, но из-за постоянной редакции очередность функций сбилась. Это не особо важно, но красоты стало меньше

In [30]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier, Pool
from scipy.sparse import csr_matrix
from typing import Tuple
from sklearn.preprocessing import LabelEncoder
from implicit.als import AlternatingLeastSquares
from lightgbm import LGBMClassifier
import optuna

# Фиксация всех случайных сидов
SEED = 42
np.random.seed(SEED)

# Загрузка данных
events = pd.read_csv('events.csv')
user = pd.read_csv('user_features.csv')
item = pd.read_csv('item_features.csv')

# Функция для вычисления метрики recall@10
def calculate_recall_at_10(predictions, ground_truth):
    hits = 0
    for user_id in ground_truth['user_id'].unique():
        actual_items = set(ground_truth.loc[ground_truth['user_id'] == user_id, 'item_id'])
        predicted_items = set(predictions.loc[predictions['user_id'] == user_id, 'item_id'])

        hits += len(actual_items & predicted_items)

    recall = hits / len(ground_truth)
    return recall

# Преобразование временных данных в числовой формат
def convert_timedelta_to_numeric(df):
    for col in df.select_dtypes(include=['timedelta64[ns]']).columns:
        df[col] = df[col].dt.total_seconds()
    return df

# Функция для оптимизации гиперпараметров с использованием Optuna
def optimize_lightgbm(X_train, y_train):
    def objective(trial):
        param = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 200),
            'max_depth': trial.suggest_int('max_depth', 4, 12),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'num_leaves': trial.suggest_int('num_leaves', 20, 100),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 30),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'random_state': SEED  # фиксируем сид
        }

        model = LGBMClassifier(**param)
        model.fit(X_train, y_train)
        score = model.score(X_train, y_train)
        return score

    # Установка фиксированного random_state через Sampler
    sampler = optuna.samplers.TPESampler(seed=SEED)
    study = optuna.create_study(direction='maximize', sampler=sampler)
    study.optimize(objective, n_trials=50)

    return study.best_params

# LightGBM model
def fit_lightgbm(
    candidates: pd.DataFrame,
    test_events: pd.DataFrame,
    events: pd.DataFrame
) -> LGBMClassifier:
    candidates_labeled = (
        candidates
        .merge(
            (
                test_events[['user_id', 'item_id']]
                .assign(label=1)
            ),
            on=['user_id', 'item_id'],
            how='left',
        )
    )

    candidates_labeled['label'] = (
        candidates_labeled['label']
        .fillna(0)
        .astype('int32')
    )

    positive_classes_rate = candidates_labeled['label'].sum() / len(candidates_labeled)

    lgb_features = enrich_interactions(candidates_labeled, events)
    lgb_features = convert_timedelta_to_numeric(lgb_features)

    X_train = lgb_features.drop(columns=['user_id', 'item_id', 'label'])
    for col in X_train.columns:
        if X_train[col].dtype == 'object':
            X_train[col] = pd.to_numeric(X_train[col], errors='coerce')
            X_train[col].fillna(0, inplace=True)

    y_train = lgb_features['label']

    best_params = optimize_lightgbm(X_train, y_train)
    best_params['class_weight'] = {0: positive_classes_rate, 1: 1 - positive_classes_rate}
    best_params['metric'] = 'binary_logloss'
    best_params['random_state'] = SEED  # фиксируем сид

    lgb_model = LGBMClassifier(**best_params)
    lgb_model.fit(X_train, y_train)

    return lgb_model



# предикт LightGBM model с проверкой данных
def predict_lightgbm(
    candidates: pd.DataFrame,
    events: pd.DataFrame,
    lgb_model: LGBMClassifier
) -> pd.DataFrame:
    interactions_featurized = enrich_interactions(candidates, events)

    print('Running LightGBM scoring')
    interactions = interactions_featurized[['user_id', 'item_id']].copy()
    features = interactions_featurized.drop(columns=['user_id', 'item_id'])

    # Преобразование временных данных в числовой формат
    features = convert_timedelta_to_numeric(features)

    for col in features.columns:
        if features[col].dtype == 'object':
            features[col] = pd.to_numeric(features[col], errors='coerce')
            features[col].fillna(0, inplace=True)

    scores = lgb_model.predict_proba(features)[:, 1].flatten()
    interactions['lightgbm_score'] = scores
    return interactions


Шаманим с данными и ALS

In [31]:
# Объединение метаинформации о фильмах с транзакциями
def add_meta_to_events(events, item):
    events_with_meta = (
        events
        .merge(item,
               on='item_id', how='left')
    )
    # Пример вычисления отношения просмотров
    events_with_meta['watched_ratio'] = events_with_meta['rating'] / 5.0

    def score_event(row):
        score = 1
        if row['rating'] >= 3:  # Пример: учитываем рейтинг от 3 и выше
            score = int(row['rating'] * 2)
        return min(score, 10)

    events_with_meta['score'] = events_with_meta.apply(score_event, axis=1)
    return events_with_meta

# Добавление рейтингов к событиям
def add_ratings_to_events(events):
    events['score'].fillna(events['rating'], inplace=True)
    return events

# Кодирование TF-IDF
def encode_tfidf_coo(events: pd.DataFrame) -> pd.DataFrame:
    score_sum_per_user = (
        events
        .groupby('user_id')['score']
        .transform('sum')
    )
    user_count_per_item = (
        events
        .groupby('item_id')['user_id']
        .transform('size')
    )
    tf = events['score'].values / score_sum_per_user.values
    idf = np.log(len(score_sum_per_user) / user_count_per_item.values)

    tfidf = events[['user_id', 'item_id']].copy()
    tfidf['value'] = tf * idf

    return tfidf

# Преобразование данных в формат TF-IDF для ALS
def encode_tfidf(events: pd.DataFrame) -> Tuple[LabelEncoder, LabelEncoder, csr_matrix]:
    tfidf = encode_tfidf_coo(events)

    user_encoder = LabelEncoder()
    item_encoder = LabelEncoder()

    user_index = user_encoder.fit_transform(events['user_id'].values)
    item_index = item_encoder.fit_transform(events['item_id'].values)

    tfidf_csr = csr_matrix(
        (
            tfidf['value'].astype('float32').values,
            (user_index, item_index)
        ),
        shape=(len(user_encoder.classes_), len(item_encoder.classes_))
    )

    return user_encoder, item_encoder, tfidf_csr

# Функция для запуска ALS
def als_fit_predict(events_csr: csr_matrix):
    als = AlternatingLeastSquares(factors=128, iterations=30, alpha=40.0, calculate_training_loss=True)
    als.fit(events_csr)

    recommendations_matrix, recommendations_scores = als.recommend(
        np.arange(0, events_csr.shape[0]),
        events_csr,
        N=10,
        filter_already_liked_items=True
    )

    return recommendations_matrix, recommendations_scores

# Преобразование рекомендаций в DataFrame
def als_recommendations_to_df(
    recommendations_matrix: np.ndarray,
    recommendations_scores: np.ndarray,
    user_encoder: LabelEncoder,
    item_encoder: LabelEncoder,
    user_key = 'user_id',
    item_key = 'item_id'
) -> pd.DataFrame:
    recommendations_indices = pd.DataFrame({
        'user_index': np.arange(0, len(recommendations_matrix)),
        'item_index': list(recommendations_matrix),
        'score': list(recommendations_scores),
    })

    user_mapping = pd.DataFrame({
        'user_index': np.arange(0, len(user_encoder.classes_)),
        user_key: user_encoder.classes_,
    })

    item_mapping = pd.DataFrame({
        'item_index': np.arange(0, len(item_encoder.classes_)),
        item_key: item_encoder.classes_,
    })

    recommendations = (
        recommendations_indices
        .merge(
            user_mapping,
            on='user_index',
            how='left',
        )
        .drop(columns=['user_index'])
        .explode(['item_index', 'score'], ignore_index=True)
        .merge(
            item_mapping,
            on='item_index',
            how='left',
        )
        .drop(columns=['item_index'])
    )

    return recommendations

# Главная функция для запуска ALS
def run_als(events: pd.DataFrame, item: pd.DataFrame) -> pd.DataFrame:
    print('Preprocess events')
    events = add_meta_to_events(events, item)
    events = add_ratings_to_events(events)

    print('Compute TF-IDF')
    user_encoder, item_encoder, events_csr = encode_tfidf(events)

    print('Run ALS')
    recommendations_item_indices, recommendations_scores = als_fit_predict(events_csr)

    print('Postprocess ALS prediction')
    recommendations = als_recommendations_to_df(
        recommendations_item_indices,
        recommendations_scores,
        user_encoder,
        item_encoder,
        user_key='user_id',
        item_key='item_id',
    )

    return recommendations

# Выполнение рекомендаций
recommendations = run_als(events, item)
print(recommendations.head())

Preprocess events
Compute TF-IDF
Run ALS


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  events['score'].fillna(events['rating'], inplace=True)


  0%|          | 0/30 [00:00<?, ?it/s]

Postprocess ALS prediction
      score  user_id  item_id
0  0.931738        0     1811
1  0.831426        0      331
2  0.800304        0     2256
3  0.771639        0     1545
4   0.69525        0     1248


Еще несколько функций для обработки данных

In [32]:
# Функция для объединения новых фич с item_features
def merge_item_feature(item_features, new_feature):
    item_features = item_features.merge(
        new_feature,
        on='item_id',
        how='left'
    )
    return item_features

# Функция для объединения новых фич с user_features
def merge_user_feature(user_features, new_feature):
    user_features = user_features.merge(
        new_feature,
        on='user_id',
        how='left'
    )
    return user_features

# Добавление фичи популярности фильмов
def add_item_popularity_feature(item_features: pd.DataFrame, events: pd.DataFrame):
    item_occurrences = (
        events
        .groupby('item_id')
        .size()
        .reset_index(name='item_occurrences')
    )

    item_occurrences['item_popularity'] = (
        item_occurrences['item_occurrences'] /
        events['item_id'].nunique()
    )
    item_occurrences.drop(columns=['item_occurrences'], inplace=True)

    return merge_item_feature(item_features, item_occurrences)

# Добавление фичи числа закладок на фильмы (если применимо)
def add_item_bookmark_count_feature(item_features: pd.DataFrame, events: pd.DataFrame):
    bookmarks_per_item = (
        events
        .groupby('item_id')
        .size()
        .reset_index(name='item_bookmark_count')
    )

    item_features = merge_item_feature(item_features, bookmarks_per_item)
    item_features['item_bookmark_count'].fillna(0, inplace=True)

    return item_features

# Добавление фичи количества просмотров для пользователя
def add_user_watch_count_feature(user_features: pd.DataFrame, events: pd.DataFrame):
    user_watch_count = (
        events
        .groupby('user_id')
        .size()
        .reset_index(name='user_watch_count')
    )
    return merge_user_feature(user_features, user_watch_count)

# Добавление фичи стандартного отклонения времени просмотра для пользователя
def add_user_watch_time_std(user_features: pd.DataFrame, events: pd.DataFrame):
    user_watch_time_std = (
        events
        .groupby('user_id')['timestamp']
        .std()
        .reset_index(name='user_watch_time_std')
    )
    return merge_user_feature(user_features, user_watch_time_std)

# Генерация фич для фильмов
def generate_item_features(events: pd.DataFrame) -> pd.DataFrame:
    item_features = pd.DataFrame({
        'item_id': events['item_id'].unique()
    })
    item_features = add_item_popularity_feature(item_features, events)
    item_features = add_item_bookmark_count_feature(item_features, events)
    return item_features

# Убедитесь, что timestamp имеет тип datetime
events['timestamp'] = pd.to_datetime(events['timestamp'], errors='coerce')

# Добавление фичи количества просмотров за последнюю неделю для пользователей
def add_recent_watch_count(user_features: pd.DataFrame, events: pd.DataFrame):
    current_time = pd.Timestamp.now()  # Текущее время
    recent_watches = (
        events
        .loc[events['timestamp'] >= current_time - pd.Timedelta(weeks=1)]  # Изменение с использованием loc
        .groupby('user_id')
        .size()
        .reset_index(name='recent_watch_count')
    )
    user_features = merge_user_feature(user_features, recent_watches)
    user_features['recent_watch_count'].fillna(0, inplace=True)
    return user_features
# Добавление фичи средней оценки пользователя
def add_user_avg_rating(user_features: pd.DataFrame, events: pd.DataFrame):
    avg_rating = (
        events
        .groupby('user_id')['rating']
        .mean()
        .reset_index(name='user_avg_rating')
    )
    user_features = merge_user_feature(user_features, avg_rating)
    user_features['user_avg_rating'].fillna(0, inplace=True)
    return user_features

# Добавление этих фич в генерацию фич для пользователей
def generate_user_features(events: pd.DataFrame) -> pd.DataFrame:
    user_features = pd.DataFrame({
        'user_id': events['user_id'].unique()
    })
    user_features = add_user_watch_count_feature(user_features, events)
    user_features = add_user_watch_time_std(user_features, events)
    user_features = add_recent_watch_count(user_features, events)  # Новая фича
    user_features = add_user_avg_rating(user_features, events)  # Новая фича
    return user_features

# Обогащение взаимодействий (интеракций) фичами
def enrich_interactions(
    interactions: pd.DataFrame,
    events: pd.DataFrame
) -> pd.DataFrame:
    print('Generating item features')
    item_features = generate_item_features(events)

    print('Generating user features')
    user_features = generate_user_features(events)

    print('Merging features to interactions')
    interactions_featurized = (
        interactions
        .merge(
            item_features,
            on='item_id',
            how='left'
        )
        .merge(
            user_features,
            on='user_id',
            how='left'
        )
    )

    return interactions_featurized

# Функция для оценки интеракций с использованием CatBoost
def score_interactions(
    interactions_featurized: pd.DataFrame,
    catboost_model: CatBoostClassifier,
) -> pd.DataFrame:
    interactions = interactions_featurized[['user_id', 'item_id']].copy()
    features = interactions_featurized.drop(columns=['user_id', 'item_id'])
    scores = catboost_model.predict_proba(features)[:, 1].flatten()
    interactions['catboost_score'] = scores
    return interactions

Переходим к catboost

In [33]:
# Обучение модели CatBoost
def fit_catboost(
    candidates: pd.DataFrame,
    test_events: pd.DataFrame,
    events: pd.DataFrame
) -> CatBoostClassifier:
    candidates_labeled = (
        candidates
        .merge(
            (
                test_events[['user_id', 'item_id']]
                .assign(label=1)
            ),
            on=['user_id', 'item_id'],
            how='left',
        )
    )

    candidates_labeled['label'] = (
        candidates_labeled['label']
        .fillna(0)
        .astype('int32')
    )

    positive_classes_rate = candidates_labeled['label'].sum() / len(candidates_labeled)

    cb_features = enrich_interactions(candidates_labeled, events)

    cb_pool = Pool(
        cb_features.drop(columns=['user_id', 'item_id', 'label']),
        cb_features['label']
    )

    cb_cls = CatBoostClassifier(
        iterations=100,
        class_weights=[positive_classes_rate, 1 - positive_classes_rate],
        eval_metric='BalancedAccuracy',
        objective='Logloss',
        random_seed=SEED  # фиксируем сид
    )

    cb_cls.fit(cb_pool, verbose=1)

    return cb_cls

# Предсказание с использованием обученной модели CatBoost
def predict_catboost(
    candidates: pd.DataFrame,
    events: pd.DataFrame,
    catboost_model: CatBoostClassifier
) -> pd.DataFrame:
    interactions_featurized = enrich_interactions(candidates, events)

    print('Running CatBoost scoring')
    scored_interactions = score_interactions(interactions_featurized, catboost_model)

    return scored_interactions


Переходим к обучению

In [34]:

# Добавляем ранжирование событий для каждого пользователя по времени
events['split_rank_per_user'] = (
    events
    .groupby('user_id')['timestamp']
    .rank('first', ascending=False)
    .astype('int32')
)

# Разделение данных на тренировочный и тестовый наборы для ALS
als_events_train = (
    events
    .query('split_rank_per_user > 2')
    .drop(columns=['split_rank_per_user'])
)

als_events_test = (
    events
    .query('split_rank_per_user <= 2')
    .drop(columns=['split_rank_per_user'])
)[["item_id", "user_id"]]

# Запуск ALS для создания кандидатов для обучения модели CatBoost
als_candidates_to_train_catboost = run_als(als_events_train, item)

# Обучение модели CatBoost с использованием кандидатов, сгенерированных ALS
catboost_model = fit_catboost(
    als_candidates_to_train_catboost,
    als_events_test,
    events
)

# Training LightGBM
lightgbm_model = fit_lightgbm(
    als_candidates_to_train_catboost,
    als_events_test,
    events
)

# Запуск ALS на всех данных для генерации кандидатов
als_candidates = run_als(events, item)

catboost_prediction_full = predict_catboost(als_candidates, events, catboost_model)
lightgbm_prediction_full = predict_lightgbm(als_candidates, events, lightgbm_model)


combined_predictions = (
    catboost_prediction_full[['user_id', 'item_id', 'catboost_score']]
    .merge(
        lightgbm_prediction_full[['user_id', 'item_id', 'lightgbm_score']],
        on=['user_id', 'item_id'],
        how='inner'
    )
)



combined_predictions['final_score'] = (
    combined_predictions['catboost_score'] * 0.5 +
    combined_predictions['lightgbm_score'] * 0.5
)

# Выбор топ-10 рекомендаций на основе финального скоринга
final_recommendations = (
    combined_predictions
    .sort_values('final_score', ascending=False, ignore_index=True)
    .groupby('user_id')
    .head(10)
)

# Рассчет метрики recall@10
recall_at_10 = calculate_recall_at_10(final_recommendations, als_events_test)
print(f'Recall@10: {recall_at_10:.4f}')

Preprocess events
Compute TF-IDF
Run ALS


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  events['score'].fillna(events['rating'], inplace=True)


  0%|          | 0/30 [00:00<?, ?it/s]

Postprocess ALS prediction
Generating item features
Generating user features
Merging features to interactions


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  item_features['item_bookmark_count'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  user_features['recent_watch_count'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on 

Learning rate set to 0.490272
0:	learn: 0.6519323	total: 7.77ms	remaining: 770ms
1:	learn: 0.6554779	total: 14.8ms	remaining: 727ms
2:	learn: 0.6620804	total: 21.1ms	remaining: 682ms
3:	learn: 0.6663461	total: 27.5ms	remaining: 660ms
4:	learn: 0.6670651	total: 33.7ms	remaining: 641ms
5:	learn: 0.6743995	total: 39.8ms	remaining: 623ms
6:	learn: 0.6745742	total: 45.7ms	remaining: 607ms
7:	learn: 0.6831516	total: 52.1ms	remaining: 599ms
8:	learn: 0.6841629	total: 58.4ms	remaining: 591ms
9:	learn: 0.6898207	total: 67.5ms	remaining: 608ms
10:	learn: 0.6929038	total: 73.6ms	remaining: 596ms
11:	learn: 0.6935187	total: 80.1ms	remaining: 588ms
12:	learn: 0.6975028	total: 86.5ms	remaining: 579ms
13:	learn: 0.7004448	total: 92.6ms	remaining: 569ms
14:	learn: 0.7059413	total: 99.1ms	remaining: 562ms
15:	learn: 0.7055740	total: 106ms	remaining: 556ms
16:	learn: 0.7111562	total: 112ms	remaining: 549ms
17:	learn: 0.7115213	total: 119ms	remaining: 541ms
18:	learn: 0.7186143	total: 126ms	remaining: 53

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  item_features['item_bookmark_count'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  user_features['recent_watch_count'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on 

Merging features to interactions
[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000490 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:58:59,404] Trial 0 finished with value: 0.9888741721854305 and parameters: {'n_estimators': 106, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'num_leaves': 68, 'min_child_samples': 9, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998}. Best is trial 0 with value: 0.9888741721854305.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002662 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:00,117] Trial 1 finished with value: 0.9858112582781456 and parameters: {'n_estimators': 180, 'max_depth': 9, 'learning_rate': 0.11114989443094977, 'num_leaves': 21, 'min_child_samples': 30, 'subsample': 0.9162213204002109, 'colsample_bytree': 0.6061695553391381}. Best is trial 0 with value: 0.9888741721854305.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000829 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:00,585] Trial 2 finished with value: 0.9855629139072848 and parameters: {'n_estimators': 77, 'max_depth': 5, 'learning_rate': 0.028145092716060652, 'num_leaves': 62, 'min_child_samples': 16, 'subsample': 0.645614570099021, 'colsample_bytree': 0.8059264473611898}. Best is trial 0 with value: 0.9888741721854305.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000829 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:01,088] Trial 3 finished with value: 0.9855629139072848 and parameters: {'n_estimators': 71, 'max_depth': 6, 'learning_rate': 0.03476649150592621, 'num_leaves': 56, 'min_child_samples': 25, 'subsample': 0.5998368910791798, 'colsample_bytree': 0.7571172192068059}. Best is trial 0 with value: 0.9888741721854305.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000899 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:01,752] Trial 4 finished with value: 0.9855960264900663 and parameters: {'n_estimators': 139, 'max_depth': 4, 'learning_rate': 0.07896186801026692, 'num_leaves': 33, 'min_child_samples': 6, 'subsample': 0.9744427686266666, 'colsample_bytree': 0.9828160165372797}. Best is trial 0 with value: 0.9888741721854305.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002609 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:03,482] Trial 5 finished with value: 0.9855629139072848 and parameters: {'n_estimators': 172, 'max_depth': 6, 'learning_rate': 0.013940346079873234, 'num_leaves': 75, 'min_child_samples': 16, 'subsample': 0.5610191174223894, 'colsample_bytree': 0.7475884550556351}. Best is trial 0 with value: 0.9888741721854305.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001444 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:03,946] Trial 6 finished with value: 0.9855629139072848 and parameters: {'n_estimators': 55, 'max_depth': 12, 'learning_rate': 0.024112898115291985, 'num_leaves': 73, 'min_child_samples': 13, 'subsample': 0.7600340105889054, 'colsample_bytree': 0.7733551396716398}. Best is trial 0 with value: 0.9888741721854305.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000899 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:04,618] Trial 7 finished with value: 0.9893046357615894 and parameters: {'n_estimators': 77, 'max_depth': 12, 'learning_rate': 0.13962563737015762, 'num_leaves': 96, 'min_child_samples': 28, 'subsample': 0.7989499894055425, 'colsample_bytree': 0.9609371175115584}. Best is trial 7 with value: 0.9893046357615894.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000815 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:05,004] Trial 8 finished with value: 0.9855629139072848 and parameters: {'n_estimators': 63, 'max_depth': 5, 'learning_rate': 0.011662890273931383, 'num_leaves': 46, 'min_child_samples': 15, 'subsample': 0.6356745158869479, 'colsample_bytree': 0.9143687545759647}. Best is trial 7 with value: 0.9893046357615894.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000875 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:05,571] Trial 9 finished with value: 0.9855794701986755 and parameters: {'n_estimators': 103, 'max_depth': 6, 'learning_rate': 0.06333268775321842, 'num_leaves': 31, 'min_child_samples': 25, 'subsample': 0.5372753218398854, 'colsample_bytree': 0.9934434683002586}. Best is trial 7 with value: 0.9893046357615894.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000825 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:06,685] Trial 10 finished with value: 0.9999337748344371 and parameters: {'n_estimators': 127, 'max_depth': 10, 'learning_rate': 0.2704729722717776, 'num_leaves': 97, 'min_child_samples': 22, 'subsample': 0.7876040334076363, 'colsample_bytree': 0.8651834476035049}. Best is trial 10 with value: 0.9999337748344371.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000801 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:08,039] Trial 11 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 144, 'max_depth': 10, 'learning_rate': 0.2975261995001501, 'num_leaves': 100, 'min_child_samples': 23, 'subsample': 0.787259441123297, 'colsample_bytree': 0.8624854118086367}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002791 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:09,475] Trial 12 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 148, 'max_depth': 9, 'learning_rate': 0.29515574329354227, 'num_leaves': 100, 'min_child_samples': 22, 'subsample': 0.8560072547236134, 'colsample_bytree': 0.8659124772759615}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000867 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:10,772] Trial 13 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 154, 'max_depth': 10, 'learning_rate': 0.2554897812704697, 'num_leaves': 89, 'min_child_samples': 20, 'subsample': 0.8552976434133046, 'colsample_bytree': 0.8559889011067058}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002673 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:12,514] Trial 14 finished with value: 0.9978145695364239 and parameters: {'n_estimators': 196, 'max_depth': 8, 'learning_rate': 0.1887279139198365, 'num_leaves': 83, 'min_child_samples': 21, 'subsample': 0.6823370190932401, 'colsample_bytree': 0.6795419728640772}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:13,902] Trial 15 finished with value: 0.9949006622516556 and parameters: {'n_estimators': 153, 'max_depth': 8, 'learning_rate': 0.18468934395628178, 'num_leaves': 100, 'min_child_samples': 25, 'subsample': 0.8636970220104709, 'colsample_bytree': 0.8758067124078944}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:15,023] Trial 16 finished with value: 0.9979966887417219 and parameters: {'n_estimators': 106, 'max_depth': 10, 'learning_rate': 0.2574389854097962, 'num_leaves': 81, 'min_child_samples': 20, 'subsample': 0.7173832817613116, 'colsample_bytree': 0.6869888528707921}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003965 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:16,624] Trial 17 finished with value: 0.9861092715231788 and parameters: {'n_estimators': 153, 'max_depth': 9, 'learning_rate': 0.04848754515310091, 'num_leaves': 88, 'min_child_samples': 24, 'subsample': 0.8529694011819124, 'colsample_bytree': 0.9138936297287705}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000833 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:17,667] Trial 18 finished with value: 0.9961920529801325 and parameters: {'n_estimators': 126, 'max_depth': 11, 'learning_rate': 0.17441159401168885, 'num_leaves': 91, 'min_child_samples': 28, 'subsample': 0.9768435138993212, 'colsample_bytree': 0.8171470785071347}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000836 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:18,631] Trial 19 finished with value: 0.9863907284768212 and parameters: {'n_estimators': 140, 'max_depth': 7, 'learning_rate': 0.08933100959339269, 'num_leaves': 55, 'min_child_samples': 18, 'subsample': 0.8985637995946585, 'colsample_bytree': 0.9127677064047461}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002658 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:19,941] Trial 20 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 172, 'max_depth': 9, 'learning_rate': 0.29831973866884814, 'num_leaves': 78, 'min_child_samples': 11, 'subsample': 0.8200622143396646, 'colsample_bytree': 0.7120629854048839}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000821 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:21,216] Trial 21 finished with value: 0.9996026490066225 and parameters: {'n_estimators': 152, 'max_depth': 10, 'learning_rate': 0.20979842927762166, 'num_leaves': 89, 'min_child_samples': 19, 'subsample': 0.9219864480021497, 'colsample_bytree': 0.8366733523272174}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000807 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:22,725] Trial 22 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 164, 'max_depth': 11, 'learning_rate': 0.29885779690358494, 'num_leaves': 100, 'min_child_samples': 22, 'subsample': 0.7360687845931082, 'colsample_bytree': 0.8671238018540113}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000872 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:24,382] Trial 23 finished with value: 0.9989403973509934 and parameters: {'n_estimators': 191, 'max_depth': 11, 'learning_rate': 0.14601769176156076, 'num_leaves': 91, 'min_child_samples': 23, 'subsample': 0.8490483200027006, 'colsample_bytree': 0.9365712142609361}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000818 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:25,571] Trial 24 finished with value: 0.997682119205298 and parameters: {'n_estimators': 141, 'max_depth': 9, 'learning_rate': 0.21888339317113836, 'num_leaves': 85, 'min_child_samples': 27, 'subsample': 0.7722627621679675, 'colsample_bytree': 0.7986557562498018}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000822 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:26,948] Trial 25 finished with value: 0.9960927152317881 and parameters: {'n_estimators': 122, 'max_depth': 10, 'learning_rate': 0.16364489024782425, 'num_leaves': 95, 'min_child_samples': 19, 'subsample': 0.8240718275728154, 'colsample_bytree': 0.8537341895677684}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004003 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:28,426] Trial 26 finished with value: 0.9975827814569537 and parameters: {'n_estimators': 160, 'max_depth': 8, 'learning_rate': 0.22949367934479417, 'num_leaves': 68, 'min_child_samples': 21, 'subsample': 0.8877885446780809, 'colsample_bytree': 0.8881061115123524}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000884 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:29,309] Trial 27 finished with value: 0.9940066225165562 and parameters: {'n_estimators': 114, 'max_depth': 11, 'learning_rate': 0.13294191006126022, 'num_leaves': 92, 'min_child_samples': 14, 'subsample': 0.7305883778847562, 'colsample_bytree': 0.9442924999601435}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001081 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:30,039] Trial 28 finished with value: 0.9860264900662251 and parameters: {'n_estimators': 90, 'max_depth': 7, 'learning_rate': 0.10257321328874733, 'num_leaves': 100, 'min_child_samples': 18, 'subsample': 0.690480495040737, 'colsample_bytree': 0.788261421342471}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000805 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:31,023] Trial 29 finished with value: 0.9971854304635761 and parameters: {'n_estimators': 138, 'max_depth': 10, 'learning_rate': 0.2263128141355243, 'num_leaves': 69, 'min_child_samples': 27, 'subsample': 0.9435849383457133, 'colsample_bytree': 0.831962880712174}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000484 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:32,529] Trial 30 finished with value: 0.9858774834437086 and parameters: {'n_estimators': 179, 'max_depth': 8, 'learning_rate': 0.04634598796674966, 'num_leaves': 81, 'min_child_samples': 23, 'subsample': 0.5001310561523564, 'colsample_bytree': 0.5110021684815564}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002650 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:33,820] Trial 31 finished with value: 0.966523178807947 and parameters: {'n_estimators': 165, 'max_depth': 9, 'learning_rate': 0.2909337937359034, 'num_leaves': 77, 'min_child_samples': 7, 'subsample': 0.8195583224644691, 'colsample_bytree': 0.716852767217316}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002662 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:35,331] Trial 32 finished with value: 0.9999503311258278 and parameters: {'n_estimators': 182, 'max_depth': 9, 'learning_rate': 0.24612061341737307, 'num_leaves': 86, 'min_child_samples': 10, 'subsample': 0.8215557450742038, 'colsample_bytree': 0.6057597199088922}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003552 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:36,611] Trial 33 finished with value: 0.9999503311258278 and parameters: {'n_estimators': 148, 'max_depth': 9, 'learning_rate': 0.28408389089747627, 'num_leaves': 92, 'min_child_samples': 11, 'subsample': 0.8749371437631981, 'colsample_bytree': 0.6323585999023394}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002726 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:37,891] Trial 34 finished with value: 0.9952152317880795 and parameters: {'n_estimators': 173, 'max_depth': 10, 'learning_rate': 0.12569678896387215, 'num_leaves': 78, 'min_child_samples': 12, 'subsample': 0.9360516315557644, 'colsample_bytree': 0.7488896253867213}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:39,330] Trial 35 finished with value: 0.9894701986754967 and parameters: {'n_estimators': 133, 'max_depth': 7, 'learning_rate': 0.17104265864214943, 'num_leaves': 93, 'min_child_samples': 16, 'subsample': 0.8010646774387342, 'colsample_bytree': 0.7117158187529896}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003856 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:40,467] Trial 36 finished with value: 0.9906291390728477 and parameters: {'n_estimators': 163, 'max_depth': 9, 'learning_rate': 0.1053749348194819, 'num_leaves': 61, 'min_child_samples': 7, 'subsample': 0.8387004763749483, 'colsample_bytree': 0.6305221789473104}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000858 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:41,944] Trial 37 finished with value: 0.9857450331125828 and parameters: {'n_estimators': 186, 'max_depth': 11, 'learning_rate': 0.022565565689933613, 'num_leaves': 87, 'min_child_samples': 9, 'subsample': 0.9991417930830016, 'colsample_bytree': 0.7713747646030205}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000820 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:42,958] Trial 38 finished with value: 0.9977317880794702 and parameters: {'n_estimators': 145, 'max_depth': 12, 'learning_rate': 0.1985855806101896, 'num_leaves': 72, 'min_child_samples': 5, 'subsample': 0.7707751574972814, 'colsample_bytree': 0.897399007583037}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000487 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:44,430] Trial 39 finished with value: 0.9876986754966888 and parameters: {'n_estimators': 200, 'max_depth': 9, 'learning_rate': 0.07485497274693963, 'num_leaves': 66, 'min_child_samples': 17, 'subsample': 0.9009507992788578, 'colsample_bytree': 0.5777853244552633}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002698 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:45,456] Trial 40 finished with value: 0.9923509933774834 and parameters: {'n_estimators': 172, 'max_depth': 10, 'learning_rate': 0.15718831599289973, 'num_leaves': 56, 'min_child_samples': 30, 'subsample': 0.749670671489579, 'colsample_bytree': 0.7275786785517303}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000811 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:46,898] Trial 41 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 159, 'max_depth': 11, 'learning_rate': 0.28302226086442595, 'num_leaves': 98, 'min_child_samples': 22, 'subsample': 0.7374896746357029, 'colsample_bytree': 0.8557788333421755}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000877 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:48,343] Trial 42 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 173, 'max_depth': 12, 'learning_rate': 0.2877398733919786, 'num_leaves': 96, 'min_child_samples': 20, 'subsample': 0.7009106099270098, 'colsample_bytree': 0.8223179926689209}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000879 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:49,926] Trial 43 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 166, 'max_depth': 11, 'learning_rate': 0.24136206983100886, 'num_leaves': 99, 'min_child_samples': 23, 'subsample': 0.6406089351880103, 'colsample_bytree': 0.9632060314532265}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003980 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:51,893] Trial 44 finished with value: 0.999569536423841 and parameters: {'n_estimators': 157, 'max_depth': 10, 'learning_rate': 0.204472266698477, 'num_leaves': 95, 'min_child_samples': 26, 'subsample': 0.7893567565530104, 'colsample_bytree': 0.8465670101509657}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000836 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:53,074] Trial 45 finished with value: 0.9974834437086093 and parameters: {'n_estimators': 132, 'max_depth': 8, 'learning_rate': 0.24720549837815373, 'num_leaves': 100, 'min_child_samples': 20, 'subsample': 0.6722662667659364, 'colsample_bytree': 0.8783891713362519}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000816 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:53,766] Trial 46 finished with value: 0.9919701986754967 and parameters: {'n_estimators': 168, 'max_depth': 10, 'learning_rate': 0.29582317452430557, 'num_leaves': 23, 'min_child_samples': 24, 'subsample': 0.8073579718519528, 'colsample_bytree': 0.7851248034085208}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000860 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:54,819] Trial 47 finished with value: 0.9855629139072848 and parameters: {'n_estimators': 147, 'max_depth': 11, 'learning_rate': 0.015485189966472976, 'num_leaves': 82, 'min_child_samples': 22, 'subsample': 0.7659165366147342, 'colsample_bytree': 0.8075378611891494}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002849 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:55,433] Trial 48 finished with value: 0.9856788079470199 and parameters: {'n_estimators': 119, 'max_depth': 4, 'learning_rate': 0.19462396215726013, 'num_leaves': 42, 'min_child_samples': 21, 'subsample': 0.8686609822255503, 'colsample_bytree': 0.6763432686974793}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000886 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.014437 -> initscore=-4.223413
[LightGBM] [Info] Start training from score -4.223413


[I 2024-10-25 12:59:56,832] Trial 49 finished with value: 0.9999668874172185 and parameters: {'n_estimators': 178, 'max_depth': 12, 'learning_rate': 0.25118079205538413, 'num_leaves': 89, 'min_child_samples': 15, 'subsample': 0.8410521077792, 'colsample_bytree': 0.931356066016142}. Best is trial 11 with value: 0.9999668874172185.


[LightGBM] [Info] Number of positive: 872, number of negative: 59528
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000856 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1508
[LightGBM] [Info] Number of data points in the train set: 60400, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
Preprocess events


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  events['score'].fillna(events['rating'], inplace=True)


Compute TF-IDF
Run ALS


  0%|          | 0/30 [00:00<?, ?it/s]

Postprocess ALS prediction
Generating item features
Generating user features
Merging features to interactions
Running CatBoost scoring


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  item_features['item_bookmark_count'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  user_features['recent_watch_count'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on 

Generating item features
Generating user features
Merging features to interactions
Running LightGBM scoring


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  user_features['user_avg_rating'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  features[col].fillna(0, inplace=True)


Recall@10: 0.0000


Получаем наш сабмишен

In [35]:
# Создание DataFrame для сабмита
submission = final_recommendations[['user_id', 'item_id']].copy()

# Группировка по user_id и объединение item_id в строку, разделенную пробелами
submission = (
    submission
    .groupby('user_id')['item_id']
    .apply(lambda x: ' '.join(map(str, x)))
    .reset_index()
)

# Сохранение сабмита в CSV файл
submission.to_csv('submission_bobs.csv', index=False)

In [36]:
submission

Unnamed: 0,user_id,item_id
0,0,785 1811 331 2256 1248 1001 1545 2397 569 2732
1,1,106 1699 1868 1039 1491 1246 2175 3656 232 1686
2,2,3318 2354 1809 293 1781 2132 2639 2185 452 2342
3,3,1861 2186 2194 1984 3435 1617 3472 3318 3528 3327
4,4,2774 37 1809 188 3677 1858 1560 3298 1337 1191
...,...,...
6035,6035,2784 1375 3013 160 502 1296 1011 1855 1753 1216
6036,6036,796 3473 1354 3013 772 640 3456 36 236 741
6037,6037,2195 2784 3638 1968 3583 618 2688 2603 2646 318
6038,6038,184 2626 2587 182 1100 2063 1808 361 2920 3105
