Training on AutoDL

In [None]:
import pandas as pd
import polars as pl
import numpy as np
import os
from tqdm.auto import tqdm
import psutil
import pickle

from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
import lightgbm as lgb
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor

import warnings
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None

In [None]:
class CONFIG:
    """Static settings for the notebook: RNG seed, target name and feature names."""

    seed = 28

    # Column the model is trained to predict.
    target_col = "responder_6"

    # Model inputs, in order: the two identifier columns, the 79 raw features
    # (feature_00 .. feature_78), then the lag statistics of the nine
    # responders grouped by statistic (all *_mean_lag first, then *_std_lag,
    # *_max_lag, *_last_lag and *_chg_lag).
    feature_cols = [
        "symbol_id",
        "time_id",
        *(f"feature_{idx:02d}" for idx in range(79)),
        *(
            f"responder_{idx}_{stat}_lag"
            for stat in ("mean", "std", "max", "last", "chg")
            for idx in range(9)
        ),
    ]


In [None]:
def get_model(seed):
    """Build the (unfitted) XGBoost regressor used for training.

    Args:
        seed: Value forwarded to XGBoost as ``random_state``.

    Returns:
        An ``XGBRegressor`` configured to train with the histogram tree
        method on a CUDA device.
    """
    xgb_params = {
        'learning_rate': 0.05,
        'max_depth': 6,
        'n_estimators': 250,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'reg_alpha': 1,
        'reg_lambda': 1,
        'random_state': seed,
        # With the `device` parameter (XGBoost >= 2.0) GPU training is
        # requested as tree_method="hist" + device="cuda"; the older
        # "gpu_hist" spelling is deprecated.
        'tree_method': 'hist',
        'device': 'cuda',
        # No 'n_gpus' entry: XGBoost versions that accept `device` do not
        # recognise it (it is ignored with an "unused parameter" warning), so
        # it never enabled multi-GPU training here.
    }

    return XGBRegressor(**xgb_params)

In [None]:
# Path of the training parquet file.
train_path = '/root/autodl-tmp/train.parquet'
# scan_parquet only builds a lazy query; data is read when a later cell
# calls .collect() on a selection of it.
train = pl.scan_parquet(train_path)

In [None]:
# Materialize only the model input columns from the lazy frame.
X_train = train.select(CONFIG.feature_cols).collect()
# Last expression of the cell: displays the shape of the feature frame.
X_train.shape

In [None]:
# Target column, collected separately from the features.
y_train = train.select(CONFIG.target_col).collect()
# The 'weight' column is used as sample_weight when fitting and scoring.
w_train = train.select('weight').collect()
# Last expression of the cell: displays both shapes as a sanity check.
y_train.shape,w_train.shape

In [None]:
%%time
# Build the regressor with the configured seed and fit it on every collected
# row, weighting each row by the 'weight' column.
model = get_model(CONFIG.seed)
model.fit( X_train, y_train, sample_weight=w_train)

In [None]:
# Persist the fitted model with pickle, wrapped in a dict under the "model" key.
# NOTE(review): loading this file presumably requires a compatible xgboost
# version in the loading environment — confirm before relying on it elsewhere.
result = {
    "model" : model,
}
with open("result_900.pkl", "wb") as fp:
    pickle.dump(result, fp)

In [None]:
# Weighted R^2 of the fitted model on the last rows of the collected data.
# NOTE(review): these rows come from the same X_train / y_train the model was
# fitted on, so this is an in-sample score rather than a held-out validation.
tail_rows = 5000000
y_pred_valid = model.predict(X_train.tail(tail_rows))
valid_score = r2_score(
    y_train.tail(tail_rows),
    y_pred_valid,
    sample_weight=w_train.tail(tail_rows),
)
valid_score