In [39]:
from functools import partial

import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import lightgbm
from lightgbm import Dataset

## Import roc_auc_pairwise package

In [2]:
#This function computes sigmoid pairwise loss auc on gpu with approx. auc computation 
from roc_auc_pairwise.sigmoid_pairwise_auc_gpu import sigmoid_pairwise_loss_auc_gpu_py
#This function computes sigmoid pairwise loss auc on gpu with exact auc computation 
from roc_auc_pairwise.sigmoid_pairwise_auc_gpu import sigmoid_pairwise_loss_auc_exact_gpu_py
#This function computes sigmoid pairwise gradient and hessian auc on gpu with approx. auc computation 
from roc_auc_pairwise.sigmoid_pairwise_auc_gpu import sigmoid_pairwise_diff_hess_auc_gpu_py
#This function computes sigmoid pairwise gradient and hessian auc on gpu with exact auc computation 
from roc_auc_pairwise.sigmoid_pairwise_auc_gpu import sigmoid_pairwise_diff_hess_auc_exact_gpu_py

#This function computes sigmoid pairwise loss auc on cpu with approx. auc computation 
from roc_auc_pairwise.sigmoid_pairwise_auc_cpu import sigmoid_pairwise_loss_auc_cpu_py
#This function computes sigmoid pairwise loss auc on cpu with exact auc computation 
from roc_auc_pairwise.sigmoid_pairwise_auc_cpu import sigmoid_pairwise_loss_auc_exact_cpu_py
#This function computes sigmoid pairwise gradient and hessian auc on cpu with approx. auc computation 
from roc_auc_pairwise.sigmoid_pairwise_auc_cpu import sigmoid_pairwise_diff_hess_auc_cpu_py
#This function computes sigmoid pairwise gradient and hessian auc on cpu with exact auc computation 
from roc_auc_pairwise.sigmoid_pairwise_auc_cpu import sigmoid_pairwise_diff_hess_auc_exact_cpu_py

#This function computes sigmoid pairwise loss on gpu
from roc_auc_pairwise.sigmoid_pairwise_gpu import sigmoid_pairwise_loss_gpu_py 
#This function computes sigmoid pairwise gradient and hessian on gpu
from roc_auc_pairwise.sigmoid_pairwise_gpu import sigmoid_pairwise_diff_hess_gpu_py

#This function computes sigmoid pairwise loss on cpu 
from roc_auc_pairwise.sigmoid_pairwise_cpu import sigmoid_pairwise_loss_py
#This function computes sigmoid pairwise gradient and hessian on cpu
from roc_auc_pairwise.sigmoid_pairwise_cpu import sigmoid_pairwise_diff_hess_py

### Load breast_cancer dataset for binary classification

In [4]:
# Load the breast cancer dataset and pull out the feature matrix and the target vector.
br_cancer = load_breast_cancer()
X, y = br_cancer['data'], br_cancer['target']

In [9]:
# Hold out 20% of the rows for validation; the split is stratified on the target
# and seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [20]:
# Wrap both splits as LightGBM datasets: `fit` is trained on, `val` is only evaluated.
fit = Dataset(data=X_train, label=y_train, free_raw_data=True)
val = Dataset(data=X_test, label=y_test, free_raw_data=True)

### Define roc_auc_score for lightgbm

In [35]:
#custom lightgbm evaluation metric built on sklearn's roc_auc_score
def roc_auc_lgbm(preds, train_data):
    """Evaluation callback passed to ``lightgbm.train`` through ``feval``.

    Parameters
    ----------
    preds : predictions for the dataset being evaluated.
    train_data : object exposing ``get_label()``; its labels are the ground truth.

    Returns
    -------
    tuple
        ``(metric_name, metric_value, is_higher_better)``. The metric name is
        the literal ``'default_rate'`` even though the value is the ROC AUC
        returned by ``roc_auc_score``; the flag is always ``True``.
    """
    labels = train_data.get_label()
    return 'default_rate', roc_auc_score(labels, preds), True

## Sigmoid pairwise Loss for lightgbm boosting

In [45]:
def sigmoid_pairwise_loss(preds, train_data, device):
    """Custom objective for ``lightgbm.train`` (``fobj``): sigmoid pairwise loss.

    Parameters
    ----------
    preds : current predictions supplied by the booster.
    train_data : object exposing ``get_label()``.
    device : ``'cpu'`` selects the CPU kernel; any other value selects the GPU kernel.

    Returns
    -------
    tuple
        ``(-grad, -hess)``, the negated outputs of the selected kernel.
    """
    # !!!Important!!! gpu kernel accepts only single precision floats and int32_t integers
    labels = np.array(train_data.get_label(), dtype=np.int32)
    scores = np.array(preds, dtype=np.float32)

    # Only the selected kernel name is looked up, so the other backend is never touched.
    kernel = sigmoid_pairwise_diff_hess_py if device == 'cpu' else sigmoid_pairwise_diff_hess_gpu_py
    grad, hess = kernel(labels, scores)
    return -grad, -hess

In [46]:
# Boost for 25 rounds with the plain sigmoid pairwise objective on the GPU kernel,
# evaluating roc_auc_lgbm on both the fit and val datasets.
model = lightgbm.train(
    params=dict(learning_rate=0.005, num_leaves=4, boosting_type='gbdt'),
    train_set=fit,
    valid_sets=(fit, val),
    valid_names=('fit', 'val'),
    num_boost_round=25,
    fobj=partial(sigmoid_pairwise_loss, device='gpu'),
    feval=roc_auc_lgbm,
)

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4542
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[1]	fit's default_rate: 0.975996	val's default_rate: 0.964616
[2]	fit's default_rate: 0.975996	val's default_rate: 0.964616
[3]	fit's default_rate: 0.975996	val's default_rate: 0.964616
[4]	fit's default_rate: 0.975996	val's default_rate: 0.964616
[5]	fit's default_rate: 0.975996	val's default_rate: 0.964616
[6]	fit's default_rate: 0.975996	val's default_rate: 0.964616
[7]	fit's default_rate: 0.975996	val's default_rate: 0.964616
[8]	fit's default_rate: 0.982683	val's default_rate: 0.970403
[9]	fit's default_rate: 0.981816	val's default_rate: 0.970073
[10]	fit's default_rate: 0.981569	val's default_rate: 0.96875
[11]	fit's default_rate: 0.983013	val's default_rate: 0.977183
[12]	fit's default_rate: 0.983013	val's default_rate: 0.976521
[13]	fit's default_rate: 0.982828	val's default_rate: 0.976852
[14]	fit

## Sigmoid pairwise Loss with approx. AUC computation for lightgbm boosting

In [48]:
def sigmoid_pairwise_loss_auc_approx(preds, train_data, device):
    """Custom objective for ``lightgbm.train`` (``fobj``): sigmoid pairwise loss
    with the approximate AUC computation.

    Parameters
    ----------
    preds : current predictions supplied by the booster.
    train_data : object exposing ``get_label()``.
    device : ``'cpu'`` selects the CPU kernel; any other value selects the GPU kernel.

    Returns
    -------
    tuple
        ``(-grad, -hess)``, the negated outputs of the selected kernel.
    """
    # !!!Important!!! gpu kernel accepts only single precision floats and int32_t integers
    labels = np.array(train_data.get_label(), dtype=np.int32)
    scores = np.array(preds, dtype=np.float32)

    if device != 'cpu':
        grad, hess = sigmoid_pairwise_diff_hess_auc_gpu_py(labels, scores)
        return -grad, -hess

    grad, hess = sigmoid_pairwise_diff_hess_auc_cpu_py(labels, scores)
    return -grad, -hess

In [51]:
# Same training setup, but with the approximate-AUC pairwise objective on the GPU kernel.
model = lightgbm.train(
    params=dict(learning_rate=0.005, num_leaves=4, boosting_type='gbdt'),
    train_set=fit,
    valid_sets=(fit, val),
    valid_names=('fit', 'val'),
    num_boost_round=25,
    fobj=partial(sigmoid_pairwise_loss_auc_approx, device='gpu'),
    feval=roc_auc_lgbm,
)

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4542
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[1]	fit's default_rate: 0.96935	val's default_rate: 0.959325
[2]	fit's default_rate: 0.966945	val's default_rate: 0.964616
[3]	fit's default_rate: 0.972002	val's default_rate: 0.969081
[4]	fit's default_rate: 0.97225	val's default_rate: 0.968915
[5]	fit's default_rate: 0.972208	val's default_rate: 0.967923
[6]	fit's default_rate: 0.972291	val's default_rate: 0.966931
[7]	fit's default_rate: 0.972291	val's default_rate: 0.966931
[8]	fit's default_rate: 0.972291	val's default_rate: 0.966931
[9]	fit's default_rate: 0.972291	val's default_rate: 0.966931
[10]	fit's default_rate: 0.972291	val's default_rate: 0.966931
[11]	fit's default_rate: 0.972291	val's default_rate: 0.966931
[12]	fit's default_rate: 0.972291	val's default_rate: 0.966931
[13]	fit's default_rate: 0.972291	val's default_rate: 0.966931
[14]	fit'

## 

## Sigmoid pairwise Loss with exact AUC computation for lightgbm boosting

In [53]:
def sigmoid_pairwise_loss_auc_exact(preds, train_data, device):
    """Custom objective for ``lightgbm.train`` (``fobj``): sigmoid pairwise loss
    with the exact AUC computation.

    The function relies on the module-level flag ``N``. While ``N == 0`` (the
    first boosting round) the plain sigmoid pairwise kernel is used and ``N``
    is set to 1; every later call uses the exact-AUC kernel. The caller must
    reset ``N = 0`` before each new training run.

    Parameters
    ----------
    preds : current predictions supplied by the booster.
    train_data : object exposing ``get_label()``.
    device : ``'cpu'`` selects the CPU kernels; any other value selects the GPU kernels.

    Returns
    -------
    tuple
        ``(-grad, -hess)``, the negated outputs of the selected kernel.
    """
    global N

    y = train_data.get_label()

    # !!!Important!!! gpu kernel accepts only single precision floats and int32_t integers
    y = np.array(y, dtype=np.int32)
    preds = np.array(preds, dtype=np.float32)

    on_cpu = device == 'cpu'

    if N == 0:
        # First boosting round has to be with sigmoid pairwise loss,
        # because with all same y_pred deltaauc will be always zero, and grad, hess would be 0 too.
        # The kernel must match the requested device (the branches used to be swapped).
        if on_cpu:
            grad, hess = sigmoid_pairwise_diff_hess_py(y, preds)
        else:
            grad, hess = sigmoid_pairwise_diff_hess_gpu_py(y, preds)
        N = 1
    else:
        if on_cpu:
            grad, hess = sigmoid_pairwise_diff_hess_auc_exact_cpu_py(y, preds)
        else:
            grad, hess = sigmoid_pairwise_diff_hess_auc_exact_gpu_py(y, preds)
    return -grad, -hess

In [55]:
# Reset the first-round flag read by sigmoid_pairwise_loss_auc_exact before training.
N = 0
# Same training setup, but with the exact-AUC pairwise objective on the GPU kernel.
model = lightgbm.train(
    params=dict(learning_rate=0.005, num_leaves=4, boosting_type='gbdt'),
    train_set=fit,
    valid_sets=(fit, val),
    valid_names=('fit', 'val'),
    num_boost_round=25,
    fobj=partial(sigmoid_pairwise_loss_auc_exact, device='gpu'),
    feval=roc_auc_lgbm,
)

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4542
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[1]	fit's default_rate: 0.975996	val's default_rate: 0.964616
[2]	fit's default_rate: 0.979948	val's default_rate: 0.980489
[3]	fit's default_rate: 0.977843	val's default_rate: 0.979497
[4]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[5]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[6]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[7]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[8]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[9]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[10]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[11]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[12]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[13]	fit's default_rate: 0.980279	val's default_rate: 0.979497
[14]	fi