Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified config_example.json
100755 → 100644
Empty file.
37 changes: 37 additions & 0 deletions cuml/train_test_split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
from bench import measure_function_time, parse_args, load_data, print_output
from cuml import train_test_split

# Benchmark-specific command-line options. The parser is handed to
# parse_args(), whose result (`params`) supplies every setting read below.
parser = argparse.ArgumentParser(
    description='cuml train_test_split benchmark')
parser.add_argument('--train-size', type=float, default=0.75,
                    help='Size of training subset')
parser.add_argument('--test-size', type=float, default=0.25,
                    help='Size of testing subset')
parser.add_argument('--do-not-shuffle', default=False, action='store_true',
                    help='Do not perform data shuffle before splitting')
params = parse_args(parser)

# Load generated data
X, y, _, _ = load_data(params)

# Keyword arguments for train_test_split. The same dict is reported through
# print_output(alg_params=...), so it must describe the call that is timed.
# NOTE(review): params.seed is presumably added by parse_args -- confirm.
tts_params = {
    'train_size': params.train_size,
    'test_size': params.test_size,
    'shuffle': not params.do_not_shuffle,
    'random_state': params.seed
}

# Forward tts_params into the measured call (as the scikit-learn benchmark
# does); without this the command-line options above would have no effect on
# the split being timed. Only the first element of the returned pair is used.
time, _ = measure_function_time(train_test_split, X=X, y=y, **tts_params,
                                params=params)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')

print_output(library='cuml', algorithm='train_test_split',
             stages=['training'], columns=columns, params=params,
             functions=['train_test_split'], times=[time], accuracies=[None],
             accuracy_type=None, data=[X], alg_params=tts_params)
54 changes: 54 additions & 0 deletions sklearn/train_test_split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
from bench import measure_function_time, parse_args, load_data, print_output
from sklearn.model_selection import train_test_split

# Values accepted by --rng. None is listed alongside the generator names and
# is also the option's default.
rng_choices = ('MT19937', 'SFMT19937', 'MT2203', 'R250', 'WH', 'MCG31',
               'MCG59', 'MRG32K3A', 'PHILOX4X32X10', 'NONDETERM', None)

# Benchmark-specific command-line options; parse_args() receives the parser
# and returns the `params` object read throughout the rest of the script.
parser = argparse.ArgumentParser(
    description='scikit-learn train_test_split benchmark'
)
parser.add_argument(
    '--train-size', type=float, default=0.75,
    help='Size of training subset',
)
parser.add_argument(
    '--test-size', type=float, default=0.25,
    help='Size of testing subset',
)
parser.add_argument(
    '--do-not-shuffle', default=False, action='store_true',
    help='Do not perform data shuffle before splitting',
)
parser.add_argument(
    '--include-y', default=False, action='store_true',
    help='Include label (Y) in splitting',
)
parser.add_argument(
    '--rng', default=None, choices=rng_choices,
    help='Random numbers generator for shuffling '
         '(only for IDP scikit-learn)',
)
params = parse_args(parser)

# Load generated data
X, y, _, _ = load_data(params)

# Positional inputs for the split: features only, or features plus labels
# when --include-y was given.
data_args = (X, y) if params.include_y else (X, )

# Keyword arguments for train_test_split; also reported via alg_params.
tts_params = dict(
    train_size=params.train_size,
    test_size=params.test_size,
    shuffle=not params.do_not_shuffle,
    random_state=params.seed,
)
# 'rng' is only added when explicitly requested on the command line.
if params.rng is not None:
    tts_params['rng'] = params.rng

# Only the first element of the returned pair (the timing) is used.
split_time, _ = measure_function_time(
    train_test_split, *data_args, **tts_params, params=params
)

columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size', 'time',
)

print_output(
    library='sklearn',
    algorithm='train_test_split',
    stages=['training'],
    columns=columns,
    params=params,
    functions=['train_test_split'],
    times=[split_time],
    accuracies=[None],
    accuracy_type=None,
    data=[X],
    alg_params=tts_params,
)