diff --git a/.gitignore b/.gitignore
index fea142e82..0f647d708 100755
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@ __work*
 # Datasets
 dataset
 *.csv
+*.npy
diff --git a/LICENSE b/LICENSE
index da66bc348..d79ad5528 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2017-2019 Intel Corporation
+Copyright (c) 2017-2020 Intel Corporation
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/modelbuilders/bench.py b/modelbuilders/bench.py
new file mode 100644
index 000000000..35b5030b1
--- /dev/null
+++ b/modelbuilders/bench.py
@@ -0,0 +1,613 @@
+# Copyright (C) 2017-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+
+import argparse
+import numpy as np
+import sklearn
+import timeit
+import json
+
+
+def get_dtype(data):
+    '''
+    Return the element type of ``data``: its ``dtype`` attribute if present,
+    else the first ``dtypes`` entry as a string, else ``values.dtype``.
+    '''
+    if hasattr(data, 'dtype'):
+        return data.dtype
+    if hasattr(data, 'dtypes'):
+        return str(data.dtypes[0])
+    if hasattr(data, 'values'):
+        return data.values.dtype
+    raise ValueError(f'Impossible to get data type of {type(data)}')
+
+
+try:
+    from daal4py.sklearn._utils import getFPType
+except ImportError:
+    def getFPType(X):
+        # Fallback when daal4py is absent: map the dtype to 'float'/'double'
+        dtype = str(get_dtype(X))
+        if 'float32' in dtype:
+            return 'float'
+        if 'float64' in dtype:
+            return 'double'
+        raise ValueError('Unknown type')
+
+
+def sklearn_disable_finiteness_check():  # make sklearn skip its finite-value input checks
+    try:
+        sklearn.set_config(assume_finite=True)  # public configuration switch
+    except AttributeError:
+        try:
+            sklearn._ASSUME_FINITE = True  # fallback when set_config is missing
+        except AttributeError:  # NOTE(review): a plain module assignment rarely raises this - confirm reachability
+            sklearn.utils.validation._assert_all_finite = lambda X: None  # last resort: no-op the validator
+
+
+def _parse_size(string, dim=2):
+    '''
+    Parse a size such as "100x20" or "100,20" into a tuple of ``dim`` ints,
+    raising argparse.ArgumentTypeError on malformed input.
+    '''
+    try:
+        dims = tuple(map(int, string.replace('x', ',').split(',')))
+    except Exception as err:
+        raise argparse.ArgumentTypeError(
+            f'Invalid size "{string}": sizes must be integers separated by '
+            f'"x" or ",".') from err
+    if len(dims) != dim:
+        raise argparse.ArgumentTypeError(
+            f'Expected size parameter of {dim} dimensions but got {len(dims)}')
+    return dims
+
+
+def float_or_int(string):
+    '''
+    Parse ``string`` as float when it contains a '.', otherwise as int.
+    '''
+    return float(string) if '.' in string else int(string)
+
+
+def parse_args(parser, size=None, loop_types=(),
+               n_jobs_supported=False, prefix='sklearn'):
+    '''
+    Add common arguments useful for most benchmarks and parse.
+
+    Parameters
+    ----------
+    parser : argparse.ArgumentParser
+        Parser to which the arguments should be added.
+    size : tuple of int, optional
+        Enable '--size' argument with this default size.
+        If None (default), no '--size' argument will be added.
+    loop_types : iterable of str, optional
+        Add arguments like '--fit-inner-loops' and '--fit-outer-loops',
+        useful for tweaking runtime of the benchmark.
+    n_jobs_supported : bool
+        If True and daal4py is not available, set the n_jobs member of the
+        returned Namespace to the requested thread count.
+        Otherwise, n_jobs will be set to None.
+    prefix : str, optional, default 'sklearn'
+        The default prefix to report
+
+    Returns
+    -------
+    params : argparse.Namespace
+        Parsed arguments, extended with 'threads', 'daal_version',
+        'using_daal', 'n_jobs' and (when size is given) 'size'.
+    '''
+
+    parser.add_argument('-n', '--num-threads', '--core-number', default=-1,
+                        dest='threads', type=int,
+                        help='Number of threads to use')
+    parser.add_argument('-a', '--arch', default='?',
+                        help='Machine architecture, for bookkeeping')
+    parser.add_argument('-b', '--batch', '--batchID', default='?',
+                        help='Batch ID, for bookkeeping')
+    parser.add_argument('-p', '--prefix', default=prefix,
+                        help='Prefix string, for bookkeeping')
+    parser.add_argument('--header', default=False, action='store_true',
+                        help='Output CSV header')
+    parser.add_argument('-v', '--verbose', default=False, action='store_true',
+                        help='Output extra debug messages')
+    parser.add_argument('--data-format', type=str, default='numpy',
+                        choices=('numpy', 'pandas', 'cudf'),
+                        help='Data format: numpy (default), pandas, cudf')
+    parser.add_argument('--data-order', type=str, default='C',
+                        choices=('C', 'F'),
+                        help='Data order: C (row-major, default) or '
+                             'F (column-major)')
+    parser.add_argument('-d', '--dtype', type=np.dtype, default=np.float64,
+                        choices=(np.float32, np.float64),
+                        help='Data type: float64 (default) or float32')
+    parser.add_argument('--check-finiteness', default=False,
+                        action='store_true',
+                        help='Check finiteness in sklearn input check '
+                             '(disabled by default)')
+    parser.add_argument('--output-format', type=str, default='csv',
+                        choices=('csv', 'json'),
+                        help='Output format: csv (default) or json')
+    parser.add_argument('--time-method', type=str, default='mean_min',
+                        choices=('box_filter', 'mean_min'),
+                        help='Method used for time measurements')
+    parser.add_argument('--box-filter-measurements', type=int, default=100,
+                        help='Maximum number of measurements in box filter')
+    parser.add_argument('--inner-loops', default=100, type=int,
+                        help='Maximum inner loop iterations '
+                             '(we take the mean over inner iterations)')
+    parser.add_argument('--outer-loops', default=100, type=int,
+                        help='Maximum outer loop iterations '
+                             '(we take the min over outer iterations)')
+    parser.add_argument('--time-limit', default=10., type=float,
+                        help='Target time to spend to benchmark')
+    parser.add_argument('--goal-outer-loops', default=10,
+                        type=int, dest='goal',
+                        help='Number of outer loops to aim '
+                             'while automatically picking number of '
+                             'inner loops. If zero, do not automatically '
+                             'decide number of inner loops.')
+    parser.add_argument('--seed', type=int, default=12345,
+                        help='Seed to pass as random_state')
+    parser.add_argument('--dataset-name', type=str, default=None,
+                        help='Dataset name')
+
+    for data in ['X', 'y']:
+        for stage in ['train', 'test']:
+            parser.add_argument(f'--file-{data}-{stage}',
+                                type=argparse.FileType('r'),
+                                help=f'Input file with {data}_{stage}, '
+                                     'in NPY format')
+
+    if size is not None:
+        parser.add_argument('-s', '--size', default=size, type=_parse_size,
+                            dest='shape',
+                            help='Problem size, delimited by "x" or ","')
+
+    params = parser.parse_args()
+
+    # disable finiteness check (default)
+    if not params.check_finiteness:
+        sklearn_disable_finiteness_check()
+
+    # Ask DAAL what it thinks about this number of threads
+    num_threads, daal_version = prepare_daal(num_threads=params.threads)
+    if params.verbose and daal_version:
+        print(f'@ Found DAAL version {daal_version}')
+        print(f'@ DAAL gave us {num_threads} threads')
+
+    n_jobs = None
+    if n_jobs_supported and not daal_version:
+        n_jobs = num_threads = params.threads
+
+    # Set threading and DAAL related params here
+    setattr(params, 'threads', num_threads)
+    setattr(params, 'daal_version', daal_version)
+    setattr(params, 'using_daal', daal_version is not None)
+    setattr(params, 'n_jobs', n_jobs)
+
+    # Set size string parameter for easy printing
+    if size is not None:
+        setattr(params, 'size', size_str(params.shape))
+
+    # Very verbose output
+    if params.verbose:
+        print(f'@ params = {params.__dict__}')
+
+    return params
+
+
+def size_str(shape):  # render a shape such as (100, 20) as '100x20'
+    return 'x'.join(map(str, shape))
+
+
+def print_header(columns, params):
+    if params.header:  # the header row is printed only when params.header is set
+        print(','.join(columns))  # column names as one comma-separated line
+
+
+def print_row(columns, params, **kwargs):
+    '''
+    Print one CSV row: each column is taken from ``kwargs`` first, then from
+    an attribute of ``params``, and left empty when neither provides it.
+    '''
+    def cell(col):
+        if col in kwargs:
+            return str(kwargs[col])
+        if hasattr(params, col):
+            return str(getattr(params, col))
+        return ''
+    print(','.join(cell(col) for col in columns))
+
+
+def set_daal_num_threads(num_threads):  # best-effort: pass the thread count on to daal4py
+    try:
+        import daal4py  # imported lazily so a missing package is only reported, not fatal
+        if num_threads:  # falsy value (0/None): daalinit is not called
+            daal4py.daalinit(nthreads=num_threads)
+    except ImportError:
+        print('@ Package "daal4py" was not found. Number of threads '
+              'is being ignored')
+
+
+def prepare_daal(num_threads=-1):  # returns (thread count, daal4py run version or None)
+    try:
+        if num_threads > 0:  # non-positive values request no explicit thread setup
+            set_daal_num_threads(num_threads)
+        import daal4py
+        num_threads = daal4py.num_threads()  # the count daal4py itself reports
+        daal_version = daal4py.__daal_run_version__
+    except ImportError:
+        num_threads = 1  # without daal4py a single thread is reported
+        daal_version = None
+    # parse_args() derives 'using_daal' from daal_version being non-None
+    return num_threads, daal_version
+
+
+def measure_function_time(func, *args, params, **kwargs):
+    '''Time ``func(*args, **kwargs)`` with the method in params.time_method.'''
+    if params.time_method != 'mean_min':
+        return time_box_filter(func, *args,
+                               n_meas=params.box_filter_measurements,
+                               time_limit=params.time_limit, **kwargs)
+    return time_mean_min(func, *args,
+                         outer_loops=params.outer_loops,
+                         inner_loops=params.inner_loops,
+                         goal_outer_loops=params.goal,
+                         time_limit=params.time_limit,
+                         verbose=params.verbose, **kwargs)
+
+
+def time_box_filter(func, *args, n_meas, time_limit, **kwargs):
+    '''Call func up to n_meas times (or until time_limit seconds are spent);
+    return (mean of the samples within 1.5 IQR of the quartiles, last result).'''
+    times = []
+    while len(times) < n_meas:
+        t0 = timeit.default_timer()
+        val = func(*args, **kwargs)
+        t1 = timeit.default_timer()
+        times.append(t1 - t0)
+        if sum(times) > time_limit:
+            break
+
+    def box_filter(timing, left=0.25, right=0.75):
+        timing.sort()
+        size = len(timing)
+        Q1, Q2 = timing[int(size * left)], timing[int(size * right)]
+        IQ = Q2 - Q1
+        lower, upper = Q1 - 1.5 * IQ, Q2 + 1.5 * IQ
+        # Inclusive bounds: with tied samples (IQ == 0) a strict comparison
+        # would drop every value and np.mean([]) would yield NaN.
+        return np.mean([item for item in timing if lower <= item <= upper])
+
+    return box_filter(times), val
+
+
+def time_mean_min(func, *args, inner_loops=1, outer_loops=1, time_limit=10.,
+                  goal_outer_loops=10, verbose=False, **kwargs):
+    '''
+    Time the given function (inner_loops * outer_loops) times, returning the
+    min of the inner loop means.
+
+    Parameters
+    ----------
+    func : callable f(*args, **kwargs)
+        The function to time.
+    inner_loops : int
+        Maximum number of inner loop iterations to take the mean over.
+    outer_loops : int
+        Maximum number of outer loop iterations to take the min over.
+    time_limit : double
+        Number of seconds to aim for. If accumulated time exceeds time_limit
+        in outer loops, exit without running more outer loops. If zero,
+        disable time limit.
+    goal_outer_loops : int
+        Number of outer loop iterations to aim for by taking warmup rounds
+        and tuning inner_loops automatically. If zero, skip the warm-up.
+    verbose : boolean
+        If True, print outer loop timings and miscellaneous information.
+
+    Returns
+    -------
+    time : float
+        The min of means.
+    val : return value of func
+        The last value returned by func.
+    '''
+
+    assert inner_loops * outer_loops > 0, \
+        'Must time the function at least once'
+
+    times = np.zeros(outer_loops, dtype='f8')
+    total_time = 0.
+
+    # Warm-up iterations to determine optimal inner_loops
+    warmup = (goal_outer_loops > 0)
+    warmup_time = 0.
+    last_warmup = 0.
+    if warmup:
+        for _ in range(inner_loops):
+            t0 = timeit.default_timer()
+            val = func(*args, **kwargs)
+            t1 = timeit.default_timer()
+            last_warmup = t1 - t0
+            warmup_time += last_warmup
+            if warmup_time > time_limit / 10:
+                break
+        if last_warmup > 0:  # a zero timer reading gives no estimate to tune from
+            inner_loops = max(
+                1, int(time_limit / last_warmup / goal_outer_loops))
+        logverbose(f'Optimal inner loops = {inner_loops}', verbose)
+
+    if last_warmup > time_limit:
+        # If we took too much time in warm-up, just use those numbers
+        logverbose(f'A single warmup iteration took {last_warmup:0.2f}s '
+                   f'> {time_limit:0.2f}s - not performing any more timings',
+                   verbose)
+        outer_loops = 1
+        inner_loops = 1
+        times[0] = last_warmup
+        times = times[:1]
+    else:
+        # Otherwise, actually take the timing
+        for i in range(outer_loops):
+
+            t0 = timeit.default_timer()
+            for _ in range(inner_loops):
+                val = func(*args, **kwargs)
+            t1 = timeit.default_timer()
+
+            times[i] = t1 - t0
+            total_time += times[i]
+
+            if time_limit > 0 and total_time > time_limit:
+                logverbose(f'TT={total_time:0.2f}s exceeding {time_limit}s '
+                           f'after iteration {i+1}', verbose)
+                outer_loops = i + 1
+                times = times[:outer_loops]
+                break
+
+    # We take the mean of inner loop times
+    times /= inner_loops
+    logverbose('Mean times [s]', verbose)
+    logverbose(f'{times}', verbose)
+
+    # We take the min of outer loop times
+    return np.min(times), val
+
+
+def logverbose(msg, verbose):
+    '''Write ``msg`` to stdout behind an "@" marker when ``verbose`` is
+    truthy; do nothing otherwise.'''
+    if not verbose:
+        return
+    print('@', msg)
+
+
+def convert_to_numpy(data):
+    '''
+    Return ``data`` as a numpy array. cudf, pandas and numba device-array
+    inputs are recognised by their type name; ndarrays pass through as-is.
+    '''
+    kind = str(type(data))
+    if 'cudf' in kind:
+        return data.to_pandas().values
+    if 'pandas' in kind:
+        return data.values
+    if isinstance(data, np.ndarray):
+        return data
+    if 'numba.cuda.cudadrv.devicearray.DeviceNDArray' in kind:
+        return np.array(data)
+    raise TypeError(
+        f'Unknown data format "{type(data)}" for convertion to np.ndarray')
+
+
+def columnwise_score(y, yp, score_func):
+    '''Score yp against y; multi-column inputs give a list of per-column scores.'''
+    y = convert_to_numpy(y)
+    yp = convert_to_numpy(yp)
+    if y.ndim + yp.ndim > 2:
+        if 1 not in (y.shape + yp.shape)[1:]:
+            # slice columns, not rows: y[i] would pair up row i of each array
+            return [score_func(y[:, i], yp[:, i]) for i in range(y.shape[1])]
+        # otherwise reduce any 2-D operand to its first column
+        y = y[:, 0] if y.ndim > 1 else y
+        yp = yp[:, 0] if yp.ndim > 1 else yp
+    return score_func(y, yp)
+
+
+def accuracy_score(y, yp):  # mean of the element-wise equality y == yp, via columnwise_score
+    return columnwise_score(y, yp, lambda y1, y2: np.mean(y1 == y2))
+
+
+def rmse_score(y, yp):  # root of the mean squared difference y - yp, via columnwise_score
+    return columnwise_score(
+        y, yp, lambda y1, y2: float(np.sqrt(np.mean((y1 - y2)**2))))
+
+
+def convert_data(data, dtype, data_order, data_format):
+    '''
+    Cast a numpy array to ``dtype`` with the requested memory order ('C' or
+    'F'), then wrap it as numpy / pandas / cudf according to ``data_format``.
+    Any other ``data_format`` value returns None.
+    '''
+    # Step 1: element type and memory layout
+    if data_order == 'C':
+        data = np.ascontiguousarray(data, dtype)
+    elif data_order == 'F':
+        data = np.asfortranarray(data, dtype)
+
+    # Step 2: container type
+    if data_format == 'numpy':
+        return data
+    if data_format == 'pandas':
+        import pandas as pd
+
+        return pd.Series(data) if data.ndim == 1 else pd.DataFrame(data)
+    if data_format == 'cudf':
+        import cudf
+        import pandas as pd
+
+        return cudf.DataFrame.from_pandas(pd.DataFrame(data))
+    return None
+
+
+def read_csv(filename, params):
+    '''
+    Load a comma-separated file as a numpy array of ``params.dtype``.
+
+    A header row is assumed when the first line, after dropping every "nan",
+    contains a letter other than e/E. pandas is used when importable,
+    numpy.genfromtxt otherwise. Single-column results are flattened to 1-D.
+    '''
+    from string import ascii_lowercase, ascii_uppercase
+    header_letters = set(ascii_lowercase + ascii_uppercase) - {'e', 'E'}
+    with open(filename, 'r') as file:
+        first_line = file.readline()
+    while 'nan' in first_line:
+        first_line = first_line.replace('nan', '')
+    header = 0 if header_letters.intersection(first_line) else None
+    try:
+        import pandas as pd
+        data = pd.read_csv(filename, header=header, dtype=params.dtype).values
+    except ImportError:
+        data = np.genfromtxt(filename, delimiter=',', dtype=params.dtype,
+                             skip_header=0 if header is None else 1)
+    if data.ndim == 2 and data.shape[1] == 1:
+        data = data.reshape((data.shape[0],))
+    return data
+
+
+def load_data(params, generated_data=(), add_dtype=False, label_2d=False,
+              int_label=False):
+    '''
+    Load (or generate) the data described by ``params`` and return the tuple
+    (X_train, X_test, y_train, y_test). A set ``params.file_<name>`` is read
+    from .npy or csv; names in ``generated_data`` without a file get random
+    data of shape ``params.shape``; missing test data aliases the train data.
+    Also sets ``params.dtype`` and, if absent, ``params.size`` from X_train.
+    '''
+    full_data = dict.fromkeys(['X_train', 'X_test', 'y_train', 'y_test'])
+    param_vars = vars(params)
+    int_dtype = np.int32 if '32' in str(params.dtype) else np.int64
+    for element in full_data:
+        is_label = 'y' in element
+        dtype = int_dtype if is_label and int_label else params.dtype
+        file_arg = param_vars[f'file_{element}']
+        # load and convert data from npy/csv file if path is specified
+        if file_arg is not None:
+            if file_arg.name.endswith('.npy'):
+                data = np.load(file_arg.name)
+            else:
+                data = read_csv(file_arg.name, params)
+            full_data[element] = convert_data(
+                data, dtype, params.data_order, params.data_format)
+        # generate and convert data if it's marked and path isn't specified
+        if full_data[element] is None and element in generated_data:
+            full_data[element] = convert_data(
+                np.random.rand(*params.shape), dtype,
+                params.data_order, params.data_format)
+        loaded = full_data[element]
+        if loaded is None:
+            continue
+        # convert existing labels from 1- to 2-dimensional when forced
+        if is_label and label_2d and hasattr(loaded, 'reshape'):
+            loaded = full_data[element] = loaded.reshape((loaded.shape[0], 1))
+        # add dtype property to data if it's needed and doesn't exist
+        if add_dtype and not hasattr(loaded, 'dtype'):
+            if hasattr(loaded, 'values'):
+                loaded.dtype = loaded.values.dtype
+            elif hasattr(loaded, 'dtypes'):
+                loaded.dtype = loaded.dtypes[0].type
+
+    params.dtype = get_dtype(full_data['X_train'])
+    # add size to parameters which is need for some cases
+    if not hasattr(params, 'size'):
+        params.size = size_str(full_data['X_train'].shape)
+
+    # clone train data to test if test data is None
+    for data in ['X', 'y']:
+        if full_data[f'{data}_train'] is not None and full_data[f'{data}_test'] is None:
+            full_data[f'{data}_test'] = full_data[f'{data}_train']
+    return tuple(full_data.values())
+
+
+def output_csv(columns, params, functions, times, accuracies=None):
+    '''Print the optional header, then one CSV row per measured function.'''
+    print_header(columns, params)
+    if accuracies is None:
+        accuracies = [None] * len(functions)
+    for i, function in enumerate(functions):
+        row = {'function': function, 'time': times[i]}
+        if accuracies[i] is not None:
+            row['accuracy'] = accuracies[i]
+        print_row(columns, params, **row)
+
+
+def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
+                   alg_params=None):
+    '''
+    Describe one benchmark stage as a JSON-ready dict. Algorithm parameters
+    come from ``alg_instance`` first and are then overridden by ``alg_params``.
+    '''
+    input_data = {
+        'data_format': params.data_format,
+        'data_order': params.data_order,
+        'data_type': str(params.dtype),
+        'dataset_name': params.dataset_name,
+        'rows': data.shape[0],
+        'columns': data.shape[1]
+    }
+    algorithm_parameters = {}
+    if alg_instance is not None:
+        if 'Booster' in str(type(alg_instance)):
+            algorithm_parameters.update(dict(alg_instance.attributes()))
+        else:
+            algorithm_parameters.update(dict(alg_instance.get_params()))
+    if alg_params is not None:
+        algorithm_parameters.update(alg_params)
+    return {'library': library, 'algorithm': algorithm, 'stage': stage,
+            'input_data': input_data,
+            'algorithm_parameters': algorithm_parameters}
+
+
+def print_output(library, algorithm, stages, columns, params, functions,
+                 times, accuracy_type, accuracies, data, alg_instance=None,
+                 alg_params=None):
+    '''Report results as CSV rows or as a JSON list, per params.output_format.'''
+    if params.output_format == 'csv':
+        output_csv(columns, params, functions, times, accuracies)
+    if params.output_format != 'json':
+        return
+    output = []
+    for i, stage in enumerate(stages):
+        result = gen_basic_dict(library, algorithm, stage, params,
+                                data[i], alg_instance, alg_params)
+        result['time[s]'] = times[i]
+        if accuracy_type is not None:
+            result[f'{accuracy_type}'] = accuracies[i]
+        if hasattr(params, 'n_classes'):
+            result['input_data']['classes'] = params.n_classes
+        if hasattr(params, 'n_clusters'):
+            if algorithm == 'kmeans':
+                result['input_data']['n_clusters'] = params.n_clusters
+            elif algorithm == 'dbscan':
+                result['n_clusters'] = params.n_clusters
+        alg_parameters = result['algorithm_parameters']
+        if alg_instance is not None:
+            # replace non-string init with string for kmeans benchmarks
+            if not isinstance(alg_parameters.get('init', ''), str):
+                alg_parameters['init'] = 'random'
+            alg_parameters.pop('handle', None)
+        output.append(result)
+    print(json.dumps(output, indent=4))
+
+
+def import_fptype_getter():  # return daal4py's getFPType from whichever module provides it
+    try:
+        from daal4py.sklearn._utils import getFPType  # first choice: the _utils module
+    except ImportError:
+        from daal4py.sklearn.utils import getFPType  # fallback: the utils module
+    return getFPType
diff --git a/modelbuilders/lgbm_mb.py b/modelbuilders/lgbm_mb.py
new file mode 100644
index 000000000..299c5a0c0
--- /dev/null
+++ b/modelbuilders/lgbm_mb.py
@@ -0,0 +1,141 @@
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import daal4py
+import lightgbm as lgbm
+import numpy as np
+from os import environ
+from typing import Tuple
+
+
+from bench import load_data, measure_function_time, parse_args, rmse_score
+from utils import get_accuracy, print_output
+
+
+parser = argparse.ArgumentParser(
+    description='lightgbm gbt + model transform + daal predict benchmark')
+
+parser.add_argument('--colsample-bytree', type=float, default=1,
+                    help='Subsample ratio of columns '
+                         'when constructing each tree')
+parser.add_argument('--learning-rate', '--eta', type=float, default=0.3,
+                    help='Step size shrinkage used in update '
+                         'to prevent overfitting')
+parser.add_argument('--max-bin', type=int, default=256,
+                    help='Maximum number of discrete bins to '
+                         'bucket continuous features')
+parser.add_argument('--max-delta-step', type=float, default=0,
+                    help='Maximum delta step we allow each leaf output to be')
+parser.add_argument('--max-depth', type=int, default=6,
+                    help='Maximum depth of a tree')
+parser.add_argument('--max-leaves', type=int, default=0,
+                    help='Maximum number of nodes to be added')
+parser.add_argument('--min-child-weight', type=float, default=1,
+                    help='Minimum sum of instance weight needed in a child')
+parser.add_argument('--min-split-gain', '--gamma', type=float, default=0,
+                    help='Minimum loss reduction required to make'
+                         ' partition on a leaf node')
+parser.add_argument('--n-estimators', type=int, default=100,
+                    help='Number of gradient boosted trees')
+parser.add_argument('--objective', type=str, required=True,
+                    choices=('regression', 'binary', 'multiclass'),
+                    help='Learning task: regression, binary or multiclass')
+parser.add_argument('--reg-alpha', type=float, default=0,
+                    help='L1 regularization term on weights')
+parser.add_argument('--reg-lambda', type=float, default=1,
+                    help='L2 regularization term on weights')
+parser.add_argument('--scale-pos-weight', type=float, default=1,
+                    help='Controls a balance of positive and negative weights')
+parser.add_argument('--subsample', type=float, default=1,
+                    help='Subsample ratio of the training instances')
+
+params = parse_args(parser)
+
+X_train, X_test, y_train, y_test = load_data(params)
+
+lgbm_params = {
+    'verbosity': -1,
+    'learning_rate': params.learning_rate,
+    'min_split_gain': params.min_split_gain,
+    'max_depth': params.max_depth,
+    'min_child_weight': params.min_child_weight,
+    'max_delta_step': params.max_delta_step,
+    'subsample': params.subsample,
+    'colsample_bytree': params.colsample_bytree,
+    'colsample_bynode': 1,
+    'reg_lambda': params.reg_lambda,
+    'reg_alpha': params.reg_alpha,
+    'scale_pos_weight': params.scale_pos_weight,
+    'max_leaves': params.max_leaves,
+    'max_bin': params.max_bin,
+    'objective': params.objective,
+    'seed': params.seed
+}
+
+if params.threads != -1:
+    lgbm_params.update({'nthread': params.threads})
+
+if 'OMP_NUM_THREADS' in environ.keys():
+    lgbm_params['nthread'] = int(environ['OMP_NUM_THREADS'])
+
+columns: Tuple[str, ...] = ('batch', 'arch', 'prefix', 'function', 'prep_function',
+                            'threads', 'dtype', 'size', 'num_trees', 'time', 'prep_time')
+
+if params.objective.startswith('reg'):
+    task = 'regression'
+    metric_name, metric_func = 'rmse', rmse_score
+    columns += ('rmse',)
+else:
+    task = 'classification'
+    metric_name, metric_func = 'accuracy[%]', get_accuracy
+    columns += ('n_classes', 'accuracy')
+    if 'cudf' in str(type(y_train)):
+        params.n_classes = y_train[y_train.columns[0]].nunique()
+    else:
+        params.n_classes = len(np.unique(y_train))
+    if params.n_classes > 2:
+        lgbm_params['num_class'] = params.n_classes
+
+t_creat_train, lgbm_train = measure_function_time(lgbm.Dataset, X_train, y_train, params=params,
+                                                  free_raw_data=False)
+
+t_creat_test, lgbm_test = measure_function_time(lgbm.Dataset, X_test, y_test, params=params,
+                                                reference=lgbm_train, free_raw_data=False)
+
+t_train, model_lgbm = measure_function_time(
+    lgbm.train, lgbm_params, lgbm_train, params=params, num_boost_round=params.n_estimators,
+    valid_sets=lgbm_train, verbose_eval=False)
+# numpy arrays (the default data format) have no .equals(); fall back to numpy
+same_data = (X_train.equals(X_test) if hasattr(X_train, 'equals')
+             else np.array_equal(X_train, X_test))
+train_metric = None if same_data else metric_func(y_train, model_lgbm.predict(X_train))
+
+t_lgbm_pred, y_test_pred = measure_function_time(model_lgbm.predict, X_test, params=params)
+test_metric_lgbm = metric_func(y_test, y_test_pred)
+
+t_trans, model_daal = measure_function_time(
+    daal4py.get_gbt_model_from_lightgbm, model_lgbm, params=params)
+
+if hasattr(params, 'n_classes'):
+    predict_algo = daal4py.gbt_classification_prediction(
+        nClasses=params.n_classes, resultsToEvaluate='computeClassLabels', fptype='float')
+    t_daal_pred, daal_pred = measure_function_time(
+        predict_algo.compute, X_test, model_daal, params=params)
+    test_metric_daal = metric_func(y_test, daal_pred.prediction)
+else:
+    predict_algo = daal4py.gbt_regression_prediction()
+    t_daal_pred, daal_pred = measure_function_time(
+        predict_algo.compute, X_test, model_daal, params=params)
+    test_metric_daal = metric_func(y_test, daal_pred.prediction)
+# functions and times are consumed pairwise per stage: (prep step, main step)
+print_output(
+    library='modelbuilders', algorithm=f'lightgbm_{task}_and_modelbuilder',
+    stages=['lgbm_train', 'lgbm_predict', 'daal4py_predict'],
+    columns=columns, params=params,
+    functions=['lgbm_dataset', 'lgbm_train', 'lgbm_dataset', 'lgbm_predict', 'lgbm_to_daal',
+               'daal_compute'],
+    times=[t_creat_train, t_train, t_creat_test, t_lgbm_pred, t_trans, t_daal_pred],
+    accuracy_type=metric_name, accuracies=[train_metric, test_metric_lgbm, test_metric_daal],
+    data=[X_train, X_test, X_test])
diff --git a/modelbuilders/utils.py b/modelbuilders/utils.py
new file mode 100644
index 000000000..1a076daad
--- /dev/null
+++ b/modelbuilders/utils.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2017-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+
+from bench import print_header, print_row
+import json
+import numpy as np
+
+
+def get_accuracy(true_labels, prediction):
+    '''Return the accuracy in percent. A float prediction is thresholded at
+    0.5, a one-element row is the label itself, a longer row is argmax-ed.'''
+    errors = 0
+    for i in range(len(true_labels)):
+        # np.float was removed in NumPy 1.24; np.floating covers every width
+        if isinstance(prediction[i], (float, np.floating)):
+            pred_label = prediction[i] > 0.5
+        elif prediction[i].shape[0] == 1:
+            pred_label = prediction[i][0]
+        else:
+            pred_label = np.argmax(prediction[i])
+        if true_labels[i] != pred_label:
+            errors += 1
+    return 100 * (1 - errors / len(true_labels))
+
+
+def print_output(library, algorithm, stages, columns, params, functions,
+                 times, accuracy_type, accuracies, data):
+    '''
+    Report model-builder results as CSV rows or as a JSON list.
+    ``functions`` and ``times`` hold two entries per stage: index 2*i is the
+    preparation step (matrix creation / model conversion) and 2*i+1 the main
+    step (training / prediction). ``accuracies`` has one entry per stage.
+    '''
+    if params.output_format == 'csv':
+        print_header(columns, params)
+        for i in range(len(accuracies)):
+            # times[2*i] belongs to the prep step, times[2*i+1] to the main one
+            print_row(
+                columns, params, prep_function=functions[2 * i],
+                function=functions[2 * i + 1],
+                prep_time=times[2 * i], time=times[2 * i + 1],
+                accuracy=accuracies[i])
+    elif params.output_format == 'json':
+        input_data = {
+            'data_format': params.data_format,
+            'data_order': params.data_order,
+            'data_type': str(params.dtype),
+            'dataset_name': params.dataset_name,
+            'rows': data[0].shape[0],
+            'columns': data[0].shape[1]
+        }
+        if hasattr(params, 'n_classes'):
+            input_data['classes'] = params.n_classes
+        output = [{'library': library, 'algorithm': algorithm,
+                   'input_data': input_data}]
+        for i, stage in enumerate(stages):
+            if 'daal' in stage:
+                prep_key, main_key = 'conversion_to_daal4py', 'prediction_time'
+            elif 'train' in stage:
+                prep_key, main_key = 'matrix_creation_time', 'training_time'
+            else:
+                prep_key, main_key = 'matrix_creation_time', 'prediction_time'
+            result = {'stage': stage, prep_key: times[2 * i],
+                      main_key: times[2 * i + 1]}
+            if accuracies[i] is not None:
+                result[f'{accuracy_type}'] = accuracies[i]
+            output.append(result)
+        print(json.dumps(output, indent=4))
diff --git a/modelbuilders/xgb_mb.py b/modelbuilders/xgb_mb.py
new file mode 100644
index 000000000..a8849e31b
--- /dev/null
+++ b/modelbuilders/xgb_mb.py
@@ -0,0 +1,168 @@
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import daal4py
+import numpy as np
+from os import environ
+from typing import Tuple
+import xgboost as xgb
+
+
+from bench import load_data, measure_function_time, parse_args, rmse_score
+from utils import get_accuracy, print_output
+
+
+parser = argparse.ArgumentParser(
+    description='xgboost gbt + model transform + daal predict benchmark')
+
+parser.add_argument('--colsample-bytree', type=float, default=1,
+                    help='Subsample ratio of columns '
+                         'when constructing each tree')
+parser.add_argument('--count-dmatrix', default=False, action='store_true',
+                    help='Count DMatrix creation in time measurements')
+parser.add_argument('--enable-experimental-json-serialization', default=True,
+                    choices=('True', 'False'), help='Use JSON to store memory snapshots')
+parser.add_argument('--grow-policy', type=str, default='depthwise',
+                    help='Controls a way new nodes are added to the tree')
+parser.add_argument('--learning-rate', '--eta', type=float, default=0.3,
+                    help='Step size shrinkage used in update '
+                         'to prevents overfitting')
+parser.add_argument('--max-bin', type=int, default=256,
+                    help='Maximum number of discrete bins to '
+                         'bucket continuous features')
+parser.add_argument('--max-delta-step', type=float, default=0,
+                    help='Maximum delta step we allow each leaf output to be')
+parser.add_argument('--max-depth', type=int, default=6,
+                    help='Maximum depth of a tree')
+parser.add_argument('--max-leaves', type=int, default=0,
+                    help='Maximum number of nodes to be added')
+parser.add_argument('--min-child-weight', type=float, default=1,
+                    help='Minimum sum of instance weight needed in a child')
+parser.add_argument('--min-split-loss', '--gamma', type=float, default=0,
+                    help='Minimum loss reduction required to make'
+                         ' partition on a leaf node')
+parser.add_argument('--n-estimators', type=int, default=100,
+                    help='Number of gradient boosted trees')
+parser.add_argument('--objective', type=str, required=True,
+                    choices=('reg:squarederror', 'binary:logistic',
+                             'multi:softmax', 'multi:softprob'),
+                    help='Control a balance of positive and negative weights')
+parser.add_argument('--reg-alpha', type=float, default=0,
+                    help='L1 regularization term on weights')
+parser.add_argument('--reg-lambda', type=float, default=1,
+                    help='L2 regularization term on weights')
+parser.add_argument('--scale-pos-weight', type=float, default=1,
+                    help='Controls a balance of positive and negative weights')
+parser.add_argument('--single-precision-histogram', default=False, action='store_true',
+                    help='Build histograms instead of double precision')
+parser.add_argument('--subsample', type=float, default=1,
+                    help='Subsample ratio of the training instances')
+parser.add_argument('--tree-method', type=str, required=True,
+                    help='The tree construction algorithm used in XGBoost')
+
+params = parse_args(parser)
+
+X_train, X_test, y_train, y_test = load_data(params)
+
+xgb_params = {
+    'booster': 'gbtree',
+    'verbosity': 0,
+    'learning_rate': params.learning_rate,
+    'min_split_loss': params.min_split_loss,
+    'max_depth': params.max_depth,
+    'min_child_weight': params.min_child_weight,
+    'max_delta_step': params.max_delta_step,
+    'subsample': params.subsample,
+    'sampling_method': 'uniform',
+    'colsample_bytree': params.colsample_bytree,
+    'colsample_bylevel': 1,
+    'colsample_bynode': 1,
+    'reg_lambda': params.reg_lambda,
+    'reg_alpha': params.reg_alpha,
+    'tree_method': params.tree_method,
+    'scale_pos_weight': params.scale_pos_weight,
+    'grow_policy': params.grow_policy,
+    'max_leaves': params.max_leaves,
+    'max_bin': params.max_bin,
+    'objective': params.objective,
+    'seed': params.seed,
+    'single_precision_histogram': params.single_precision_histogram,
+    'enable_experimental_json_serialization': params.enable_experimental_json_serialization
+}
+
+if params.threads != -1:
+    xgb_params.update({'nthread': params.threads})
+
+if 'OMP_NUM_THREADS' in environ.keys():
+    xgb_params['nthread'] = int(environ['OMP_NUM_THREADS'])
+
+columns: Tuple[str, ...] = ('batch', 'arch', 'prefix', 'function', 'prep_function',
+                            'threads', 'dtype', 'size', 'num_trees', 'time', 'prep_time')
+
+if params.objective.startswith('reg'):
+    task = 'regression'
+    metric_name, metric_func = 'rmse', rmse_score
+    columns += ('rmse',)
+else:
+    task = 'classification'
+    metric_name, metric_func = 'accuracy[%]', get_accuracy
+    columns += ('n_classes', 'accuracy')
+    if 'cudf' in str(type(y_train)):
+        params.n_classes = y_train[y_train.columns[0]].nunique()
+    else:
+        params.n_classes = len(np.unique(y_train))
+    if params.n_classes > 2:
+        xgb_params['num_class'] = params.n_classes
+
+t_creat_train, dtrain = measure_function_time(xgb.DMatrix, X_train, params=params, label=y_train)
+
+t_creat_test, dtest = measure_function_time(xgb.DMatrix, X_test, params=params)
+
+
+def fit(dmatrix=None):
+    if dmatrix is None:
+        dmatrix = xgb.DMatrix(X_train, y_train)
+    return xgb.train(xgb_params, dmatrix, params.n_estimators)
+
+
+def predict():
+    dmatrix = xgb.DMatrix(X_test, y_test)
+    return model_xgb.predict(dmatrix)
+
+
+t_train, model_xgb = measure_function_time(
+    fit, None if params.count_dmatrix else dtrain, params=params)
+train_metric = None
+if not X_train.equals(X_test):
+    y_train_pred = model_xgb.predict(dtrain)
+    train_metric = metric_func(y_train, y_train_pred)
+
+t_xgb_pred, y_test_pred = measure_function_time(predict, params=params)
+test_metric_xgb = metric_func(y_test, y_test_pred)
+
+t_trans, model_daal = measure_function_time(
+    daal4py.get_gbt_model_from_xgboost, model_xgb, params=params)
+
+if hasattr(params, 'n_classes'):
+    predict_algo = daal4py.gbt_classification_prediction(
+        nClasses=params.n_classes, resultsToEvaluate='computeClassLabels', fptype='float')
+    t_daal_pred, daal_pred = measure_function_time(
+        predict_algo.compute, X_test, model_daal, params=params)
+    test_metric_daal = metric_func(y_test, daal_pred.prediction)
+else:
+    predict_algo = daal4py.gbt_regression_prediction()
+    t_daal_pred, daal_pred = measure_function_time(
+        predict_algo.compute, X_test, model_daal, params=params)
+    test_metric_daal = metric_func(y_test, daal_pred.prediction)
+
+print_output(
+    library='modelbuilders', algorithm=f'xgboost_{task}_and_modelbuilder',
+    stages=['xgboost_train', 'xgboost_predict', 'daal4py_predict'],
+    columns=columns, params=params,
+    functions=['xgb_dmatrix', 'xgb_dmatrix', 'xgb_train', 'xgb_predict', 'xgb_to_daal',
+               'daal_compute'],
+    times=[t_creat_train, t_train, t_creat_test, t_xgb_pred, t_trans, t_daal_pred],
+    accuracy_type=metric_name, accuracies=[train_metric, test_metric_xgb, test_metric_daal],
+    data=[X_train, X_test, X_test])
diff --git a/xgboost/gbt.py b/xgboost/gbt.py
index 701f5c488..91c7d76d8 100644
--- a/xgboost/gbt.py
+++ b/xgboost/gbt.py
@@ -65,6 +65,12 @@ def convert_xgb_predictions(y_pred, objective):
                     choices=('reg:squarederror', 'binary:logistic',
                              'multi:softmax', 'multi:softprob'),
                     help='Control a balance of positive and negative weights')
+parser.add_argument('--count-dmatrix', default=False, action='store_true',
+                    help='Count DMatrix creation in time measurements')
+parser.add_argument('--single-precision-histogram', default=False, action='store_true',
+                    help='Build histograms instead of double precision')
+parser.add_argument('--enable-experimental-json-serialization', default=True,
+                    choices=('True', 'False'), help='Use JSON to store memory snapshots')
 
 params = parse_args(parser)
 
@@ -92,7 +98,9 @@ def convert_xgb_predictions(y_pred, objective):
     'max_leaves': params.max_leaves,
     'max_bin': params.max_bin,
     'objective': params.objective,
-    'seed': params.seed
+    'seed': params.seed,
+    'single_precision_histogram': params.single_precision_histogram,
+    'enable_experimental_json_serialization': params.enable_experimental_json_serialization
 }
 
 if params.threads != -1:
@@ -122,14 +130,26 @@ def convert_xgb_predictions(y_pred, objective):
 
 dtrain = xgb.DMatrix(X_train, y_train)
 dtest = xgb.DMatrix(X_test, y_test)
+if params.count_dmatrix:
+    def fit():
+        dtrain = xgb.DMatrix(X_train, y_train)
+        return xgb.train(xgb_params, dtrain, params.n_estimators)
+
+    def predict():
+        dtest = xgb.DMatrix(X_test, y_test)
+        return booster.predict(dtest)
+else:
+    def fit():
+        return xgb.train(xgb_params, dtrain, params.n_estimators)
+
+    def predict():
+        return booster.predict(dtest)
 
-fit_time, booster = measure_function_time(
-    xgb.train, xgb_params, dtrain, params.n_estimators, params=params)
+fit_time, booster = measure_function_time(fit, params=params)
 y_pred = convert_xgb_predictions(booster.predict(dtrain), params.objective)
 train_metric = metric_func(y_pred, y_train)
 
-predict_time, y_pred = measure_function_time(
-    booster.predict, dtest, params=params)
+predict_time, y_pred = measure_function_time(predict, params=params)
 test_metric = metric_func(
     convert_xgb_predictions(y_pred, params.objective), y_test)