In [1]:
import os
import pickle
import time

from lightautoml.dataset.np_pd_dataset_cupy import *
from lightautoml.dataset.roles import *
from lightautoml.dataset.utils import roles_parser

from lightautoml.tasks import Task

from lightautoml.ml_algo import linear_gpu

# Initial data loading

In [2]:
# Load only the columns this example needs from the sampled training file.
data = pd.read_csv(
    './example_data/test_data_files/sampled_app_train.csv',
    usecols=['TARGET', 'NAME_CONTRACT_TYPE', 'AMT_CREDIT',
             'NAME_TYPE_SUITE', 'AMT_GOODS_PRICE',
             'DAYS_BIRTH', 'DAYS_EMPLOYED'],
)

# Convert the day offsets into calendar dates relative to a fixed reference date.
# pd.to_timedelta(..., unit='D') is used instead of .astype('timedelta64[D]'):
# recent pandas versions only accept s/ms/us/ns resolutions in astype, while
# to_timedelta yields the same day-based offsets on old and new versions.
reference_date = pd.Timestamp('2018-01-01')
data['BIRTH_DATE'] = reference_date + pd.to_timedelta(data['DAYS_BIRTH'], unit='D')
# Positive DAYS_EMPLOYED values are clipped to 0, so EMP_DATE never lands
# after the reference date.
data['EMP_DATE'] = reference_date + pd.to_timedelta(
    data['DAYS_EMPLOYED'].clip(upper=0), unit='D'
)
# Reassign instead of dropping in place so the cell has no hidden mutation.
data = data.drop(columns=['DAYS_BIRTH', 'DAYS_EMPLOYED'])

# Create folds: each row gets a fold id in [0, 5).
# A locally seeded generator makes the assignment reproducible across runs
# without touching NumPy's global random state.
fold_rng = np.random.default_rng(42)
data['__fold__'] = fold_rng.integers(0, 5, len(data))

# Print data head
print(data.head())

# Set roles for columns: each role object maps to the column(s) it applies to.
check_roles = {
    TargetRole(): 'TARGET',
    CategoryRole(dtype=str): ['NAME_CONTRACT_TYPE', 'NAME_TYPE_SUITE'],
    NumericRole(np.float32): ['AMT_CREDIT', 'AMT_GOODS_PRICE'],
    DatetimeRole(seasonality=['y', 'm', 'wd']): ['BIRTH_DATE', 'EMP_DATE'],
    FoldsRole(): '__fold__'
}

# Create the task ('binary' task name, matching the TARGET column role).
task = Task('binary')

# Create the PandasDataset from the frame, the parsed roles and the task.
pd_dataset = PandasDataset(data, roles_parser(check_roles), task=task)

# Convert the pandas-backed dataset to its cuDF counterpart.
cudf_dataset = pd_dataset.to_cudf()

   TARGET NAME_CONTRACT_TYPE  AMT_CREDIT  ...  BIRTH_DATE   EMP_DATE __fold__
0       0         Cash loans    327024.0  ...  1967-02-08 2017-05-20        3
1       0         Cash loans    675000.0  ...  1962-05-19 2007-07-16        4
2       0    Revolving loans    270000.0  ...  1991-11-14 2015-01-21        2
3       0         Cash loans    142200.0  ...  1986-09-25 2011-10-13        4
4       0         Cash loans   1483231.5  ...  1981-11-08 2013-02-21        4

[5 rows x 8 columns]
