# AutoML 알고리즘

In [1]:
# pip install scikit-learn
# pip install h2o
# pip install matplotlib
# pip install statsmodels
# pip install seaborn

In [2]:
##### 라이브러리 호출 #####
import numpy as np
import pandas as pd
import time
import glob
import pickle
import itertools

import h2o
from h2o.automl import H2OAutoML
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from sklearn.model_selection import train_test_split
from statsmodels.formula.api import ols

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 100)  # 데이터프레임 출력 옵션

------

## 입력값 기입

In [3]:
# User inputs:
#   test_year - year held out as the test set (kept as a string because the
#               data is loaded with dtype='str' and compared against YEAR)
#   y_colnm   - name of the target (y) column
test_year, y_colnm = '2021', 'SEP_CNT'

In [4]:
## Start (or connect to) the H2O cluster, restricted to a single thread
h2o.init(nthreads=1)

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "1.8.0_312"; OpenJDK Runtime Environment (build 1.8.0_312-8u312-b07-0ubuntu1~20.04-b07); OpenJDK 64-Bit Server VM (build 25.312-b07, mixed mode)
  Starting server from /home/lime/.local/lib/python3.8/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpl53z0pgj
  JVM stdout: /tmp/tmpl53z0pgj/h2o_lime_started_from_python.out
  JVM stderr: /tmp/tmpl53z0pgj/h2o_lime_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,01 secs
H2O_cluster_timezone:,Asia/Seoul
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.34.0.7
H2O_cluster_version_age:,24 days
H2O_cluster_name:,H2O_from_python_lime_6fo7bo
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,4.271 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,1


------

In [5]:
# Timestamp taken before the main processing starts
# (presumably used later to report total elapsed time - confirm in later cells)
total_start_time = time.time()

In [6]:
# Collect the CSV files under data/ as a sorted list of paths with the
# trailing ".csv" (4 characters) removed.
# Built straight from the glob result: going through a DataFrame raised
# KeyError when no file matched, because an empty frame has no column to rename.
file_list = sorted(path[:-4] for path in glob.glob('data/*.csv'))

In [7]:
file_list

['data/merge_age',
 'data/merge_age_half',
 'data/merge_age_month',
 'data/merge_age_quarter',
 'data/merge_cls',
 'data/merge_cls_half',
 'data/merge_cls_month',
 'data/merge_cls_quarter',
 'data/merge_dis',
 'data/merge_dis_half',
 'data/merge_dis_month',
 'data/merge_dis_quarter']

In [8]:
# Restrict this run to the "merge_dis" datasets
# (yearly, half-yearly, monthly and quarterly variants).
file_list = [
    'data/merge_dis',
    'data/merge_dis_half',
    'data/merge_dis_month',
    'data/merge_dis_quarter',
]

In [None]:
# For every input file: train one H2O AutoML model per (COL1, COL2) group on
# all years except `test_year`, predict the test year, and write the collected
# predictions of that file to result/result_<file>.csv.
import os

from sklearn.preprocessing import RobustScaler

# Create the output directory up front so a missing folder cannot make the
# final to_csv fail after all models have already been trained.
os.makedirs('result', exist_ok=True)

for file_nm in file_list:

    print(file_nm)

    ## Load the data (every column as str; numeric columns are converted below)
    tot_data = pd.read_csv(file_nm + '.csv', dtype='str', encoding='cp949')

    # ---------------------------------------------------------- #
    # Build STAND_TIME (the time key used for ordering) and list the
    # time-related columns that must not be used as predictors.
    # Monthly data
    if 'month' in file_nm:
        tot_data['STAND_TIME'] = tot_data['YEAR'] + tot_data['MONTH']
        add_except_col = ['YEAR', 'MONTH', 'STAND_TIME']
    # Quarterly data
    elif 'quarter' in file_nm:
        tot_data['STAND_TIME'] = tot_data['YEAR'] + tot_data['QUARTER']
        add_except_col = ['YEAR', 'QUARTER', 'STAND_TIME']
    # Half-yearly data
    elif 'half' in file_nm:
        tot_data['STAND_TIME'] = tot_data['YEAR'] + tot_data['HALF']
        add_except_col = ['YEAR', 'HALF', 'STAND_TIME']
    # Yearly data: the year is stored in BASE_YY, so rename it to YEAR
    else:
        tot_data['YEAR'] = tot_data['BASE_YY']
        tot_data['STAND_TIME'] = tot_data['BASE_YY']
        del tot_data['BASE_YY']
        add_except_col = ['YEAR', 'STAND_TIME']
    # ---------------------------------------------------------- #

    ## Grouping columns (COL1 & COL2): the first two of the first three
    ## columns that are not YEAR
    COL_LIST = [col for col in tot_data.columns[0:3] if col != 'YEAR']
    COL1 = COL_LIST[0]
    COL2 = COL_LIST[1]

    ## Training years: every year in the file except the test year
    train_year = list(tot_data['YEAR'].unique())
    if test_year not in train_year:
        raise ValueError('test_year ' + repr(test_year) + ' not found in YEAR column of ' + file_nm)
    train_year.remove(test_year)

    ## Predictor columns: x_colnm
    # Keep the original column order (a set difference would give a different
    # order on every run, which makes the AutoML results non-reproducible).
    tot_colnm = list(tot_data.columns)
    except_colnm = [COL1, COL2, y_colnm] + add_except_col
    x_colnm = [col for col in tot_colnm if col not in except_colnm]

    COL1_list = list(tot_data[COL1].unique())
    COL2_list = list(tot_data[COL2].unique())

    # Per-group results of this file; concatenated once after the loops
    # (DataFrame.append no longer exists in pandas >= 2.0).
    group_results = []

    for col1 in COL1_list:
        for col2 in COL2_list:

            print(col1, col2)

            # ---------------------------------------------------------- #
            # Rows of the current group, ordered by time
            data = tot_data.loc[(tot_data[COL1] == col1) & (tot_data[COL2] == col2),].sort_values(by='STAND_TIME').reset_index(drop=True)
            # ---------------------------------------------------------- #
            # Type conversion (str -> float) for target and predictors
            num_colnm = [y_colnm] + x_colnm
            data[num_colnm] = data[num_colnm].astype('float')
            # ---------------------------------------------------------- #
            # Split into train and test by year
            is_train = data['YEAR'].isin(train_year)
            train = data.loc[is_train, num_colnm]
            test = data.loc[~is_train, num_colnm]

            # A (col1, col2) combination may not exist in the data, or may
            # have no rows in the test year; nothing can be fitted/predicted.
            if train.empty or test.empty:
                print('  skipped: no train or test rows for this group')
                continue
            # ---------------------------------------------------------- #
            # Split into x and y
            train_x = train[x_colnm].reset_index(drop=True)
            train_y = train[[y_colnm]].reset_index(drop=True)
            test_x = test[x_colnm].reset_index(drop=True)
            test_y = test[[y_colnm]].reset_index(drop=True)
            # ---------------------------------------------------------- #
            # Scaling with RobustScaler (median = 0, IQR = 1), which limits
            # the influence of outliers. The scaler is fitted on train only.
            scaler = RobustScaler()
            Robust_train_x = pd.DataFrame(scaler.fit_transform(train_x), columns=list(train_x.columns))
            Robust_test_x = pd.DataFrame(scaler.transform(test_x), columns=list(test_x.columns))
            # ---------------------------------------------------------- #

            ####################### Variable selection #######################

            ## Correlation analysis (train data only; test data must not be used)
            # Keep predictors whose Pearson correlation with y is >= 0.5.
            # NOTE(review): strongly negative correlations are dropped by this
            # filter - confirm that this is intended.
            corr_data = pd.concat([train_y, Robust_train_x], axis=1)
            corr_rslt = corr_data.corr(method='pearson')
            corr_rslt = corr_rslt.reset_index().rename(columns={'index': 'COLNM'})
            corr_rslt = corr_rslt.loc[corr_rslt['COLNM'] != y_colnm,]
            corr_rslt = corr_rslt[corr_rslt[y_colnm] >= 0.5]

            # Predictors passed to the model
            # NOTE(review): this list can be empty when no predictor reaches
            # the threshold - verify how H2OAutoML behaves in that case.
            mdl_x_colnm = list(corr_rslt['COLNM'])

            # Train / test datasets for the model
            mdl_train_data = pd.concat([train_y, Robust_train_x], axis=1)
            mdl_test_data = Robust_test_x

            # ---------------------------------------------------------- #
            ## Convert to H2O frames
            h2o_train_data = h2o.H2OFrame(mdl_train_data)
            h2o_test_data = h2o.H2OFrame(mdl_test_data)

            ## Train AutoML (x: predictors, y: target; no validation frame)
            model = H2OAutoML(max_models=20, max_runtime_secs=10, seed=1234)
            model.train(x=mdl_x_colnm, y=y_colnm,
                        training_frame=h2o_train_data)
            # --------------------------------------------------------------- #
            ## Leaderboard with all available columns, as a pandas DataFrame
            lb = h2o.automl.get_leaderboard(model, extra_columns="ALL")
            save_lb = lb.as_data_frame()
            # --------------------------------------------------------------- #
            ## Predict the test rows
            pred = model.predict(h2o_test_data)

            ## H2O frame -> pandas DataFrame
            pred = h2o.as_list(pred, use_pandas=True)
            pred.rename(columns={'predict': 'PREDICT'}, inplace=True)
            # ---------------------------------------------------------- #

            ## Assemble the result rows of this group
            rslt = pd.concat([pred, test_y], axis=1)
            rslt['DIFF'] = rslt['PREDICT'] - rslt[y_colnm]
            rslt['target'] = (col1 + '_' + col2)
            rslt['BEST_MDL'] = save_lb['model_id'][0]  # first leaderboard row
            # Mean squared error over the test rows, repeated on every row
            rslt['MSE'] = round((rslt['DIFF'] ** 2).mean(), 4)
            rslt['stand_time'] = list(data.loc[~is_train, 'STAND_TIME'])
            rslt = rslt[['target', 'stand_time', 'PREDICT', y_colnm, 'MSE', 'BEST_MDL']]

            group_results.append(rslt)

    ## Save the results of this file
    if not group_results:
        print('no results to save for', file_nm)
        continue

    col1_col2_rslt = pd.concat(group_results)
    col1_col2_rslt.to_csv('result/result_' + os.path.basename(file_nm) + '.csv', index=False, encoding='utf-8')

data/merge_dis
강원 특수_건강
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |█████
21:47:34.574: _min_rows param, The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 11.0.
21:47:34.575: Skipping training of model GBM_1_AutoML_1_20220114_214732 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_1_AutoML_1_20220114_214732.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 11.0.


██████████████████████████████
21:47:38.607: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:47:38.607: Skipping training of model GBM_2_AutoML_1

████████
21:48:02.246: StackedEnsemble_BestOfFamily_2_AutoML_3_20220114_214756 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
21:48:03.248: StackedEnsemble_AllModels_1_AutoML_3_20220114_214756 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
강원 특수_의사소통
Parse progress: |█████████

█████████████████████
21:48:35.327: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:48:35.327: Skipping training of model GBM_2_AutoML_6_20220114_214830 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_6_20220114_214830.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:48:35.328: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:48:35.328: Skipping training of model GBM_3_AutoML_6_20220114_214830 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_6_20220114_214830.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0

█████████
21:48:59.546: StackedEnsemble_AllModels_1_AutoML_8_20220114_214852 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

███████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
강원 특수_청각
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |█████
21:49:05.665: _min_rows param, The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 11.0.
21:49:05.665: Skipping training of model GBM_1_AutoML_9_20220114_214903 due to exception: water

█████████
21:49:32.543: StackedEnsemble_BestOfFamily_2_AutoML_11_20220114_214926 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
21:49:33.548: StackedEnsemble_AllModels_1_AutoML_11_20220114_214926 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
경기 특수_발달지체
Parse progress: |███████

█████████████████████
21:50:05.193: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:50:05.194: Skipping training of model GBM_2_AutoML_14_20220114_215000 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_14_20220114_215000.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:50:05.195: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:50:05.195: Skipping training of model GBM_3_AutoML_14_20220114_215000 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_14_20220114_215000.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

█████████
21:50:28.832: StackedEnsemble_BestOfFamily_2_AutoML_16_20220114_215022 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
21:50:29.834: StackedEnsemble_AllModels_1_AutoML_16_20220114_215022 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
xgboost prediction progress: |███████████████████████████████████████████████████| (done) 100%
경기 특수_지적
Parse progress: |████████

█████████████████████
21:51:01.899: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:51:01.899: Skipping training of model GBM_2_AutoML_19_20220114_215056 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_19_20220114_215056.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:51:01.900: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:51:01.900: Skipping training of model GBM_3_AutoML_19_20220114_215056 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_19_20220114_215056.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

████████
21:51:25.475: StackedEnsemble_BestOfFamily_2_AutoML_21_20220114_215119 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
21:51:26.479: StackedEnsemble_AllModels_1_AutoML_21_20220114_215119 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
경남 특수_발달지체
Parse progress: |███████

█████████████████████
21:51:57.949: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:51:57.949: Skipping training of model GBM_2_AutoML_24_20220114_215152 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_24_20220114_215152.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:51:57.951: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:51:57.951: Skipping training of model GBM_3_AutoML_24_20220114_215152 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_24_20220114_215152.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

█████████
21:52:21.785: StackedEnsemble_BestOfFamily_2_AutoML_26_20220114_215215 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
21:52:22.788: StackedEnsemble_AllModels_1_AutoML_26_20220114_215215 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
경남 특수_지적
Parse progress: |█████████

█████████████████████
21:52:54.258: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:52:54.258: Skipping training of model GBM_2_AutoML_29_20220114_215249 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_29_20220114_215249.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:52:54.260: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:52:54.260: Skipping training of model GBM_3_AutoML_29_20220114_215249 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_29_20220114_215249.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

████████
21:53:17.875: StackedEnsemble_BestOfFamily_2_AutoML_31_20220114_215311 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
21:53:18.880: StackedEnsemble_AllModels_1_AutoML_31_20220114_215311 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
gbm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
경북 특수_발달지체
Parse progress: |███████

█████████████████████
21:53:51.0: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:53:51.0: Skipping training of model GBM_2_AutoML_34_20220114_215345 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_34_20220114_215345.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:53:51.2: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:53:51.2: Skipping training of model GBM_3_AutoML_34_20220114_215345 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_34_20220114_215345.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: mu

████████
21:54:14.783: StackedEnsemble_BestOfFamily_2_AutoML_36_20220114_215408 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
21:54:15.786: StackedEnsemble_AllModels_1_AutoML_36_20220114_215408 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
경북 특수_지적
Parse progress: |█████████

████████████████████
21:54:47.519: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:54:47.519: Skipping training of model GBM_2_AutoML_39_20220114_215442 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_39_20220114_215442.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:54:47.521: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:54:47.521: Skipping training of model GBM_3_AutoML_39_20220114_215442 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_39_20220114_215442.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=1

█████████
21:55:10.865: StackedEnsemble_BestOfFamily_2_AutoML_41_20220114_215504 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
21:55:11.868: StackedEnsemble_AllModels_1_AutoML_41_20220114_215504 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
광주 특수_발달지체
Parse progress: |███████

████████████████████
21:55:43.314: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:55:43.314: Skipping training of model GBM_2_AutoML_44_20220114_215538 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_44_20220114_215538.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:55:43.316: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:55:43.316: Skipping training of model GBM_3_AutoML_44_20220114_215538 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_44_20220114_215538.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=1

████████
21:56:06.698: StackedEnsemble_BestOfFamily_2_AutoML_46_20220114_215600 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
21:56:07.700: StackedEnsemble_AllModels_1_AutoML_46_20220114_215600 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
광주 특수_지적
Parse progress: |█████████

█████████████████████
21:56:39.144: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:56:39.144: Skipping training of model GBM_2_AutoML_49_20220114_215634 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_49_20220114_215634.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:56:39.146: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:56:39.146: Skipping training of model GBM_3_AutoML_49_20220114_215634 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_49_20220114_215634.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

████████
21:57:02.512: StackedEnsemble_BestOfFamily_2_AutoML_51_20220114_215656 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
21:57:03.517: StackedEnsemble_AllModels_1_AutoML_51_20220114_215656 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
대구 특수_발달지체
Parse progress: |███████

█████████████████████
21:57:35.389: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:57:35.389: Skipping training of model GBM_2_AutoML_54_20220114_215730 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_54_20220114_215730.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:57:35.390: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:57:35.390: Skipping training of model GBM_3_AutoML_54_20220114_215730 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_54_20220114_215730.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

█████████
21:57:58.973: StackedEnsemble_BestOfFamily_2_AutoML_56_20220114_215752 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
21:57:59.977: StackedEnsemble_AllModels_1_AutoML_56_20220114_215752 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
대구 특수_지적
Parse progress: |█████████

█████████████████████
21:58:31.384: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:58:31.384: Skipping training of model GBM_2_AutoML_59_20220114_215826 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_59_20220114_215826.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:58:31.385: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:58:31.385: Skipping training of model GBM_3_AutoML_59_20220114_215826 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_59_20220114_215826.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

████████
21:58:54.784: StackedEnsemble_BestOfFamily_2_AutoML_61_20220114_215848 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
21:58:55.787: StackedEnsemble_AllModels_1_AutoML_61_20220114_215848 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
대전 특수_발달지체
Parse progress: |███████

████████████████████
21:59:28.403: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:59:28.403: Skipping training of model GBM_2_AutoML_64_20220114_215923 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_64_20220114_215923.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

21:59:28.404: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
21:59:28.404: Skipping training of model GBM_3_AutoML_64_20220114_215923 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_64_20220114_215923.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=1

█████████
21:59:52.195: StackedEnsemble_BestOfFamily_2_AutoML_66_20220114_215946 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
21:59:53.199: StackedEnsemble_AllModels_1_AutoML_66_20220114_215946 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%
대전 특수_지적
Parse progress: |█████████

█████████████████████
22:00:24.833: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:00:24.834: Skipping training of model GBM_2_AutoML_69_20220114_220019 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_69_20220114_220019.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:00:24.835: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:00:24.835: Skipping training of model GBM_3_AutoML_69_20220114_220019 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_69_20220114_220019.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=


█████████
22:00:48.36: StackedEnsemble_BestOfFamily_2_AutoML_71_20220114_220042 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
22:00:49.39: StackedEnsemble_AllModels_1_AutoML_71_20220114_220042 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
부산 특수_발달지체
Parse progress: |████████

█████████████████████
22:01:21.103: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:01:21.104: Skipping training of model GBM_2_AutoML_74_20220114_220116 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_74_20220114_220116.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:01:21.105: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:01:21.105: Skipping training of model GBM_3_AutoML_74_20220114_220116 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_74_20220114_220116.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

█████████
22:01:44.732: StackedEnsemble_BestOfFamily_2_AutoML_76_20220114_220138 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
22:01:45.736: StackedEnsemble_AllModels_1_AutoML_76_20220114_220138 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
xgboost prediction progress: |███████████████████████████████████████████████████| (done) 100%
부산 특수_지적
Parse progress: |█████████

█████████████████████
22:02:17.816: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:02:17.816: Skipping training of model GBM_2_AutoML_79_20220114_220212 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_79_20220114_220212.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:02:17.817: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:02:17.818: Skipping training of model GBM_3_AutoML_79_20220114_220212 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_79_20220114_220212.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

█████████
22:02:41.400: StackedEnsemble_BestOfFamily_2_AutoML_81_20220114_220235 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
22:02:42.404: StackedEnsemble_AllModels_1_AutoML_81_20220114_220235 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
서울 특수_발달지체
Parse progress: |███████

█████████████████████
22:03:13.848: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:03:13.848: Skipping training of model GBM_2_AutoML_84_20220114_220308 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_84_20220114_220308.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:03:13.849: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:03:13.849: Skipping training of model GBM_3_AutoML_84_20220114_220308 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_84_20220114_220308.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

████████
22:03:37.258: StackedEnsemble_BestOfFamily_2_AutoML_86_20220114_220331 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:03:38.261: StackedEnsemble_AllModels_1_AutoML_86_20220114_220331 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
서울 특수_지적
Parse progress: |█████████

█████████████████████
22:04:09.674: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:04:09.675: Skipping training of model GBM_2_AutoML_89_20220114_220404 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_89_20220114_220404.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:04:09.676: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:04:09.676: Skipping training of model GBM_3_AutoML_89_20220114_220404 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_89_20220114_220404.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

████████
22:04:33.262: StackedEnsemble_BestOfFamily_2_AutoML_91_20220114_220427 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:04:34.265: StackedEnsemble_AllModels_1_AutoML_91_20220114_220427 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
xgboost prediction progress: |███████████████████████████████████████████████████| (done) 100%
울산 특수_발달지체
Parse progress: |███████

█████████████████████
22:05:05.486: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:05:05.486: Skipping training of model GBM_2_AutoML_94_20220114_220500 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_94_20220114_220500.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:05:05.487: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:05:05.487: Skipping training of model GBM_3_AutoML_94_20220114_220500 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_94_20220114_220500.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

████████
22:05:28.834: StackedEnsemble_BestOfFamily_2_AutoML_96_20220114_220522 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:05:29.837: StackedEnsemble_AllModels_1_AutoML_96_20220114_220522 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
울산 특수_지적
Parse progress: |█████████

█████████████████████
22:06:01.477: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:06:01.477: Skipping training of model GBM_2_AutoML_99_20220114_220556 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_99_20220114_220556.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:06:01.478: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:06:01.478: Skipping training of model GBM_3_AutoML_99_20220114_220556 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_99_20220114_220556.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

████████
22:06:24.594: StackedEnsemble_BestOfFamily_2_AutoML_101_20220114_220618 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:06:25.598: StackedEnsemble_AllModels_1_AutoML_101_20220114_220618 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
xgboost prediction progress: |███████████████████████████████████████████████████| (done) 100%
인천 특수_발달지체
Parse progress: |█████

█████████████████████
22:06:56.812: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:06:56.812: Skipping training of model GBM_2_AutoML_104_20220114_220651 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_104_20220114_220651.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:06:56.813: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:06:56.813: Skipping training of model GBM_3_AutoML_104_20220114_220651 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_104_20220114_220651.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

█████████
22:07:20.58: StackedEnsemble_BestOfFamily_2_AutoML_106_20220114_220714 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:07:21.62: StackedEnsemble_AllModels_1_AutoML_106_20220114_220714 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
gbm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
인천 특수_지적
Parse progress: |████████

█████████████████████
22:07:52.913: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:07:52.913: Skipping training of model GBM_2_AutoML_109_20220114_220747 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_109_20220114_220747.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:07:52.914: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:07:52.914: Skipping training of model GBM_3_AutoML_109_20220114_220747 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_109_20220114_220747.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

████████
22:08:16.112: StackedEnsemble_BestOfFamily_2_AutoML_111_20220114_220810 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:08:17.116: StackedEnsemble_AllModels_1_AutoML_111_20220114_220810 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
전남 특수_발달지체
Parse progress: |█████

█████████████████████
22:08:48.759: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:08:48.759: Skipping training of model GBM_2_AutoML_114_20220114_220843 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_114_20220114_220843.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:08:48.761: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:08:48.761: Skipping training of model GBM_3_AutoML_114_20220114_220843 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_114_20220114_220843.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

█████████
22:09:12.376: StackedEnsemble_BestOfFamily_2_AutoML_116_20220114_220906 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
22:09:13.382: StackedEnsemble_AllModels_1_AutoML_116_20220114_220906 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
전남 특수_지적
Parse progress: |███████

█████████████████████
22:09:44.860: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:09:44.860: Skipping training of model GBM_2_AutoML_119_20220114_220939 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_119_20220114_220939.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:09:44.861: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:09:44.861: Skipping training of model GBM_3_AutoML_119_20220114_220939 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_119_20220114_220939.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

████████
22:10:08.230: StackedEnsemble_BestOfFamily_2_AutoML_121_20220114_221002 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:10:09.236: StackedEnsemble_AllModels_1_AutoML_121_20220114_221002 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
전북 특수_발달지체
Parse progress: |█████

█████████████████████
22:10:40.902: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:10:40.902: Skipping training of model GBM_2_AutoML_124_20220114_221035 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_124_20220114_221035.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:10:40.903: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:10:40.904: Skipping training of model GBM_3_AutoML_124_20220114_221035 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_124_20220114_221035.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

████████
22:11:04.286: StackedEnsemble_BestOfFamily_2_AutoML_126_20220114_221058 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:11:05.292: StackedEnsemble_AllModels_1_AutoML_126_20220114_221058 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
전북 특수_지적
Parse progress: |███████

█████████████████████
22:11:37.317: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:11:37.317: Skipping training of model GBM_2_AutoML_129_20220114_221132 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_129_20220114_221132.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:11:37.318: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:11:37.318: Skipping training of model GBM_3_AutoML_129_20220114_221132 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_129_20220114_221132.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

████████
22:12:00.664: StackedEnsemble_BestOfFamily_2_AutoML_131_20220114_221154 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:12:01.668: StackedEnsemble_AllModels_1_AutoML_131_20220114_221154 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
제주 특수_발달지체
Parse progress: |█████

█████████████████████
22:12:33.127: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:12:33.127: Skipping training of model GBM_2_AutoML_134_20220114_221228 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_134_20220114_221228.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:12:33.128: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:12:33.128: Skipping training of model GBM_3_AutoML_134_20220114_221228 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_134_20220114_221228.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

████████
22:12:56.511: StackedEnsemble_BestOfFamily_2_AutoML_136_20220114_221250 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:12:57.515: StackedEnsemble_AllModels_1_AutoML_136_20220114_221250 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
제주 특수_지적
Parse progress: |███████

█████████████████████
22:13:28.935: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:13:28.935: Skipping training of model GBM_2_AutoML_139_20220114_221323 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_139_20220114_221323.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:13:28.936: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:13:28.936: Skipping training of model GBM_3_AutoML_139_20220114_221323 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_139_20220114_221323.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

████████
22:13:52.326: StackedEnsemble_BestOfFamily_2_AutoML_141_20220114_221346 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:13:53.330: StackedEnsemble_AllModels_1_AutoML_141_20220114_221346 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
충남 특수_발달지체
Parse progress: |█████

█████████████████████
22:14:24.545: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:14:24.545: Skipping training of model GBM_2_AutoML_144_20220114_221419 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_144_20220114_221419.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:14:24.546: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:14:24.546: Skipping training of model GBM_3_AutoML_144_20220114_221419 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_144_20220114_221419.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

████████
22:14:47.685: StackedEnsemble_BestOfFamily_2_AutoML_146_20220114_221441 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:14:48.689: StackedEnsemble_AllModels_1_AutoML_146_20220114_221441 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
충남 특수_지적
Parse progress: |███████

████████████████████
22:15:20.110: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:15:20.110: Skipping training of model GBM_2_AutoML_149_20220114_221515 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_149_20220114_221515.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:15:20.111: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:15:20.111: Skipping training of model GBM_3_AutoML_149_20220114_221515 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_149_20220114_221515.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_ro

████████
22:15:43.408: StackedEnsemble_BestOfFamily_2_AutoML_151_20220114_221537 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:15:44.412: StackedEnsemble_AllModels_1_AutoML_151_20220114_221537 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%
충북 특수_발달지체
Parse progress: |█████

█████████████████████
22:16:16.49: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:16:16.49: Skipping training of model GBM_2_AutoML_154_20220114_221611 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_154_20220114_221611.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:16:16.50: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:16:16.50: Skipping training of model GBM_3_AutoML_154_20220114_221611 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_154_20220114_221611.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=

█████████
22:16:39.190: StackedEnsemble_BestOfFamily_2_AutoML_156_20220114_221633 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████
22:16:40.195: StackedEnsemble_AllModels_1_AutoML_156_20220114_221633 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
xgboost prediction progress: |███████████████████████████████████████████████████| (done) 100%
충북 특수_지적
Parse progress: |███████

█████████████████████
22:17:11.676: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:17:11.676: Skipping training of model GBM_2_AutoML_159_20220114_221706 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_159_20220114_221706.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.

22:17:11.677: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 11.0.
22:17:11.677: Skipping training of model GBM_3_AutoML_159_20220114_221706 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_159_20220114_221706.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_r

████████
22:17:35.72: StackedEnsemble_BestOfFamily_2_AutoML_161_20220114_221729 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:17:36.76: StackedEnsemble_AllModels_1_AutoML_161_20220114_221729 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
세종 특수_발달지체
Parse progress: |███████

████████████████████| (done) 100%
drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%
세종 특수_의사소통
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |█████
22:18:04.247: _min_rows param, The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 8.0.
22:18:04.247: Skipping training of model GBM_1_AutoML_164_20220114_221802 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_1_AutoML_164_20220114_221802.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 8.0.


█████
22:18:05.251: StackedEnsemble_BestOfFamily_1_AutoML_164_20220114_221802 [StackedEnsemble best_of_family_1 (built

█████
22:18:27.581: StackedEnsemble_BestOfFamily_1_AutoML_166_20220114_221824 [StackedEnsemble best_of_family_1 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████
22:18:29.592: _min_rows param, The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 8.0.
22:18:29.592: Skipping training of model GBM_2_AutoML_166_20220114_221824 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_166_20220114_221824.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 8.0.

22:18:29.594: _min_rows param

████████
22:18:52.946: StackedEnsemble_BestOfFamily_2_AutoML_168_20220114_221846 [StackedEnsemble best_of_family_2 (built with AUTO metalearner, using top model from each algorithm type)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

█████████
22:18:53.951: StackedEnsemble_AllModels_1_AutoML_168_20220114_221846 [StackedEnsemble all_2 (built with AUTO metalearner, using all AutoML models)] failed: java.lang.RuntimeException: water.exceptions.H2OIllegalArgumentException: Not enough data to create 5 random cross-validation splits. Either reduce nfolds, specify a larger dataset (or specify another random number seed, if applicable).

████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
세종 특수_청각
Parse progress: |███████

█████████████████████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
data/merge_dis_half
강원 특수_건강
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |█████
22:19:22.210: _min_rows param, The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 20.0.
22:19:22.210: Skipping training of model GBM_1_AutoML_171_20220114_221920 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_1_AutoML_171_20220114_221920.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 20.0.


█████████████████████████████
22:19:26.230: GBM_2_AutoML_171_20220114_221920 [GBM def_2] failed: 

█████████████████████████████
22:19:48.610: GBM_2_AutoML_173_20220114_221942 [GBM def_2] failed: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_173_20220114_221942.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.


█████████
22:19:49.614: GBM_3_AutoML_17

█████████
22:20:11.781: GBM_3_AutoML_175_20220114_222004 [GBM def_3] failed: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_175_20220114_222004.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.


████████
22:20:12.785: GBM_4_AutoML_175_20220114_222004 [GB

████████
22:20:36.134: GBM_4_AutoML_177_20220114_222027 [GBM def_4] failed: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_4_AutoML_177_20220114_222027.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.


████| (done) 100%
drf prediction progress: |████████████████

████| (done) 100%
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
강원 특수_학습
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |█████
22:21:02.750: _min_rows param, The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 20.0.
22:21:02.750: Skipping training of model GBM_1_AutoML_180_20220114_222100 due to exception: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_1_AutoML_180_20220114_222100.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 20.0.


█████████████████████████████
22:21:06.766: GBM_2_AutoML_180_20220114_222100 [GBM def_2] failed: water.exceptions.H2OModelBuilderIlleg

█████████████████████████████
22:21:29.87: GBM_2_AutoML_182_20220114_222123 [GBM def_2] failed: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_2_AutoML_182_20220114_222123.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.


█████████
22:21:30.93: GBM_3_AutoML_182_

████████
22:21:53.226: GBM_3_AutoML_184_20220114_222145 [GBM def_3] failed: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_3_AutoML_184_20220114_222145.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.


████████
22:21:54.230: GBM_4_AutoML_184_20220114_222145 [GBM

████████
22:22:16.769: GBM_4_AutoML_186_20220114_222207 [GBM def_4] failed: water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GBM model: GBM_4_AutoML_186_20220114_222207.  Details: ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.
ERRR on field: _min_rows: The dataset size is too small to split for min_rows=10.0: must have at least 20.0 (weighted) rows, but have only 16.0.


████| (done) 100%
drf prediction progress: |████████████████

In [None]:
# Report the wall-clock seconds elapsed since `total_start_time` was recorded.
total_elapsed_secs = time.time() - total_start_time
print('총 모델 생성 시간 : ', total_elapsed_secs)

In [None]:
# # for file_nm in file_list:
# file_nm = (file_list[2] + '.csv')
# file_nm

In [None]:
## Single-file AutoML run: load one CSV, train an H2O AutoML model per
## (COL1, COL2) group, predict the test year and save the results as CSV.
# NOTE: `file_nm` (path of the CSV to process), `test_year` and `y_colnm`
# must already be defined by earlier cells, and an H2O cluster must be running.
import os

from sklearn.preprocessing import RobustScaler

## Load the data (everything is read as strings; numeric casts happen per group)
tot_data = pd.read_csv(file_nm, dtype='str', encoding = 'cp949')
# tot_data = tot_data.fillna(0)  # null handling

# ---------------------------------------------------------- #
# Build the STAND_TIME key according to the time granularity in the file name
# Monthly data
if 'month' in file_nm:
    tot_data['STAND_TIME'] = tot_data['YEAR'] + tot_data['MONTH']
    add_except_col = ['YEAR','MONTH','STAND_TIME']
# Quarterly data
elif 'quarter' in file_nm:
    tot_data['STAND_TIME'] = tot_data['YEAR'] + tot_data['QUARTER']
    add_except_col = ['YEAR','QUARTER','STAND_TIME']
# Half-year data
elif 'half' in file_nm:
    tot_data['STAND_TIME'] = tot_data['YEAR'] + tot_data['HALF']
    add_except_col = ['YEAR','HALF','STAND_TIME']
# Yearly data
else:
    tot_data['YEAR'] = tot_data['BASE_YY']  # create the YEAR column
    tot_data['STAND_TIME'] = tot_data['BASE_YY']  # create the STAND_TIME column
    del tot_data['BASE_YY']  # drop the original BASE_YY column
    add_except_col = ['YEAR','STAND_TIME']
# ---------------------------------------------------------- #

## Grouping columns of the loaded file (COL1 & COL2):
## the first two of the leading three columns that are not 'YEAR'
COL_LIST = list(tot_data.columns[0:3][tot_data.columns[0:3] != 'YEAR'])
COL1 = COL_LIST[0]
COL2 = COL_LIST[1]

## Training years: every year in the data except the test year
all_years = list(tot_data['YEAR'].unique())
if test_year not in all_years:
    # list.remove() raised an opaque ValueError here; keep the type, improve the message
    raise ValueError('test_year ' + repr(test_year) + ' not found in YEAR column of ' + file_nm)
train_year = [year for year in all_years if year != test_year]

## Independent-variable column names: x_colnm
tot_colnm = list(tot_data.columns)  # all column names
except_colnm = ([COL1,COL2,y_colnm] + add_except_col)  # columns to exclude
# Keep the file's column order (a set difference gives a run-dependent order,
# which changes the training-frame layout between runs).
x_colnm = [colnm for colnm in tot_colnm if colnm not in except_colnm]

COL1_list = list(tot_data[COL1].unique())
COL2_list = list(tot_data[COL2].unique())

rslt_list = []  # one result frame per processed group; concatenated after the loop

# NOTE(review): this cell only processes the FIRST COL1/COL2 value. The original
# wrote `COL1_list[0]`, which iterates over the characters of that value and never
# matches a row. Drop the `[:1]` slices to process every group.
for col1 in COL1_list[:1]:
    for col2 in COL2_list[:1]:
        # ---------------------------------------------------------- #
        # Rows of the current group, ordered by time
        data = tot_data.loc[(tot_data[COL1] == col1) & (tot_data[COL2] == col2)].sort_values(by = 'STAND_TIME').reset_index(drop=True)
        # ---------------------------------------------------------- #
        # Type conversion (str -> float)
        for chg_col in ([y_colnm] + x_colnm):
            data[chg_col] = data[chg_col].astype('float')
        # ---------------------------------------------------------- #
        # Split into train and test
        is_train = data['YEAR'].isin(train_year)
        train = data.loc[is_train,[y_colnm] + x_colnm]
        test = data.loc[~is_train,[y_colnm] + x_colnm]
        if train.empty or test.empty:
            # Nothing to fit or nothing to predict: the scaler would fail on 0 rows
            print(col1, col2, ': skipped (no train or test rows)')
            continue
        # ---------------------------------------------------------- #
        # Split into x and y
        train_x = train[x_colnm].reset_index(drop=True)
        train_y = train[[y_colnm]].reset_index(drop=True)
        test_x = test[x_colnm].reset_index(drop=True)
        test_y = test[[y_colnm]].reset_index(drop=True)
        # ---------------------------------------------------------- #
        # # Scaling 1 (StandardScaler): mean = 0 / std = 1
        # from sklearn.preprocessing import StandardScaler
        # scaler = StandardScaler()
        # std_train_x = pd.DataFrame(scaler.fit_transform(train_x), columns = list(train_x.columns))
        # std_test_x = pd.DataFrame(scaler.transform(test_x), columns = list(test_x.columns))

        # # Scaling 2 (MinMaxScaler): min 0 ~ max 1; outliers must be removed first
        # from sklearn.preprocessing import MinMaxScaler
        # scaler = MinMaxScaler()
        # nor_std_train_x = pd.DataFrame(scaler.fit_transform(std_train_x), columns = list(train_x.columns))
        # nor_std_test_x = pd.DataFrame(scaler.transform(std_test_x), columns = list(test_x.columns))

        # Scaling 3 (RobustScaler): median = 0 / IQR (25% ~ 75%) = 1, to limit outlier influence.
        # Fitted on train only, then applied to test.
        scaler = RobustScaler()
        Robust_train_x = pd.DataFrame(scaler.fit_transform(train_x), columns = list(train_x.columns))
        Robust_test_x = pd.DataFrame(scaler.transform(test_x), columns = list(test_x.columns))
        # ---------------------------------------------------------- #

        ####################### Variable selection #######################

        ## <Correlation analysis>
        # Correlations are computed from the training data only (never the test data)
        corr_data = pd.concat([train_y,Robust_train_x], axis = 1)
        corr_rslt = corr_data.corr(method = 'pearson')  # 'pearson' is also the default
        corr_rslt = corr_rslt.reset_index().rename(columns = {'index':'COLNM'})
        corr_rslt = corr_rslt.loc[corr_rslt['COLNM'] != y_colnm]
        # Keep predictors whose correlation with y is >= 0.5 (positive side only)
        corr_rslt = corr_rslt[corr_rslt[y_colnm] >= 0.5]

        # x_corr = corr_rslt[['COLNM'] + list(corr_rslt['COLNM'])].set_index('COLNM')
        # x_corr[x_corr < 0.95]

        # Independent variables used by the model
        mdl_x_colnm = list(corr_rslt['COLNM'])

        # Train / test datasets used by the model
        mdl_train_data = pd.concat([train_y, Robust_train_x], axis = 1)
        mdl_test_data = Robust_test_x

        # ---------------------------------------------------------- #
#         ## start h2o
#         h2o.init(nthreads=1)
        # ---------------------------------------------------------- #
        ## Convert to H2O frames
        h2o_train_data = h2o.H2OFrame(mdl_train_data)
        h2o_test_data = h2o.H2OFrame(mdl_test_data)

        ## Build the model
#         start_time = time.time()
        model = H2OAutoML(max_models=20, max_runtime_secs=10, seed=1234)
        model.train(x = mdl_x_colnm, y = y_colnm,
                    training_frame = h2o_train_data)  # x: predictors / y: response / training_frame: training data / no validation frame
#         print('모델 생성 시간 : ', time.time() - start_time)
        # --------------------------------------------------------------- #
        # # View the AutoML Leaderboard
        # lb = model.leaderboard
        # lb.head(rows = 10)  # show the 10 best models
        # model.leader  # The leader model is stored here

        # ## Inspect the model
        # m = model.leader  # Get the best model using the metric
        # m = model.get_best_model()  # this is equivalent to

        ## AutoML output
        # Get leaderboard with all possible columns
        lb = h2o.automl.get_leaderboard(model, extra_columns = "ALL")
        save_lb = lb.as_data_frame()  # convert to a pandas DataFrame
        # --------------------------------------------------------------- #
        ## Predict on the test frame
        pred = model.predict(h2o_test_data)

        ## Convert the H2O frame to a pandas DataFrame
        pred = h2o.as_list(pred, use_pandas=True)  # or pred.as_data_frame()
        pred.rename(columns={'predict':'PREDICT'}, inplace=True)
        # --------------------------------------------------------------------------------------- #
#         ## shut down h2o
#         h2o.cluster().shutdown()
        # ---------------------------------------------------------- #

        ## Assemble the result rows for this group
        rslt = pd.concat([pred, test_y], axis = 1)
        rslt['DIFF'] = rslt['PREDICT'] - rslt[y_colnm]
        rslt['target'] = (col1 + '_' + col2)
        rslt['mdl_x_colnm'] = str(mdl_x_colnm)
        rslt['BEST_MDL'] = save_lb['model_id'][0]  # first leaderboard row
        # Mean squared error over this group's test rows, repeated on every row
        rslt['MSE'] = round((rslt['DIFF']**2).mean(),4)
        rslt['stand_time'] = list(data.loc[~is_train,'STAND_TIME'])
        rslt = rslt[['target', 'stand_time', 'PREDICT', y_colnm, 'MSE', 'BEST_MDL']]

        ## Collect the result (DataFrame.append was removed in pandas 2.0)
        rslt_list.append(rslt)

if not rslt_list:
    raise ValueError('No results were produced for ' + file_nm)
col1_col2_rslt = pd.concat(rslt_list)

## Save the results as result/result_<input file name>
os.makedirs('result', exist_ok=True)
col1_col2_rslt.to_csv('result/result_' + os.path.basename(file_nm), index=False, encoding = 'utf-8')

In [None]:
# Report the wall-clock seconds elapsed since `total_start_time` was recorded.
total_elapsed_secs = time.time() - total_start_time
print('총 모델 생성 시간 : ', total_elapsed_secs)

--------