In [1]:
# PyCaret 라이브러리 설치
!pip install pycaret
!pip install markupsafe==2.0.1
!pip install catboost


# 베이지안 탐색 라이브러리
!pip install scikit-optimize

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting markupsafe~=2.1.1
  Using cached MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25 kB)
Installing collected packages: markupsafe
  Attempting uninstall: markupsafe
    Found existing installation: MarkupSafe 2.0.1
    Uninstalling MarkupSafe-2.0.1:
      Successfully uninstalled MarkupSafe-2.0.1
Successfully installed markupsafe-2.1.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting markupsafe==2.0.1
  Using cached MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (31 kB)
Installing collected packages: markupsafe
  Attempting uninstall: markupsafe
    Found existing installation: MarkupSafe 2.1.1
    Uninstalling MarkupSafe-2.1.1:
      Successfully uninstalled MarkupSafe-2.1.1
[31mERROR: pip's dependency resolver does not cu

In [2]:
import os

import numpy as np
import pandas as pd

import seaborn as sns 
import matplotlib.pyplot as plt

# Notebook-wide matplotlib defaults: figure size and font family.
# NOTE(review): 'AppleGothic' is presumably selected for Korean text; confirm
# the font is available in the runtime this notebook is executed on.
plt.rcParams.update({
    'figure.figsize': (10, 10),
    'font.family': 'AppleGothic',
})

import warnings

# Install an 'ignore' filter that applies to every warning category.
warnings.simplefilter('ignore')

In [3]:
def read_csv_by_dir(path, index_col=None):
    """Read every ``.csv`` file in a directory and stack the rows.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).
    index_col : int, str or None, optional
        Forwarded unchanged to ``pandas.read_csv`` for each file.

    Returns
    -------
    pandas.DataFrame
        Rows of all CSV files concatenated along axis 0, with the files taken
        in sorted file-name order. An empty DataFrame is returned when the
        directory holds no ``.csv`` file.
    """
    # os.listdir() gives no ordering guarantee; sort for a reproducible result.
    csv_names = sorted(name for name in os.listdir(path) if name.endswith('.csv'))
    if not csv_names:
        return pd.DataFrame()

    # Read only the CSV entries and concatenate once, so a non-CSV entry can
    # neither re-append an earlier frame nor hit an undefined variable.
    frames = [
        pd.read_csv(os.path.join(path, name), index_col=index_col)
        for name in csv_names
    ]
    return pd.concat(frames, axis=0)

In [4]:
from google.colab import drive

# Mount Google Drive (only relevant when the data is stored on Drive).
drive.mount('/content/drive')

path = '/content/drive/MyDrive/dacon/water_level/data/'

# Input locations below the data directory.
rf_dir = '/'.join([path, 'rf_data'])
water_dir = '/'.join([path, 'water_data'])
sample_submission_file = '/'.join([path, 'sample_submission.csv'])

# Raw inputs; the first CSV column is used as the index of every frame.
_df_rf_raw = read_csv_by_dir(rf_dir, index_col=0)
_df_water_raw = read_csv_by_dir(water_dir, index_col=0)
_submission_raw = pd.read_csv(sample_submission_file, index_col=0)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Copy the raw frames so the preprocessing below operates on the copies.
df_rf, df_water, submission = (
    _df_rf_raw.copy(),
    _df_water_raw.copy(),
    _submission_raw.copy(),
)

# Ad-hoc labels stored as plain attributes; check_datetime() prints them.
df_rf.name = "rain_data"
df_water.name = "water_data"
submission.name = "submission"

In [6]:
def index_to_datetime(df,format):
    """Parse the index of ``df`` as datetimes and return the same object.

    The index is replaced on ``df`` itself (no copy is made).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds timestamp text.
    format : str
        strftime-style pattern handed to ``pandas.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        ``df``, now carrying the parsed index.
    """
    parsed_index = pd.to_datetime(df.index, format=format)
    df.index = parsed_index
    return df

In [7]:
# The same timestamp pattern is used for the index of all three frames.
timestamp_format = '%Y-%m-%d %H:%M'

df_rf = index_to_datetime(df_rf, timestamp_format)
df_water = index_to_datetime(df_water, timestamp_format)
submission = index_to_datetime(submission, timestamp_format)

In [8]:
# Sort every frame by its index, modifying the existing objects.
# Kept in place: these objects carry the `.name` labels that check_datetime()
# prints; a frame returned by a non-inplace sort would presumably not have
# them — verify before changing this to an assignment.
for frame in (df_rf, df_water, submission):
    frame.sort_index(inplace=True)

In [9]:
# Check the time span covered by a dataset
def check_datetime(df):
    """Print ``df.name``, then the first and the last value of ``df.index``.

    ``df`` needs a ``name`` attribute and a non-empty index. Returns None.
    """
    print(df.name)
    index = df.index
    print(index[0])
    print(index[-1])
    return None

# Report the label and first/last timestamp of each prepared frame.
for frame in (df_rf, df_water, submission):
    check_datetime(frame)

rain_data
2012-05-01 00:00:00
2022-07-18 23:50:00
water_data
2012-05-01 00:00:00
2022-07-18 23:50:00
submission
2022-06-01 00:00:00
2022-07-18 23:50:00


In [10]:
# Place the rain and the water columns side by side (column-wise concat).
data = pd.concat([df_rf, df_water], axis='columns')

In [11]:
# (rows, columns) of the merged frame.
data.shape

(276336, 17)

In [12]:
# Shift data and target by one step (because past data has to be used)
# NOTE(review): the statements below discard the datetime index, renumber the
# rows starting at 1 and then drop the first and the last row. Every column of
# a row stays on that row, so no column is lagged relative to another —
# confirm this matches the intent stated above.
_data = data.reset_index(drop=True)

_data.index += 1
tot=_data.sort_index()
tot=tot.iloc[1:-1]

In [13]:
# Number of missing values per column.
tot.isna().sum()

rf_10184100         0
rf_10184110         0
rf_10184140         0
swl               743
inf               743
sfw               743
ecpc              743
tototf            743
tide_level       4927
wl_1018662         59
fw_1018662      16380
wl_1018680         59
fw_1018680     196848
wl_1018683         59
fw_1018683       1279
wl_1019630         59
fw_1019630         59
dtype: int64

In [14]:
# The columns of the submission frame are the targets to predict.
pred_col = submission.columns
pred_col

Index(['wl_1018662', 'wl_1018680', 'wl_1018683', 'wl_1019630'], dtype='object')

In [15]:
# Replace each NaN with the mean of its column.
# NOTE(review): the mean is computed over all rows of `tot`, including the rows
# that are split off as test_data afterwards, and the fill also touches the
# wl_* target columns — check whether that is intended.
tot = tot.fillna(tot.mean())

In [16]:
# Hold out the last len(submission) rows for prediction; all earlier rows are
# used for training.
# NOTE(review): `tot` was built with iloc[1:-1], so it no longer contains the
# final row of `data`; this window therefore ends one row before the end of
# `data` — check the alignment with the submission timestamps.
n_holdout = len(submission)
train_data = tot.iloc[:-n_holdout, :]
test_data = tot.iloc[-n_holdout:, :]

In [17]:
# Missing values per column in the held-out rows.
test_data.isna().sum()

rf_10184100    0
rf_10184110    0
rf_10184140    0
swl            0
inf            0
sfw            0
ecpc           0
tototf         0
tide_level     0
wl_1018662     0
fw_1018662     0
wl_1018680     0
fw_1018680     0
wl_1018683     0
fw_1018683     0
wl_1019630     0
fw_1019630     0
dtype: int64

In [18]:
# Row count here should equal the row count of the submission frame.
test_data.shape

(6912, 17)

In [19]:
# (rows, columns) of the submission frame, for comparison with test_data.
submission.shape

(6912, 4)

In [20]:
from pycaret.regression import *

# Experiment for target wl_1018662: the other three target columns are dropped
# from the training frame before it is handed to setup().
s1 = setup(
    data=train_data.drop(pred_col.drop('wl_1018662'), axis=1),
    target='wl_1018662',
    train_size=0.8,
    normalize=True,
    normalize_method='robust',
    transformation=True,
    silent=True,
    session_id=123,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,wl_1018662
2,Original Data,"(269422, 14)"
3,Missing Values,False
4,Numeric Features,13
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(215537, 12)"


In [21]:
# Rank candidate regressors by RMSE with 5-fold CV and keep the best three.
# NOTE(review): the saved output of this cell still lists knn, ada and gbr even
# though they are excluded here, so that output appears to come from an
# earlier version of this call.
s1_top3 = compare_models(
    n_select=3,
    fold=5,
    sort="RMSE",
    exclude=["knn", "ada", "gbr"],
)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,0.0764,0.2319,0.4798,1.0,0.0014,0.0002,34.504
rf,Random Forest Regressor,0.0796,0.416,0.6398,0.9999,0.0014,0.0002,67.444
dt,Decision Tree Regressor,0.0948,0.5678,0.7514,0.9999,0.0018,0.0003,0.888
catboost,CatBoost Regressor,0.4793,2.3223,1.4893,0.9996,0.0027,0.0013,33.578
lightgbm,Light Gradient Boosting Machine,0.4325,2.8046,1.6598,0.9995,0.0029,0.0012,1.574
gbr,Gradient Boosting Regressor,1.873,11.4008,3.376,0.998,0.0092,0.0055,38.64
knn,K Neighbors Regressor,2.717,33.5287,5.7883,0.9942,0.0135,0.0075,47.574
ada,AdaBoost Regressor,13.8927,288.2311,16.9721,0.9504,0.055,0.045,24.126
ridge,Ridge Regression,25.2594,1534.7052,39.1711,0.7359,0.0946,0.0718,0.064
lar,Least Angle Regression,25.2596,1534.7052,39.1711,0.7359,0.0946,0.0718,0.072


In [24]:
# tuned_model1 = [tune_model(i, choose_better=True) for i in s1_top3]

# blended_model1 = blend_models(estimator_list=tuned_model1, optimize='rmse', choose_better=True)

INFO:logs:Initializing tune_model()
INFO:logs:tune_model(estimator=ExtraTreesRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                    max_depth=None, max_features='auto', max_leaf_nodes=None,
                    max_samples=None, min_impurity_decrease=0.0,
                    min_impurity_split=None, min_samples_leaf=1,
                    min_samples_split=2, min_weight_fraction_leaf=0.0,
                    n_estimators=100, n_jobs=-1, oob_score=False,
                    random_state=123, verbose=0, warm_start=False), fold=None, round=4, n_iter=10, custom_grid=None, optimize=R2, custom_scorer=None, search_library=scikit-learn, search_algorithm=None, early_stopping=False, early_stopping_max_iters=10, choose_better=True, fit_kwargs=None, groups=None, return_tuner=False, verbose=True, tuner_verbose=True, display=None, return_train_score=False, kwargs={})
INFO:logs:Checking exceptions
INFO:logs:Preparing display monitor


IntProgress(value=0, description='Processing: ', max=7)

Unnamed: 0,Fold,MAE,MSE,RMSE,R2,RMSLE,MAPE


INFO:logs:Copying training dataset
INFO:logs:Checking base model
INFO:logs:Base model : Extra Trees Regressor
INFO:logs:Declaring metric variables
INFO:logs:Defining Hyperparameters
INFO:logs:Tuning with n_jobs=-1
INFO:logs:Initializing RandomizedSearchCV


Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  6.7min


KeyboardInterrupt: ignored

In [26]:
# Fit a decision-tree regressor ('dt'); presumably against the most recent
# setup() call (s1 at this point) — confirm.
# NOTE(review): despite the variable name, no blending is performed here.
blended_model1 = create_model('dt')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0851,0.4199,0.648,0.9999,0.0016,0.0002
1,0.095,0.623,0.7893,0.9999,0.0021,0.0003
2,0.0958,0.5423,0.7364,0.9999,0.0017,0.0003
3,0.0866,0.484,0.6957,0.9999,0.0016,0.0002
4,0.0934,0.4875,0.6982,0.9999,0.0017,0.0003
5,0.0982,0.6733,0.8206,0.9999,0.0018,0.0003
6,0.0903,0.5306,0.7284,0.9999,0.0017,0.0003
7,0.0894,0.4588,0.6774,0.9999,0.0016,0.0003
8,0.0984,0.7729,0.8792,0.9999,0.0018,0.0003
9,0.0848,0.3899,0.6244,0.9999,0.0016,0.0002


INFO:logs:create_model_container: 20
INFO:logs:master_model_container: 20
INFO:logs:display_container: 3
INFO:logs:DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best')
INFO:logs:create_model() succesfully completed......................................


In [27]:
from pycaret.regression import *

# Experiment for target wl_1018680: the other three target columns are dropped
# from the training frame before it is handed to setup().
s2 = setup(
    data=train_data.drop(pred_col.drop('wl_1018680'), axis=1),
    target='wl_1018680',
    train_size=0.8,
    normalize=True,
    normalize_method='robust',
    transformation=True,
    silent=True,
    session_id=123,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,wl_1018680
2,Original Data,"(269422, 14)"
3,Missing Values,False
4,Numeric Features,13
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(215537, 12)"


INFO:logs:create_model_container: 0
INFO:logs:master_model_container: 0
INFO:logs:display_container: 1
INFO:logs:Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=[],
                                      display_types=False, features_todrop=[],
                                      id_columns=[], ml_usecase='regression',
                                      numerical_features=[],
                                      target='wl_1018680', time_features=[])),
                ('imputer',
                 Simple_Imputer(categorical_strategy='not_available',
                                fill_value_categorical=None,
                                fill_value_numerical=None,
                                numeric_stra...
                                                  random_state_quantile=123,
                                                  target='wl_1018680')),
                ('binn', 'passthrough'), ('rem_outliers', 'pas

In [None]:
# s2_top3 = compare_models(n_select = 3, fold=5, sort="RMSE", exclude=["knn", "ada", "gbr"])

In [None]:
# tuned_model2 = [tune_model(i, choose_better=True) for i in s2_top3]

# blended_model2 = blend_models(estimator_list=tuned_model2, optimize='rmse', choose_better=True)

In [28]:
# Fit a decision-tree regressor ('dt'); presumably against the most recent
# setup() call (s2 at this point) — confirm.
# NOTE(review): despite the variable name, no blending is performed here.
blended_model2 = create_model('dt')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3.3503,38.739,6.2241,0.9912,0.0188,0.0104
1,3.3168,38.3602,6.1936,0.9917,0.0187,0.0103
2,3.2519,35.6857,5.9737,0.9922,0.0182,0.0101
3,3.3548,38.453,6.201,0.9908,0.0188,0.0104
4,3.3166,38.7474,6.2247,0.9915,0.0187,0.0102
5,3.317,37.7263,6.1422,0.9916,0.0185,0.0102
6,3.3777,39.1931,6.2604,0.9915,0.0189,0.0105
7,3.3048,38.3396,6.1919,0.9912,0.0185,0.0102
8,3.3018,37.7476,6.1439,0.9922,0.0186,0.0102
9,3.3689,38.9836,6.2437,0.9917,0.0189,0.0104


INFO:logs:create_model_container: 1
INFO:logs:master_model_container: 1
INFO:logs:display_container: 2
INFO:logs:DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best')
INFO:logs:create_model() succesfully completed......................................


In [29]:
from pycaret.regression import *

# Experiment for target wl_1018683: the other three target columns are dropped
# from the training frame before it is handed to setup().
s3 = setup(
    data=train_data.drop(pred_col.drop('wl_1018683'), axis=1),
    target='wl_1018683',
    train_size=0.8,
    normalize=True,
    normalize_method='robust',
    transformation=True,
    silent=True,
    session_id=123,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,wl_1018683
2,Original Data,"(269422, 14)"
3,Missing Values,False
4,Numeric Features,13
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(215537, 12)"


INFO:logs:create_model_container: 0
INFO:logs:master_model_container: 0
INFO:logs:display_container: 1
INFO:logs:Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=[],
                                      display_types=False, features_todrop=[],
                                      id_columns=[], ml_usecase='regression',
                                      numerical_features=[],
                                      target='wl_1018683', time_features=[])),
                ('imputer',
                 Simple_Imputer(categorical_strategy='not_available',
                                fill_value_categorical=None,
                                fill_value_numerical=None,
                                numeric_stra...
                                                  random_state_quantile=123,
                                                  target='wl_1018683')),
                ('binn', 'passthrough'), ('rem_outliers', 'pas

In [None]:
# s3_top3 = compare_models(n_select = 3, fold=5, sort="RMSE", exclude=["knn", "ada", "gbr"])

In [None]:
# tuned_model3 = [tune_model(i, choose_better=True) for i in s3_top3]

# blended_model3 = blend_models(estimator_list=tuned_model3, optimize='rmse', choose_better=True)

In [30]:
# Fit a decision-tree regressor ('dt'); presumably against the most recent
# setup() call (s3 at this point) — confirm.
# NOTE(review): despite the variable name, no blending is performed here.
blended_model3 = create_model('dt')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,2.2266,13.4431,3.6665,0.9966,0.0111,0.0069
1,2.2079,13.2163,3.6354,0.9968,0.011,0.0069
2,2.2209,13.7276,3.7051,0.9966,0.0112,0.0069
3,2.1947,12.8547,3.5853,0.9966,0.0109,0.0069
4,2.2131,13.2732,3.6432,0.9967,0.011,0.0069
5,2.2082,13.3092,3.6482,0.9967,0.0109,0.0069
6,2.2069,13.0253,3.6091,0.9968,0.0109,0.0069
7,2.1883,12.6998,3.5637,0.9968,0.0108,0.0068
8,2.1764,12.9119,3.5933,0.997,0.0108,0.0068
9,2.2292,13.734,3.7059,0.9967,0.0111,0.0069


INFO:logs:create_model_container: 1
INFO:logs:master_model_container: 1
INFO:logs:display_container: 2
INFO:logs:DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best')
INFO:logs:create_model() succesfully completed......................................


In [31]:
from pycaret.regression import *

# Experiment for target wl_1019630: the other three target columns are dropped
# from the training frame before it is handed to setup().
s4 = setup(
    data=train_data.drop(pred_col.drop('wl_1019630'), axis=1),
    target='wl_1019630',
    train_size=0.8,
    normalize=True,
    normalize_method='robust',
    transformation=True,
    silent=True,
    session_id=123,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,wl_1019630
2,Original Data,"(269422, 14)"
3,Missing Values,False
4,Numeric Features,13
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(215537, 12)"


INFO:logs:create_model_container: 0
INFO:logs:master_model_container: 0
INFO:logs:display_container: 1
INFO:logs:Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=[],
                                      display_types=False, features_todrop=[],
                                      id_columns=[], ml_usecase='regression',
                                      numerical_features=[],
                                      target='wl_1019630', time_features=[])),
                ('imputer',
                 Simple_Imputer(categorical_strategy='not_available',
                                fill_value_categorical=None,
                                fill_value_numerical=None,
                                numeric_stra...
                                                  random_state_quantile=123,
                                                  target='wl_1019630')),
                ('binn', 'passthrough'), ('rem_outliers', 'pas

In [None]:
# s4_top3 = compare_models(n_select = 3, fold=5, sort="RMSE", exclude=["knn", "ada", "gbr"])

In [None]:
# tuned_model4 = [tune_model(i, choose_better=True) for i in s4_top3]

# blended_model4 = blend_models(estimator_list=tuned_model4, optimize='rmse', choose_better=True)

In [32]:
# Fit a decision-tree regressor ('dt'); presumably against the most recent
# setup() call (s4 at this point) — confirm.
# NOTE(review): despite the variable name, no blending is performed here.
blended_model4 = create_model('dt')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0004,0.0004,0.0204,1.0,0.0,0.0
1,0.0009,0.0018,0.042,1.0,0.0001,0.0
2,0.0013,0.0025,0.0501,1.0,0.0001,0.0
3,0.0006,0.0006,0.0255,1.0,0.0,0.0
4,0.0014,0.0061,0.0783,1.0,0.0001,0.0
5,0.0019,0.0078,0.0883,1.0,0.0001,0.0
6,0.001,0.0015,0.0385,1.0,0.0001,0.0
7,0.001,0.0064,0.0803,1.0,0.0001,0.0
8,0.001,0.0014,0.0379,1.0,0.0001,0.0
9,0.0021,0.0304,0.1745,1.0,0.0002,0.0


INFO:logs:create_model_container: 1
INFO:logs:master_model_container: 1
INFO:logs:display_container: 2
INFO:logs:DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best')
INFO:logs:create_model() succesfully completed......................................


In [33]:
# Run each target's model on the held-out rows.
# NOTE(review): all four calls execute after the last setup() (s4).
# predict_model(data=...) presumably applies the preprocessing of the most
# recent setup() — confirm this is acceptable for models 1-3, which were
# created under s1-s3.
# NOTE(review): the metrics shown in the saved output of this cell (R2 around
# -1e4) suggest the target columns in test_data do not hold real observations;
# do not read them as a hold-out score without verifying.
prediction1 = predict_model(blended_model1, data = test_data)
prediction2 = predict_model(blended_model2, data = test_data)
prediction3 = predict_model(blended_model3, data = test_data)
prediction4 = predict_model(blended_model4, data = test_data)

INFO:logs:Initializing predict_model()
INFO:logs:predict_model(estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best'), probability_threshold=None, encoded_labels=True, drift_report=False, raw_score=False, round=4, verbose=True, ml_usecase=MLUsecase.REGRESSION, display=None, drift_kwargs=None)
INFO:logs:Checking exceptions
INFO:logs:Preloading libraries
INFO:logs:Preparing display monitor


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Decision Tree Regressor,334.5987,120862.166,347.6524,-11434.7551,5.7921,0.0311


INFO:logs:Initializing predict_model()
INFO:logs:predict_model(estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best'), probability_threshold=None, encoded_labels=True, drift_report=False, raw_score=False, round=4, verbose=True, ml_usecase=MLUsecase.REGRESSION, display=None, drift_kwargs=None)
INFO:logs:Checking exceptions
INFO:logs:Preloading libraries
INFO:logs:Preparing display monitor


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Decision Tree Regressor,323.7619,111476.6585,333.8812,-10546.7157,5.7633,0.0292


INFO:logs:Initializing predict_model()
INFO:logs:predict_model(estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best'), probability_threshold=None, encoded_labels=True, drift_report=False, raw_score=False, round=4, verbose=True, ml_usecase=MLUsecase.REGRESSION, display=None, drift_kwargs=None)
INFO:logs:Checking exceptions
INFO:logs:Preloading libraries
INFO:logs:Preparing display monitor


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Decision Tree Regressor,319.7228,108057.2985,328.7207,-10223.1822,5.7523,0.0285


INFO:logs:Initializing predict_model()
INFO:logs:predict_model(estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best'), probability_threshold=None, encoded_labels=True, drift_report=False, raw_score=False, round=4, verbose=True, ml_usecase=MLUsecase.REGRESSION, display=None, drift_kwargs=None)
INFO:logs:Checking exceptions
INFO:logs:Preloading libraries
INFO:logs:Preparing display monitor


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Decision Tree Regressor,302.9446,95524.3839,309.0702,-9037.3409,5.7029,0.0


In [59]:
# Copy the 'Label' column of each prediction frame (presumably PyCaret's
# predicted value — confirm) into the matching target column, by row position.
# NOTE(review): the values are written into `_submission_raw`, which was not
# sorted, while test_data derives from the sorted frames — confirm that the
# row order of both agrees.
model_outputs = (prediction1, prediction2, prediction3, prediction4)
for position, model_output in enumerate(model_outputs):
    _submission_raw[pred_col[position]] = model_output['Label'].values

In [60]:
# Write the filled-in submission frame (index included) to Google Drive.
_submission_raw.to_csv('/content/drive/MyDrive/dacon/water_level/submission_dt_test.csv')

In [61]:
# (rows, columns) of the first prediction frame.
prediction1.shape

(6912, 18)

In [62]:
# (rows, columns) of the frame that was written out.
_submission_raw.shape

(6912, 4)

In [63]:
# Missing values per target column in the written submission.
_submission_raw.isna().sum()

wl_1018662    0
wl_1018680    0
wl_1018683    0
wl_1019630    0
dtype: int64

In [64]:
# Display the filled-in submission frame.
_submission_raw

Unnamed: 0_level_0,wl_1018662,wl_1018680,wl_1018683,wl_1019630
ymdhm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-01 00:00,278.700012,278.200012,278.0,270.299988
2022-06-01 00:10,278.700012,278.200012,278.0,269.299988
2022-06-01 00:20,280.700012,278.200012,269.0,267.299988
2022-06-01 00:30,283.700012,268.200012,278.0,264.299988
2022-06-01 00:40,284.700012,265.200012,274.0,261.299988
...,...,...,...,...
2022-07-18 23:10,287.700012,292.200012,293.0,303.299988
2022-07-18 23:20,287.700012,294.200012,299.0,306.299988
2022-07-18 23:30,286.700012,311.200012,305.0,308.299988
2022-07-18 23:40,298.700012,302.200012,307.0,310.299988


In [None]:
# Leaderboard score: 6.48131