In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import random
import pandas as pd
import numpy as np
import os
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and numerical
# warnings raised by the libraries used below are hidden as well.
warnings.filterwarnings(action='ignore')

def seed_everything(seed):
    """Seed the global random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global generator,
    and exports ``PYTHONHASHSEED`` into the process environment.

    Args:
        seed: Integer seed applied to every generator.

    Note:
        ``PYTHONHASHSEED`` is read by the interpreter at start-up, so setting
        it here can only influence processes launched after this call, not
        the hash randomisation of the kernel that is already running.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(929)  # fix the seed

In [3]:
# Competition folder on the mounted Drive (note the trailing slash).
path = '/content/drive/MyDrive/2023_Dacon_Holiday/'
# List the folder contents as a quick check that the data files are visible.
os.listdir(path)

['train.csv', 'test.csv', 'sample_submission.csv', 'Autogluon.ipynb']

In [4]:
# Load the training set, the test set and the submission template from the
# competition folder, in that order.
train, test, submission = (
    pd.read_csv(os.path.join(path, csv_name))
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [5]:
!pip install autogluon

from autogluon.tabular import TabularDataset, TabularPredictor
import autogluon.core as ag

Collecting autogluon
  Downloading autogluon-0.8.2-py3-none-any.whl (9.7 kB)
Collecting autogluon.core[all]==0.8.2 (from autogluon)
  Downloading autogluon.core-0.8.2-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.0/224.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autogluon.features==0.8.2 (from autogluon)
  Downloading autogluon.features-0.8.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autogluon.tabular[all]==0.8.2 (from autogluon)
  Downloading autogluon.tabular-0.8.2-py3-none-any.whl (285 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.7/285.7 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autogluon.multimodal==0.8.2 (from autogluon)
  Downloading autogluon.multimodal-0.8.2-py3-none-any.whl (372 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37

In [11]:
# Training configuration.
label = '수요량'          # target column handed to TabularPredictor
eval_metric = 'rmse'
time_limit = 6 * 3600    # fit budget in seconds (6 hours)
# NOTE(review): the fit log stored in this notebook reports
# "Time limit = 10800s" (3 hours), so the saved outputs were produced with a
# different budget than the value above - re-run to refresh them.

# Wrap the raw frames in AutoGluon's dataset type.
train_data = TabularDataset(train)
test_data = TabularDataset(test)

In [12]:
# Fit a regression predictor on the training data.
# No save path is passed, so AutoGluon writes the models to an auto-named
# "AutogluonModels/ag-<timestamp>/" directory under the working directory.
fit_options = dict(
    presets='best_quality',
    num_stack_levels=3,
    time_limit=time_limit,   # seconds
    num_gpus=1,
)
predictor = TabularPredictor(
    label=label,
    problem_type='regression',
    eval_metric=eval_metric,
).fit(train_data, **fit_options)

No path specified. Models will be saved in: "AutogluonModels/ag-20230928_113020/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=3, num_bag_folds=8, num_bag_sets=20
Beginning AutoGluon training ... Time limit = 10800s
AutoGluon will save models to "AutogluonModels/ag-20230928_113020/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Wed Aug 30 11:19:59 UTC 2023
Disk Space Avail:   49.47 GB / 83.96 GB (58.9%)
Train Data Rows:    5872
Train Data Columns: 9
Label Column: 수요량
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    11848.33 MB
	Train Data (Original)  Memory Usage: 3.32 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generat

In [13]:
# Show the table of trained models with their validation scores and timings.
# score_val is displayed in higher-is-better form, which is why the RMSE
# values in the output appear negated.
predictor.leaderboard(silent = True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,-119.101967,24.677409,4194.067938,0.000905,0.73569,3,True,22
1,WeightedEnsemble_L2,-119.189621,15.239895,1505.134602,0.000635,0.542661,2,True,12
2,NeuralNetFastAI_BAG_L2,-119.936996,22.938848,3586.649134,1.61575,450.451822,2,True,18
3,WeightedEnsemble_L4,-120.249615,33.911696,6183.143523,0.001023,1.013847,4,True,32
4,NeuralNetFastAI_BAG_L3,-120.59086,32.253742,5986.498762,1.805555,434.700709,3,True,28
5,ExtraTreesMSE_BAG_L2,-121.269534,21.684797,3140.60176,0.361699,4.404449,2,True,17
6,WeightedEnsemble_L5,-121.406022,42.330575,8498.649136,0.000871,0.647462,5,True,42
7,NeuralNetFastAI_BAG_L4,-122.015851,40.167085,8326.412282,1.354842,332.40887,4,True,38
8,CatBoost_BAG_L2,-122.938409,22.211768,3715.706741,0.88867,579.509429,2,True,16
9,RandomForestMSE_BAG_L2,-123.3018,21.810386,3158.966547,0.487288,22.769236,2,True,15


In [15]:
# Score the test set with the model AutoGluon reports as its best one.
model_to_use = predictor.get_model_best()
model_pred = predictor.predict(test_data, model=model_to_use)

# Fail loudly if the test set and the submission template disagree in size,
# instead of writing a partially filled submission.
assert len(model_pred) == len(submission), (
    f"prediction count {len(model_pred)} != submission rows {len(submission)}"
)

# Assign by position: assigning a Series aligns on index labels, which would
# silently leave NaN in any row whose label is missing from the predictions.
submission['수요량'] = model_pred.to_numpy()
submission

Unnamed: 0,ID,수요량
0,TEST_0000,233.255219
1,TEST_0001,51.704460
2,TEST_0002,363.477661
3,TEST_0003,181.586121
4,TEST_0004,199.578430
...,...,...
3910,TEST_3910,1179.928955
3911,TEST_3911,299.705139
3912,TEST_3912,176.930115
3913,TEST_3913,1212.776978


In [None]:
# Echo the fitted predictor object; this only displays its default repr.
predictor

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7881db0466e0>

In [16]:
# Write the submission file without the DataFrame index.
# The training budget in the file name is derived from time_limit so that the
# name cannot drift from the configured value (it was hard-coded as "3hr").
submission_file = f'1_Autogluon_{time_limit / 3600:g}hr_submission.csv'
submission.to_csv(submission_file, index=False)