In [1]:
# Mount Google Drive at /content/drive so the notebook can read files stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import random
import pandas as pd
import numpy as np
import os
import warnings
# Ignore every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings(action='ignore')

def seed_everything(seed):
    """Apply one seed value to the random sources used by this notebook.

    Seeds Python's ``random`` module and NumPy's global generator, and stores
    ``str(seed)`` in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value passed unchanged to ``random.seed`` and ``np.random.seed``.
    """
    random.seed(seed)
    # NOTE(review): writing PYTHONHASHSEED at runtime presumably only matters for
    # processes started afterwards, not the running interpreter — confirm.
    os.environ.update({'PYTHONHASHSEED': str(seed)})
    np.random.seed(seed)

seed_everything(828) # Fix the random seed

In [4]:
# Google Drive folder holding the competition files.
# The trailing slash is required: later cells build file paths as `path + '<name>.csv'`.
path = '/content/drive/MyDrive/2023_Dacon_Cigarette/'
# Last expression of the cell: displays the folder's contents.
os.listdir(path)

['sample_submission.csv',
 'test.csv',
 'train.csv',
 'Baseline_Cigarettes.ipynb',
 'Autogluon.ipynb']

In [5]:
# Read the training set, the test set and the submission template from `path`.
train, test, submission = (
    pd.read_csv(path + csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [6]:
!pip install autogluon

from autogluon.tabular import TabularDataset, TabularPredictor
import autogluon.core as ag

Collecting autogluon
  Downloading autogluon-0.8.2-py3-none-any.whl (9.7 kB)
Collecting autogluon.core[all]==0.8.2 (from autogluon)
  Downloading autogluon.core-0.8.2-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.0/224.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autogluon.features==0.8.2 (from autogluon)
  Downloading autogluon.features-0.8.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autogluon.tabular[all]==0.8.2 (from autogluon)
  Downloading autogluon.tabular-0.8.2-py3-none-any.whl (285 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.7/285.7 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autogluon.multimodal==0.8.2 (from autogluon)
  Downloading autogluon.multimodal-0.8.2-py3-none-any.whl (372 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37

In [7]:
# Training configuration: target column, metric to optimise, and time budget.
label = 'label'
eval_metric = 'accuracy'
time_limit = 3 * 60 * 60  # 10800 seconds, i.e. three hours

# Wrap the loaded frames in AutoGluon's TabularDataset type.
train_data, test_data = map(TabularDataset, (train, test))

In [8]:
# Build a binary-classification predictor and train it in one chained expression.
predictor = TabularPredictor(
    eval_metric=eval_metric,
    problem_type='binary',
    label=label,
).fit(
    train_data,
    time_limit=time_limit,   # seconds; the training log reports "Time limit = 10800s"
    num_gpus=1,
    presets='best_quality',
    num_stack_levels=3,      # echoed in the log's "Stack configuration" line
)

No path specified. Models will be saved in: "AutogluonModels/ag-20230827_133607/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=3, num_bag_folds=8, num_bag_sets=20
Beginning AutoGluon training ... Time limit = 10800s
AutoGluon will save models to "AutogluonModels/ag-20230827_133607/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Jun 9 10:57:30 UTC 2023
Disk Space Avail:   49.48 GB / 83.96 GB (58.9%)
Train Data Rows:    7000
Train Data Columns: 17
Label Column: label
Preprocessing data ...
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    12325.41 MB
	Train Data (Original)  Memory Usage: 1.37 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to ma

In [9]:
# Display the table of trained models and their validation scores.
# NOTE(review): silent=True presumably suppresses AutoGluon's own printed copy so
# only the returned table is rendered — confirm against the AutoGluon docs.
predictor.leaderboard(silent = True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L4,0.760143,30.16527,7254.656704,0.026607,4.437856,4,True,38
1,WeightedEnsemble_L5,0.759714,39.190135,9250.417569,0.021573,5.868734,5,True,50
2,XGBoost_BAG_L4,0.758,34.469732,8302.258062,0.460206,97.012448,4,True,47
3,CatBoost_BAG_L4,0.757286,34.112704,8408.303193,0.103179,203.057579,4,True,43
4,NeuralNetFastAI_BAG_L4,0.757286,35.30227,8554.573104,1.292744,349.32749,4,True,46
5,WeightedEnsemble_L3,0.757,16.304888,4033.832285,0.024003,4.445941,3,True,26
6,XGBoost_BAG_L3,0.756143,24.682253,5879.264505,0.997666,156.505168,3,True,35
7,LightGBM_BAG_L3,0.756,24.399364,5888.286261,0.714777,165.526923,3,True,28
8,NeuralNetTorch_BAG_L4,0.755857,34.617214,8689.063569,0.607689,483.817955,4,True,48
9,LightGBMXT_BAG_L4,0.755714,34.518016,8306.032501,0.50849,100.786888,4,True,39


In [10]:
# Ask the predictor which model it considers best and predict the test set with it.
model_to_use = predictor.get_model_best()
model_pred = predictor.predict(test_data, model=model_to_use)

# Write the predictions into the submission frame's 'label' column, then display it.
# NOTE(review): assuming model_pred is a pandas Series, this assignment aligns on the
# index, so it relies on test and submission sharing the same row index — confirm.
submission['label'] = model_pred
submission

Unnamed: 0,ID,label
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,1
3,TEST_0003,1
4,TEST_0004,0
...,...,...
2995,TEST_2995,0
2996,TEST_2996,0
2997,TEST_2997,0
2998,TEST_2998,1


In [None]:
# Bare expression: displays the predictor object's repr.
predictor

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7881db0466e0>

In [11]:
# Save the submission under a relative file name (resolved against the current
# working directory); index=False leaves the DataFrame index out of the CSV.
submission.to_csv('9_Autogluon_3hr_828_submission.csv', index = False)