In [1]:
import pandas as pd
import numpy as np
import random
import os
import warnings
from sklearn.model_selection import train_test_split
warnings.filterwarnings(action='ignore')

In [2]:
def seed_everything(seed):
    """Seed the global Python and NumPy random number generators.

    The string form of ``seed`` is also stored in the ``PYTHONHASHSEED``
    environment variable.

    Args:
        seed: Value passed to ``random.seed`` and ``np.random.seed``.
    """
    random.seed(seed)
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # this presumably only influences processes spawned afterwards - confirm.
    os.environ.update({'PYTHONHASHSEED': str(seed)})
    np.random.seed(seed)

# Fix the seed so the random number generators start from a known state.
seed_everything(seed=96)

In [3]:
pip install --user mljar-supervised

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Read the training and test tables from CSV files in the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [5]:
# Remove the ID column from both tables.
# Rebinding the result (instead of inplace=True) together with errors="ignore"
# keeps this cell idempotent: re-running it after ID has already been dropped
# no longer raises KeyError.
train = train.drop(columns="ID", errors="ignore")
test = test.drop(columns="ID", errors="ignore")

In [6]:
# Separate the regression target from the remaining columns, which are used as
# the model inputs.
y = train["critical_temp"]
x = train.drop(columns=["critical_temp"])

In [7]:
def NMAE(true, pred, sample_weight=None):
    """Normalized mean absolute error: MAE divided by the mean absolute target.

    Inputs are flattened to 1-D float arrays and compared by position, so
    pandas objects with differing indexes and column-vector predictions of
    shape (n, 1) are scored element by element instead of being index-aligned
    or broadcast against each other.

    Args:
        true: Ground-truth target values (array-like).
        pred: Predicted values (array-like) with the same number of elements.
        sample_weight: Optional per-sample weights (array-like). When given,
            both the absolute errors and the absolute targets are weighted.

    Returns:
        The score as a NumPy float; lower is better. If the (weighted) mean
        absolute target is zero the division yields ``inf`` or ``nan``.
    """
    true_arr = np.asarray(true, dtype=float).ravel()
    pred_arr = np.asarray(pred, dtype=float).ravel()
    abs_err = np.abs(true_arr - pred_arr)
    abs_true = np.abs(true_arr)

    if sample_weight is None:
        # Unweighted path: same formula as before, mean error over mean target.
        return np.mean(abs_err) / np.mean(abs_true)

    weights = np.asarray(sample_weight, dtype=float).ravel()
    # The weight normalizer cancels between numerator and denominator.
    return np.sum(weights * abs_err) / np.sum(weights * abs_true)

In [8]:
from supervised.automl import AutoML

# Configure the AutoML regression search in "Compete" mode over four tree-based
# algorithms, scored with the custom NMAE metric.
# NOTE(review): random_state here is 42 while seed_everything was called with
# 96 - confirm the two different seeds are intentional.
automl = AutoML(
    mode="Compete",
    algorithms=["Random Forest", "LightGBM", "Xgboost", "CatBoost"],
    ml_task="regression",
    eval_metric=NMAE,
    random_state=42,
    total_time_limit=12 * 60 * 60,  # 43200, i.e. a 12-hour budget
)

In [9]:
# Fit the configured AutoML object on the feature table x and target y.
# As the last expression of the cell, the returned object is also displayed.
automl.fit(x, y)

AutoML directory: AutoML_4
The task is regression with evaluation metric user_defined_metric
AutoML will use algorithms: ['Random Forest', 'LightGBM', 'Xgboost', 'CatBoost']
AutoML will stack models
AutoML will ensemble available models
AutoML steps: ['adjust_validation', 'simple_algorithms', 'default_algorithms', 'not_so_random', 'golden_features', 'kmeans_features', 'insert_random_feature', 'features_selection', 'hill_climbing_1', 'hill_climbing_2', 'boost_on_errors', 'ensemble', 'stack', 'ensemble_stacked']
* Step adjust_validation will try to check up to 1 model
1_DecisionTree user_defined_metric 0.354634 trained in 1.12 seconds
Adjust validation. Remove: 1_DecisionTree
Validation strategy: 10-fold CV Shuffle
Skip simple_algorithms because no parameters were generated.
* Step default_algorithms will try to check up to 4 models
1_Default_LightGBM user_defined_metric 0.157634 trained in 178.14 seconds
2_Default_Xgboost user_defined_metric 0.160267 trained in 277.29 seconds
3_Default_

AutoML(algorithms=['Random Forest', 'LightGBM', 'Xgboost', 'CatBoost'],
       eval_metric=<function NMAE at 0x0000018C7F7679D0>, ml_task='regression',
       mode='Compete', random_state=42, total_time_limit=43200)

In [10]:
# Predict on the test table; the result is later read through its
# "prediction" column.
pred = automl.predict_all(test)

In [11]:
# Display the prediction table for a quick visual check.
pred

Unnamed: 0,prediction
0,1.719580
1,4.945035
2,12.494085
3,46.916921
4,3.824118
...,...
8499,31.181901
8500,21.840439
8501,11.432821
8502,19.559758


In [12]:
# Load the submission template that the predictions are written into.
sample = pd.read_csv(filepath_or_buffer="sample_submission.csv")

In [13]:
# Guard against a row-count mismatch: a plain Series assignment aligns on the
# index and would silently fill unmatched rows with NaN.
assert len(sample) == len(pred), (
    f"submission template has {len(sample)} rows but there are {len(pred)} predictions"
)
# Assign by position so the result does not depend on the two indexes matching.
sample["critical_temp"] = pred["prediction"].to_numpy()

In [15]:
# Write the filled-in submission without the index column. The file name tags
# the run: AutoML, 12 hours, 4 models.
sample.to_csv(path_or_buf="0821_03(automl12시간_모델4개).csv", index=False)