# 라이브러리 설치

In [None]:
pip install --user mljar-supervised

Note: you may need to restart the kernel to use updated packages.


# 라이브러리 로딩 및 초기화

In [None]:
import pandas as pd
import numpy as np
import random
import os
import warnings
from sklearn.model_selection import train_test_split
warnings.filterwarnings(action='ignore')

In [None]:
def seed_everything(seed):
    """Seed the global Python and NumPy random number generators.

    The seed is also stored, as a string, in the ``PYTHONHASHSEED``
    environment variable of the current process.

    Args:
        seed: Value passed to ``random.seed`` and ``np.random.seed``.
    """
    random.seed(seed)
    os.environ.update(PYTHONHASHSEED=str(seed))
    np.random.seed(seed)


seed_everything(96)  # fix the seed

# 구글 드라이브 연동

In [None]:
# Colab-only step: mount Google Drive at /content/drive so that the CSV files
# stored under /content/drive/MyDrive can be read later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 데이터 로딩 및 요약

In [None]:
# Read the training and test tables from the mounted Google Drive folder.
train = pd.read_csv("/content/drive/MyDrive/데이콘/초전도체/train.csv")
test = pd.read_csv("/content/drive/MyDrive/데이콘/초전도체/test.csv")

In [None]:
# Remove the "ID" column from both frames.
# Re-binding the result (instead of inplace=True) together with
# errors="ignore" keeps this cell idempotent: running it a second time no
# longer raises KeyError because "ID" is already gone.
train = train.drop(columns=["ID"], errors="ignore")
test = test.drop(columns=["ID"], errors="ignore")

In [None]:
# Print column names, dtypes and non-null counts of the training frame.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12759 entries, 0 to 12758
Data columns (total 82 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   number_of_elements               12759 non-null  int64  
 1   mean_atomic_mass                 12759 non-null  float64
 2   wtd_mean_atomic_mass             12759 non-null  float64
 3   gmean_atomic_mass                12759 non-null  float64
 4   wtd_gmean_atomic_mass            12759 non-null  float64
 5   entropy_atomic_mass              12759 non-null  float64
 6   wtd_entropy_atomic_mass          12759 non-null  float64
 7   range_atomic_mass                12759 non-null  float64
 8   wtd_range_atomic_mass            12759 non-null  float64
 9   std_atomic_mass                  12759 non-null  float64
 10  wtd_std_atomic_mass              12759 non-null  float64
 11  mean_fie                         12759 non-null  float64
 12  wtd_mean_fie      

In [None]:
# Print column names, dtypes and non-null counts of the test frame.
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8504 entries, 0 to 8503
Data columns (total 81 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   number_of_elements               8504 non-null   int64  
 1   mean_atomic_mass                 8504 non-null   float64
 2   wtd_mean_atomic_mass             8504 non-null   float64
 3   gmean_atomic_mass                8504 non-null   float64
 4   wtd_gmean_atomic_mass            8504 non-null   float64
 5   entropy_atomic_mass              8504 non-null   float64
 6   wtd_entropy_atomic_mass          8504 non-null   float64
 7   range_atomic_mass                8504 non-null   float64
 8   wtd_range_atomic_mass            8504 non-null   float64
 9   std_atomic_mass                  8504 non-null   float64
 10  wtd_std_atomic_mass              8504 non-null   float64
 11  mean_fie                         8504 non-null   float64
 12  wtd_mean_fie        

In [None]:
# Target vector first, then the feature matrix (every column except the target).
y = train["critical_temp"]
x = train.drop(columns=["critical_temp"])

# eval_metric 설정

In [None]:
def NMAE(true, pred, sample_weight=None):
    """Normalized mean absolute error: MAE divided by the mean of ``|true|``.

    Both inputs are flattened to 1-D float arrays and compared by position,
    so a column-vector prediction of shape ``(n, 1)`` or a pandas object with
    a different index gives the same score as a plain 1-D array.

    Args:
        true: Ground-truth target values (array-like).
        pred: Predicted values (array-like), in the same order as ``true``.
        sample_weight: Optional per-sample weights. When given, both the
            absolute errors and ``|true|`` are averaged with these weights;
            when ``None`` the unweighted means are used.

    Returns:
        The score as a float; lower is better. The result is ``nan`` or
        ``inf`` when the mean of ``|true|`` is zero.
    """
    # Flatten to 1-D so (n,) vs (n, 1) cannot broadcast into an (n, n) matrix
    # and pandas index labels cannot re-align the two inputs.
    true_arr = np.asarray(true, dtype=float).ravel()
    pred_arr = np.asarray(pred, dtype=float).ravel()

    abs_err = np.abs(true_arr - pred_arr)
    abs_true = np.abs(true_arr)

    if sample_weight is None:
        return np.mean(abs_err) / np.mean(abs_true)

    weights = np.asarray(sample_weight, dtype=float).ravel()
    return np.average(abs_err, weights=weights) / np.average(abs_true, weights=weights)

# 모델 초기화 및 훈련(mljar-supervised)

In [None]:
from supervised.automl import AutoML

# Configure the mljar-supervised search: regression task, four tree-based
# algorithm families, scored with the custom NMAE metric.
automl = AutoML(
    mode="Compete",
    algorithms=['Random Forest', 'LightGBM', 'Xgboost', 'CatBoost'],
    ml_task="regression",
    eval_metric=NMAE,
    random_state=42,
    total_time_limit=12 * 60 * 60,  # 43200 seconds = 12 hours
)

In [None]:
# Run the AutoML search on the training features and target.
automl.fit(x, y)

AutoML directory: AutoML_4
The task is regression with evaluation metric user_defined_metric
AutoML will use algorithms: ['Random Forest', 'LightGBM', 'Xgboost', 'CatBoost']
AutoML will stack models
AutoML will ensemble available models
AutoML steps: ['adjust_validation', 'simple_algorithms', 'default_algorithms', 'not_so_random', 'golden_features', 'kmeans_features', 'insert_random_feature', 'features_selection', 'hill_climbing_1', 'hill_climbing_2', 'boost_on_errors', 'ensemble', 'stack', 'ensemble_stacked']
* Step adjust_validation will try to check up to 1 model
1_DecisionTree user_defined_metric 0.354634 trained in 1.12 seconds
Adjust validation. Remove: 1_DecisionTree
Validation strategy: 10-fold CV Shuffle
Skip simple_algorithms because no parameters were generated.
* Step default_algorithms will try to check up to 4 models
1_Default_LightGBM user_defined_metric 0.157634 trained in 178.14 seconds
2_Default_Xgboost user_defined_metric 0.160267 trained in 277.29 seconds
3_Default_

AutoML(algorithms=['Random Forest', 'LightGBM', 'Xgboost', 'CatBoost'],
       eval_metric=<function NMAE at 0x0000018C7F7679D0>, ml_task='regression',
       mode='Compete', random_state=42, total_time_limit=43200)

# 예측 및 저장

In [None]:
# Predict on the test features; the "prediction" column of the result is
# what gets written into the submission further down.
pred = automl.predict_all(test)

In [None]:
# Read the sample submission file from the same Drive folder as the train/test CSVs.
sample = pd.read_csv("/content/drive/MyDrive/데이콘/초전도체/sample_submission.csv")

In [None]:
# Assign by position, not by index label: a Series on the right-hand side is
# aligned on its index, which would silently produce NaN if the two indexes
# ever differed. With a NumPy array a length mismatch raises instead.
sample["critical_temp"] = pred["prediction"].to_numpy()

In [None]:
# Write the submission CSV (relative path, so it lands in the current working
# directory) without the index column.
sample.to_csv("0821_03(automl12시간_모델4개).csv", index = False)