## PyCaret Installation

In [None]:
!pip install --pre pycaret

In [None]:
!pip install cudf-cu11 dask-cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
!pip install cuml-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
!pip install cugraph-cu11 --extra-index-url=https://pypi.ngc.nvidia.com

In [None]:
!pip install catboost

## Import

In [None]:
import pandas as pd
import random
import os
import numpy as np

from sklearn.preprocessing import LabelEncoder

In [None]:
def seed_everything(seed):
    """Seed the random sources used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable to ``str(seed)`` and
    seeds both Python's ``random`` module and NumPy's global generator.

    Args:
        seed: Integer seed applied to every source.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(37)  # fix the seed

## Data Load

In [None]:
# Load the training and test tables from CSV files in the working directory.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

In [None]:
# Columns removed from the model inputs of both the train and test frames.
_id_columns = ['PRODUCT_ID', 'TIMESTAMP']

# Regression frame: every remaining column, including the Y_Quality target.
train_x1 = train_df.drop(columns=[*_id_columns, 'Y_Class'])
# Classification frame: Y_Quality as the only feature, Y_Class as the target.
train_x2 = train_df[['Y_Quality', 'Y_Class']]

# Test features with the same two columns removed.
test_x = test_df.drop(columns=_id_columns)

In [None]:
# Display the regression frame (train_df without PRODUCT_ID, TIMESTAMP and Y_Class).
train_x1

In [None]:
# Display the classification frame (Y_Quality and Y_Class columns only).
train_x2

## Data Pre-processing

In [None]:
# Replace every missing value with 0 in both feature frames.
train_x1, test_x = train_x1.fillna(value=0), test_x.fillna(value=0)

In [None]:
# Encode the qualitative (categorical) columns as integer codes.
qual_col = ['LINE', 'PRODUCT_CODE']

for col in qual_col:
    # Fit on the training values only, then encode the training column.
    le = LabelEncoder().fit(train_x1[col])
    train_x1[col] = le.transform(train_x1[col])

    # Labels that occur only in the test set are appended to the encoder's
    # classes so that transform() can map them instead of raising.
    unseen = [lbl for lbl in np.unique(test_x[col]) if lbl not in le.classes_]
    if unseen:
        le.classes_ = np.append(le.classes_, unseen)
    test_x[col] = le.transform(test_x[col])
print('Done.')

## Regression Model Fit (Y_Quality)

In [None]:
from pycaret import regression as reg

# Model setup: regression experiment on train_x1 with Y_Quality as the target.
# session_id pins PyCaret's own random seed (same value that is passed to
# seed_everything), so the experiment is repeatable between runs; without it
# PyCaret generates a pseudo-random seed for each setup() call.
setup_regression = reg.setup(data=train_x1, target='Y_Quality', session_id=37)

print("Complete")

In [None]:
# Model performance comparison section (can be skipped)

# Show the table of estimators available in PyCaret's regression module.
reg.models()

In [None]:
# Train and score the candidate regressors, leaving out the model IDs listed in
# `exclude`, and keep the top 5 (n_select = 5).
# NOTE: with n_select > 1 the result is a list of models, not a single estimator.
best_model_regression = reg.compare_models(exclude = ['br', 'lar', 'rf', 'et', 'par', 'huber', 'catboost'], n_select = 5)
# top5 = reg.compare_models(exclude = ['br', 'lar', 'rf', 'et', 'par', 'huber', 'catboost', 'dummy'], n_select = 5)
# best3models = compare_models(sort='Accuracy', n_select=3, fold=10)

# best_model_regression = reg.create_model('gbr')

In [None]:
# Model tuning (disabled): tune each of the top-5 models individually.
# models = []
# for m in top5:
#     models.append(reg.tune_model(m, optimize = 'MAE', choose_better = True, n_iter = 30))

In [None]:
# best_model_regression

In [None]:
# compare_models(n_select=5) hands back a list of models, while tune_model
# expects a single estimator, so tune the first (top-ranked) entry. The
# isinstance check keeps this cell working if best_model_regression is
# switched to a single model (e.g. via reg.create_model).
if isinstance(best_model_regression, list):
    model_to_tune = best_model_regression[0]
else:
    model_to_tune = best_model_regression
tuned_model_regression = reg.tune_model(model_to_tune)

In [None]:
# Model ensemble (disabled; for use when several models are tuned)
# voting = reg.blend_models(models, optimize = 'MAE')
# voting = reg.tune_model(voting, optimize = 'MAE', choose_better = True, n_iter = 30)

In [None]:
# Model training
# final_model_regression = reg.finalize_model(voting)
# Finalize the tuned regressor so it can be used for prediction on the test set.
final_model_regression = reg.finalize_model(tuned_model_regression)

## Inference

In [None]:
# Run the finalized regressor on the test features; the predicted value is
# read from the 'prediction_label' column of the result further below.
pred_y1 = reg.predict_model(final_model_regression, data=test_x)
print('Done.')

In [None]:
# Inspect the regression output on the test set.
pred_y1

In [None]:
# Keep only the 'prediction_label' column, as a one-column DataFrame.
pred_y1 = pd.DataFrame(pred_y1['prediction_label'])

In [None]:
# Inspect the reduced prediction frame.
pred_y1

In [None]:
from pycaret import classification as cls

# Model setup: classification experiment on train_x2, predicting Y_Class from
# Y_Quality. session_id pins PyCaret's own random seed (same value that is
# passed to seed_everything), so the experiment is repeatable between runs;
# without it PyCaret generates a pseudo-random seed for each setup() call.
setup_classification = cls.setup(data=train_x2, target='Y_Class', session_id=37)

print("Complete")

In [None]:
# Model performance comparison section (can be skipped)

# Show the table of estimators available in PyCaret's classification module.
cls.models()

In [None]:
# Train and score the candidate classifiers; no n_select is given, so the
# default selection (a single best model) is kept.
best_model_classification = cls.compare_models()
# best3models = compare_models(sort='Accuracy', n_select=3, fold=10)

In [None]:
# Model tuning: hyperparameter search on the best classifier.
tuned_model_classification = cls.tune_model(best_model_classification)

In [None]:
# Model ensemble (disabled; for use when several models are selected)
# blended = blend_models(estimator_list=best3models, fold=10, method='soft')

In [None]:
# Model training: finalize the tuned classifier for use on the test predictions.
final_model_classification = cls.finalize_model(tuned_model_classification)

In [None]:
# Inspect the regression predictions before they are fed to the classifier.
pred_y1

In [None]:
# Rename the prediction column to 'Y_Quality', the feature name the classifier
# was set up with (train_x2 holds Y_Quality and Y_Class).
pred_y1.rename(columns={'prediction_label':'Y_Quality'}, inplace = True)

In [None]:
# Classify the predicted Y_Quality values with the finalized classifier.
pred_y = cls.predict_model(final_model_classification, data=pred_y1)

## Submit

In [None]:
# Inspect the classifier output on the test set.
pred_y

In [None]:
# Load the submission template from the working directory.
submit = pd.read_csv('sample_submission.csv')

In [None]:
# Write the predicted class labels into the template's Y_Class column.
# NOTE(review): the assignment aligns on index — assumes pred_y and submit
# share the same row index; confirm.
submit['Y_Class'] = pd.DataFrame(pred_y['prediction_label'])

In [None]:
# Save the submission file without the DataFrame index column.
submit.to_csv('submission_blackcows_regression.csv', index=False)