# Development Environment
Google Colab
- Platform: Linux-5.10.147+-x86_64-with-glibc2.29
- OS: Ubuntu 20.04.5 LTS
- CPU: Intel(R) Xeon(R) CPU @ 2.20GHz

# Python Library Version
- Python 3.8.10
- pandas 1.3.5
- numpy 1.22.4
- sklearn 1.2.1
- xgboost 1.7.4
- lightgbm 2.2.3


In [None]:
!python -V

Python 3.8.10


In [None]:
#-*- coding:utf-8 -*-

import pandas as pd
import random
import os
import numpy as np

from sklearn.preprocessing import LabelEncoder

In [None]:
def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's global generator with
    ``seed`` and stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Value passed unchanged to ``random.seed`` and
            ``np.random.seed``.
    """
    # Python's built-in generator.
    random.seed(seed)
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment is visible to processes launched afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)
    # NumPy's legacy global generator.
    np.random.seed(seed)


seed_everything(37)  # Fix the Seed

In [None]:
# Read the training and test CSV files into DataFrames.
train_df = pd.read_csv('/data/train.csv') # train data load
test_df = pd.read_csv('/data/test.csv') # test data load

In [None]:
# Training features: every column except the identifier, the timestamp and
# the two Y_* columns, which are not used as features.
train_x = train_df.drop(['PRODUCT_ID', 'TIMESTAMP', 'Y_Class', 'Y_Quality'], axis=1)

# Target the model is trained to predict.
train_y = train_df['Y_Class']

# Test features: only the identifier and the timestamp are dropped here.
test_x = test_df.drop(['PRODUCT_ID', 'TIMESTAMP'], axis=1)

In [None]:
# Replace missing values with 0
# NOTE(review): this runs before the label-encoding step, so missing values in
# every remaining column (including 'LINE' and 'PRODUCT_CODE') also become 0 —
# confirm that this is intended.
train_x = train_x.fillna(0)
test_x = test_x.fillna(0)

In [None]:
qual_col = ['LINE', 'PRODUCT_CODE']  # non-numerical columns

# Label-encode each non-numerical column; the encoder is fitted on the
# training data only.
for col in qual_col:
    le = LabelEncoder().fit(train_x[col])
    train_x[col] = le.transform(train_x[col])

    # Labels that occur only in the test data are appended to the encoder's
    # classes before the test column is transformed.
    unseen = [label for label in np.unique(test_x[col]) if label not in le.classes_]
    for label in unseen:
        le.classes_ = np.append(le.classes_, label)
    test_x[col] = le.transform(test_x[col])
print('Done.')

Done.


In [None]:
# library download
!pip install xgboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Check the installed xgboost version
import xgboost
xgboost.__version__

'1.7.4'

In [None]:
from xgboost import XGBClassifier # library import

# Build the classifier with a fixed random_state, then train it on the
# prepared training features and target.
xgb_clf = XGBClassifier(random_state=37)
xgb_clf.fit(train_x, train_y)

# Predict a class for every row of the test features.
preds = xgb_clf.predict(test_x)

In [None]:
preds

array([1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 0, 2, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,

In [None]:
# Read the sample submission file; its 'Y_Class' column is filled in below.
submit = pd.read_csv('/data/sample_submission.csv') # Submit template Load

In [None]:
# Store the model predictions in the template's 'Y_Class' column.
# NOTE(review): assumes the template rows are in the same order as test.csv — confirm.
submit['Y_Class'] = preds # Enter submission data

In [None]:
# Write the filled-in template to CSV; index=False leaves the DataFrame index out of the file.
submit.to_csv('/data/XGB.csv', index=False) # File for submission

In [None]:
# Persist the trained classifier to disk so it can be reloaded without retraining.
xgb_clf.save_model("/data/XGB_model.json") # Save model