# library

In [23]:
import pandas as pd
import numpy as np
import pickle

from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from xgboost import XGBClassifier

In [24]:
class Model:
    """XGBoost classifier pipeline: load data, train, predict, write submission.

    Categorical feature encodings and the target's class list are learned
    once in :meth:`train` and reused in :meth:`test`, so predictions are
    decoded with exactly the mapping the model was trained on.
    """

    # Name of the target column in train.csv and in the submission file.
    TARGET = '대출등급'

    def __init__(self):
        """Read the train/test/sample-submission CSVs and create the classifier."""
        self._train = pd.read_csv('./data/train.csv')
        self._test = pd.read_csv('./data/test.csv')
        self._submit = pd.read_csv('./data/sample_submission.csv')

        self.result = None

        self.model = XGBClassifier()

        # State learned during train() and reused by test().
        self._encoder = None        # fitted OrdinalEncoder for feature columns
        self._categoric_col = None  # object-dtype columns the encoder was fitted on
        self._classes = None        # sorted unique target labels; index == class code

    def preprocessing(self, df, fit=True):
        """Return a copy of ``df`` with categorical columns ordinal-encoded.

        Args:
            df: Frame to encode. It is copied, not modified in place.
            fit: If True (default), fit a new encoder on the object-dtype
                columns of ``df`` and remember it. If False, reuse the
                encoder fitted earlier so the same category -> code mapping
                is applied (required for data fed to an already-trained
                model).

        Returns:
            The encoded copy of ``df``.

        Raises:
            RuntimeError: If ``fit`` is False and no encoder has been fitted.
        """
        encoded = df.copy()

        if fit:
            self._categoric_col = list(
                encoded.select_dtypes(include='object').columns
            )
            # Categories never seen while fitting become NaN at transform
            # time instead of raising.
            self._encoder = OrdinalEncoder(
                handle_unknown='use_encoded_value', unknown_value=np.nan
            )
            if self._categoric_col:
                encoded[self._categoric_col] = self._encoder.fit_transform(
                    encoded[self._categoric_col]
                )
            return encoded

        if self._encoder is None:
            raise RuntimeError(
                "preprocessing(fit=False) requires a fitted encoder; "
                "call train() first"
            )
        if self._categoric_col:
            encoded[self._categoric_col] = self._encoder.transform(
                encoded[self._categoric_col]
            )
        return encoded

    def _encode_target(self, target):
        """Map target labels to integer codes 0..k-1 and remember the labels.

        Labels are ordered by ``np.unique`` (sorted), so code ``i``
        corresponds to ``self._classes[i]``.
        """
        self._classes, codes = np.unique(
            np.asarray(target), return_inverse=True
        )
        return codes

    def _decode_labels(self, codes):
        """Map predicted class codes back to the original target labels.

        Raises:
            RuntimeError: If no class list has been learned yet.
        """
        if self._classes is None:
            raise RuntimeError(
                "no target classes learned; call train() first"
            )
        return self._classes[np.asarray(codes).astype(int)]

    def train(self):
        """Fit the model on an 80/20 stratified split and print macro-F1."""
        # Encode features and target separately so the feature encoder can be
        # reused on the test set, which has no target column.
        X = self.preprocessing(
            self._train.drop([self.TARGET], axis=1), fit=True
        )
        Y = self._encode_target(self._train[self.TARGET])

        x_train, x_test, y_train, y_test = train_test_split(
            X, Y, test_size=0.2, random_state=42, stratify=Y
        )

        # Fit and evaluate on the held-out part.
        self.model.fit(x_train, y_train)
        y_pred = self.model.predict(x_test)
        score = f1_score(y_test, y_pred, average="macro")
        print(f"[Score]: {score:.2f}")

    def test(self):
        """Predict the test set and store decoded labels in ``self.result``."""
        # Reuse the encoder fitted in train() rather than fitting a new one.
        features = self.preprocessing(self._test, fit=False)

        codes = self.model.predict(features)
        # Decode through the learned class list so every trained class can be
        # emitted, however many there are.
        self.result = self._decode_labels(codes)

    def submit(self):
        """Write predictions to ``submit.csv`` and pickle the model.

        Raises:
            RuntimeError: If :meth:`test` has not produced predictions yet.
        """
        if self.result is None:
            raise RuntimeError("no predictions to submit; call test() first")

        # Save the submission CSV.
        self._submit[self.TARGET] = self.result
        self._submit.to_csv("submit.csv", index=False)

        # Save the fitted model.
        with open('model.pkl', 'wb') as file:
            pickle.dump(self.model, file)

In [25]:
# Build the pipeline object; the constructor reads the CSVs under ./data
# and creates an XGBClassifier.
model = Model()

In [26]:
# Fit the classifier and print the macro-F1 measured on the 20% hold-out split.
model.train()

[Score]: 0.78


In [27]:
# Predict labels for the test set; the predictions are stored in model.result.
model.test()

In [28]:
# Write the predictions to submit.csv and pickle the model to model.pkl.
model.submit()