# ColumnTransformer
* 컬럼별로 서로 다른 스케일링·인코딩을 지정하고, 한 번에 학습(fit) 및 적용(transform)할 수 있음

## Import

In [None]:
pip install mglearn

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import mglearn

from matplotlib import rcParams

# Notebook-wide plotting defaults: figure size first, then the font-related
# settings applied together in a single update.
plt.rc('figure', figsize=(10, 6))
rcParams.update({
    'font.family': 'DejaVu Sans',
    'font.size': 10,
    # Draw negative tick labels with a plain hyphen rather than the
    # Unicode minus sign.
    'axes.unicode_minus': False,
})

## 컬럼별 스케일링, 인코딩 적용 - 지도학습

### 컬럼 변환기 생성

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

# Declare which preprocessing step is applied to which columns:
#   - 'scaling': standardize the numeric columns,
#   - 'onehot' : one-hot encode the categorical columns.
#
# sparse_threshold=0 makes the ColumnTransformer always return a dense array.
# It replaces OneHotEncoder(sparse=False): the `sparse` argument was renamed
# to `sparse_output` in scikit-learn 1.2 and removed in 1.4, so the old
# spelling raises TypeError on current releases, while this form works on
# both old and new versions.
ct = ColumnTransformer(
    [('scaling', StandardScaler(), ['age', 'hours-per-week']),
     ('onehot',  OneHotEncoder(), ['workclass', 'education', 'gender', 'occupation'])],
    sparse_threshold=0)

### 데이터 분할

In [None]:
from sklearn.model_selection import train_test_split

# NOTE(review): df_ad is not defined in the cells visible here — presumably
# the source DataFrame is loaded elsewhere; confirm before running.
# Separate the 'income' target column from the remaining feature columns.
X, y = df_ad.drop(columns='income'), df_ad['income']

# Split into train/test partitions; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=123,
)

### 컬럼 변환(스케일링, 인코딩)

In [None]:
# Fit the column transformer (scaling + encoding) on the training split only.
ct.fit(X_train)

# Apply the fitted transformer to both splits.
X_train_trans, X_test_trans = ct.transform(X_train), ct.transform(X_test)

# Print the shape of each transformed split, one per line.
print(X_train_trans.shape, X_test_trans.shape, sep='\n')

(24420, 44)
(8141, 44)




### Example: LogisticRegression - 컬럼 변환 적용(스케일링, 인코딩)

In [None]:
from sklearn.linear_model import LogisticRegression

# Create the classifier and train it on the transformed training data in a
# single chained call (fit returns the estimator itself).
model = LogisticRegression(max_iter=1000).fit(X_train_trans, y_train)

# Evaluate on the transformed test split; as the last expression in the cell,
# the returned score is displayed as the cell output.
model.score(X_test_trans, y_test)

0.8177128116938951

### make_column_transformer 사용 (ColumnTransformer 생성)

In [None]:
from sklearn.compose import make_column_transformer

# Same column-wise preprocessing built with make_column_transformer, which
# takes (transformer, columns) pairs without explicit step names.
#
# sparse_threshold=0 makes the resulting ColumnTransformer always return a
# dense array. It replaces OneHotEncoder(sparse=False): the `sparse` argument
# was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4, so
# the old spelling raises TypeError on current releases.
ct = make_column_transformer(
    (StandardScaler(), ['age', 'hours-per-week']),
    (OneHotEncoder(), ['workclass', 'education', 'gender', 'occupation']),
    sparse_threshold=0)