# Column Transformer

In [65]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, LabelEncoder

In [66]:
# Location of the customer dataset.
# NOTE(review): this is an absolute, machine-specific path — adjust it before
# running the notebook on another machine.
CUSTOMER_CSV = r'D:\Data Analytics\Python Scripts\Statistics\Encoding\customer.csv'

df = pd.read_csv(CUSTOMER_CSV)
df.head()

Unnamed: 0,age,gender,review,education,purchased
0,30,Female,Average,School,No
1,68,Female,Poor,UG,No
2,70,Female,Good,PG,No
3,72,Female,Good,PG,No
4,16,Female,Average,UG,No


In [67]:
# Hold out 30% of the rows for testing. The first four columns are used as the
# features and the last column as the target.
# A fixed random_state makes the shuffle, and therefore every downstream
# output, reproducible across kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(
    df.iloc[:, 0:4],
    df.iloc[:, -1],
    test_size=0.3,
    random_state=42,
)

X_train.head()

Unnamed: 0,age,gender,review,education
44,77,Female,Average,UG
38,45,Female,Good,School
9,74,Male,Good,UG
14,15,Male,Poor,PG
22,18,Female,Poor,PG


In [68]:
# Ordinal features: one category list per column, in the same order as the
# columns, listing the levels in the order their integer codes should follow.
ordinal_levels = [
    ["Poor", "Average", "Good"],  # review
    ["School", "UG", "PG"],  # education
]
ordinal_step = (
    "tnf1",
    OrdinalEncoder(categories=ordinal_levels, dtype=int),
    ["review", "education"],
)

# Nominal feature: one-hot encode gender as dense integers, dropping the first
# category's indicator column.
onehot_step = (
    "tnf3",
    OneHotEncoder(sparse_output=False, drop="first", dtype=int),
    ["gender"],
)

# Columns not named in any step are passed through unchanged.
ctfr = ColumnTransformer(
    transformers=[ordinal_step, onehot_step],
    remainder="passthrough",
)

In [69]:
# Fit the encoders on the training features and encode them in one step.
# NOTE(review): this rebinds X_train to the transformed output, so re-running
# this cell on its own (without re-running the split cell first) will likely
# fail — the transformer selects columns by name.
X_train = ctfr.fit_transform(X_train)

In [70]:
# Encode the held-out features with the already-fitted transformer
# (transform only — nothing is refitted on the test split).
# NOTE(review): like the training cell, this rebinds X_test to the encoded output.
X_test = ctfr.transform(X_test)

In [72]:
# Label the encoded training array for display. The names follow the
# transformer's output order: the two ordinal columns, the one-hot gender
# column, then the passed-through age column.
encoded_columns = ["review", "education", "gender", "age"]
pd.DataFrame(X_train, columns=encoded_columns).head()

Unnamed: 0,review,education,gender,age
0,1,1,0,77
1,2,0,0,45
2,2,1,1,74
3,0,2,1,15
4,0,2,0,18


In [73]:
# Learn the target classes from the training split only, then map both target
# splits to integer codes with that same fitted encoder.
le = LabelEncoder().fit(Y_train)
Y_train, Y_test = le.transform(Y_train), le.transform(Y_test)

In [83]:
# Preview the encoded training target as a single-column frame.
purchased_preview = pd.DataFrame(Y_train, columns=['purchased'])
purchased_preview.head()

Unnamed: 0,purchased
0,0
1,0
2,1
3,1
4,1
