# Column Transformer using Sklearn

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

In [2]:
#loading data set

df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [3]:
#checking what types of value in data

df['day'].value_counts()

Sat     87
Sun     76
Thur    62
Fri     19
Name: day, dtype: int64

In [4]:
df['time'].value_counts()

Dinner    176
Lunch      68
Name: time, dtype: int64

In [5]:
df['smoker'].value_counts()

No     151
Yes     93
Name: smoker, dtype: int64

# Spliting X and Y

In [6]:

x = df.drop(columns=['tip','size'])
y = df[['tip']]
x.head()

Unnamed: 0,total_bill,sex,smoker,day,time
0,16.99,Female,No,Sun,Dinner
1,10.34,Male,No,Sun,Dinner
2,21.01,Male,No,Sun,Dinner
3,23.68,Male,No,Sun,Dinner
4,24.59,Female,No,Sun,Dinner


In [7]:
y.head()

Unnamed: 0,tip
0,1.01
1,1.66
2,3.5
3,3.31
4,3.61


## Traning and Testing


In [8]:
from sklearn.model_selection import train_test_split

In [9]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=.20,random_state=1)

In [32]:
x_train.shape

(195, 5)

In [10]:
x_train.head()

Unnamed: 0,total_bill,sex,smoker,day,time
0,16.99,Female,No,Sun,Dinner
154,19.77,Male,No,Sun,Dinner
167,31.71,Male,No,Sun,Dinner
110,14.0,Male,No,Sat,Dinner
225,16.27,Female,Yes,Fri,Lunch


## Transforming...

In [28]:
from sklearn.compose import ColumnTransformer

In [51]:
transform = ColumnTransformer(transformers=[ 
    ('trans_1',OrdinalEncoder(categories=[['Lunch','Dinner']]),['time']),
    ('trans_2',OneHotEncoder(sparse=False,drop='first'),['sex','smoker','day'])
], remainder='passthrough')

In [52]:
x_train_new = transform.fit_transform(x_train).shape
x_train_new

(195, 7)

In [53]:
x_test_new = transformer.transform(x_test).shape
x_test_new

(49, 7)

In [54]:
x_train_new

(195, 7)