## Column Transformer in Machine Learning

In [1]:
import numpy as np
import pandas as pd

  from pandas.core import (


In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Read covid_toy.csv (path is relative to the notebook's working directory).
df = pd.read_csv('covid_toy.csv')

In [4]:
# Preview the first five rows to see the column types.
df.head(n=5)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


#### Observation:
1. `age` and `fever` are numerical columns.
2. `gender`, `city` and `has_covid` are nominal (unordered categorical) columns.
3. `cough` is an ordinal (ordered categorical) column.

In [5]:
# Distinct cough levels and how often each occurs.
df['cough'].value_counts()

cough
Mild      62
Strong    38
Name: count, dtype: int64

In [6]:
# Distinct cities and how often each occurs.
df['city'].value_counts()

city
Kolkata      32
Bangalore    30
Delhi        22
Mumbai       16
Name: count, dtype: int64

In [7]:
# Count missing values per column (isna is the canonical name for isnull).
df.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [11]:
from sklearn.model_selection import train_test_split

# Select features and target by column name instead of by position, so the split
# stays correct even if the CSV gains, loses or reorders columns.
# Features: every column except the target. Target: has_covid.
# random_state is fixed so the 80/20 split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['has_covid']),
    df['has_covid'],
    test_size=0.2,
    random_state=42,
)

In [12]:
# Display the training features produced by the split.
X_train

Unnamed: 0,age,gender,fever,cough,city
55,81,Female,101.0,Mild,Mumbai
88,5,Female,100.0,Mild,Kolkata
26,19,Female,100.0,Mild,Kolkata
42,27,Male,100.0,Mild,Delhi
69,73,Female,103.0,Mild,Delhi
...,...,...,...,...,...
60,24,Female,102.0,Strong,Bangalore
71,75,Female,104.0,Strong,Delhi
14,51,Male,104.0,Mild,Bangalore
92,82,Female,102.0,Strong,Kolkata


### Classical Method for data handling

In [15]:
# Missing-value imputation: fill gaps in `fever` with the training-set mean.
# 'mean' is SimpleImputer's default strategy; it is spelled out here for readability.
si = SimpleImputer(strategy='mean')

# Learn the fill value from the training rows only, then apply it to them.
X_train_fever = si.fit_transform(X_train[['fever']])
X_train_fever.shape

(80, 1)

In [27]:
# Apply the imputer already fitted on X_train; transform() (not fit_transform())
# so no statistics are learned from the test rows.
X_test_fever = si.transform(X_test[['fever']])
X_test_fever.shape

(20, 1)

In [18]:
# Ordinal Encoding
# The explicit category list fixes the order: 'Mild' is encoded before 'Strong'.
oe = OrdinalEncoder(categories=[['Mild', 'Strong']])

# Fit on the training rows and encode them in one step.
X_train_cough = oe.fit_transform(X_train[['cough']])

In [20]:
# Sanity check: shape of the encoded training cough column.
X_train_cough.shape

(80, 1)

In [None]:
# Encode the test rows with the encoder fitted on the training rows.
X_test_cough = oe.transform(X_test[['cough']])
X_test_cough.shape

(20, 1)

In [21]:
# OneHotEncoding of the nominal features (gender, city).
# `sparse_output=False` returns a dense ndarray so the result can be concatenated
# with the other feature arrays. The older `sparse=` keyword was deprecated in
# scikit-learn 1.2 and removed in 1.4, where passing it raises a TypeError.
# `drop='first'` drops one dummy column per feature to avoid redundant columns.
ohe = OneHotEncoder(drop='first', sparse_output=False, dtype=np.int32)

# Fit on the training rows and encode them in one step.
X_train_nominal = ohe.fit_transform(X_train[['gender', 'city']])



In [22]:
# Sanity check: shape of the one-hot encoded training columns.
X_train_nominal.shape

(80, 4)

In [32]:
# Encode the test rows with the encoder fitted on the training rows.
X_test_nominal = ohe.transform(X_test[['gender', 'city']])
X_test_nominal.shape

(20, 4)

In [40]:
# `age` is used as-is; double brackets keep it 2-D so it can be concatenated column-wise.
X_train_age = X_train[['age']]
X_train_age.shape

(80, 1)

In [41]:
# Same untransformed `age` column for the test rows, kept 2-D.
X_test_age = X_test[['age']]
X_test_age.shape

(20, 1)

In [43]:
# Stitch the separately processed pieces back together column-wise.
# Column order: age, fever, one-hot (gender, city), cough.
X_train_transformed = np.concatenate(
    [X_train_age, X_train_fever, X_train_nominal, X_train_cough],
    axis=1,
)

# Same column order for the test rows.
X_test_transformed = np.concatenate(
    [X_test_age, X_test_fever, X_test_nominal, X_test_cough],
    axis=1,
)


In [49]:
# Display the manually assembled training feature matrix.
X_train_transformed

array([[ 81., 101.,   0.,   0.,   0.,   1.,   0.],
       [  5., 100.,   0.,   0.,   1.,   0.,   0.],
       [ 19., 100.,   0.,   0.,   1.,   0.,   0.],
       [ 27., 100.,   1.,   1.,   0.,   0.,   0.],
       [ 73., 103.,   0.,   1.,   0.,   0.,   0.],
       [ 70., 103.,   1.,   0.,   1.,   0.,   1.],
       [ 49., 102.,   0.,   1.,   0.,   0.,   0.],
       [ 51., 101.,   0.,   0.,   1.,   0.,   1.],
       [ 64., 101.,   0.,   1.,   0.,   0.,   0.],
       [ 83., 101.,   0.,   0.,   1.,   0.,   0.],
       [ 65.,  98.,   0.,   0.,   0.,   1.,   0.],
       [ 18., 104.,   0.,   0.,   0.,   0.,   0.],
       [ 16., 103.,   0.,   0.,   0.,   0.,   0.],
       [ 16., 104.,   1.,   0.,   1.,   0.,   0.],
       [ 27., 100.,   1.,   0.,   1.,   0.,   0.],
       [ 84., 101.,   0.,   0.,   0.,   0.,   0.],
       [ 51., 104.,   1.,   0.,   1.,   0.,   0.],
       [ 69., 102.,   0.,   0.,   0.,   0.,   0.],
       [ 82., 102.,   0.,   0.,   0.,   0.,   1.],
       [ 69., 103.,   0.,   0.,

### Advanced Method for data handling

In [46]:
from sklearn.compose import ColumnTransformer

# One object that applies each preprocessing step to its own column(s):
#   'si'  - impute missing `fever` values (SimpleImputer default strategy),
#   'oe'  - ordinal-encode `cough` with the explicit order Mild < Strong,
#   'ohe' - one-hot encode `gender` and `city`, dropping the first level of each.
# `sparse_output=False` replaces the `sparse=False` keyword, which was deprecated
# in scikit-learn 1.2 and removed in 1.4.
# remainder='passthrough' keeps the columns not listed (here `age`) unchanged and
# appends them after the transformed columns, so the output column order is
# fever, cough, one-hot(gender, city), age. This differs from the manual
# concatenation order used in the classical method above.
transformer = ColumnTransformer(transformers=[
    ('si', SimpleImputer(), ['fever']),
    ('oe', OrdinalEncoder(categories=[['Mild', 'Strong']]), ['cough']),
    ('ohe', OneHotEncoder(drop='first', sparse_output=False), ['gender', 'city'])
    ],remainder='passthrough')

In [47]:
# Fit every sub-transformer on the training rows only.
transformer.fit(X_train)



In [50]:
# Apply the fitted ColumnTransformer to the training rows.
X_train_transformed2 = transformer.transform(X_train)

In [51]:
# Apply the same fitted ColumnTransformer to the test rows (no refitting).
X_test_transformed2 = transformer.transform(X_test)