<a href="https://www.kaggle.com/code/piyushjain572/column-transformer?scriptVersionId=199302528" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np 
import pandas as pd 

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Read the COVID toy dataset from the Kaggle input directory, then preview
# ten randomly drawn rows (the draw is unseeded, so the rows differ per run).
df = pd.read_csv(
    '/kaggle/input/covid-toy-dataset/covid_toy.csv'
)
df.sample(n=10)

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
from sklearn.model_selection import train_test_split

# Inputs are the first five columns of df; the labels are its last column.
# 20% of the rows are held out for testing, with a fixed seed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :5],
    df.iloc[:, -1],
    test_size=0.2,
    random_state=2,
)
print(X_train.shape)
X_train.head()

# Without Column Transformer

In [None]:
# Fill missing 'fever' values with SimpleImputer (default settings).
# The imputer is fitted on the training split only; the same fitted imputer
# is then applied to both splits so no test information is used for fitting.
si = SimpleImputer()
si.fit(X_train[['fever']])
X_train_fever = si.transform(X_train[['fever']])
X_test_fever = si.transform(X_test[['fever']])
print(X_train_fever.shape)

In [None]:
# Ordinal-encode 'cough' using the explicit category order Mild, Strong.
# Fit on the training split, then encode both splits with the fitted encoder.
oe = OrdinalEncoder(categories=[['Mild', 'Strong']])
oe.fit(X_train[['cough']])
X_train_cough = oe.transform(X_train[['cough']])
X_test_cough = oe.transform(X_test[['cough']])
print(X_train_cough.shape)

In [None]:
# One-hot encode 'gender' and 'city' (in that column order), dropping the
# first category of each feature and returning a dense array.
# Fit on the training split, then encode both splits with the fitted encoder.
ohe = OneHotEncoder(drop='first', sparse_output=False)
ohe.fit(X_train[['gender', 'city']])
X_train_city_gender = ohe.transform(X_train[['gender', 'city']])
X_test_city_gender = ohe.transform(X_test[['gender', 'city']])
print(X_train_city_gender.shape)

In [None]:
# 'age' is used as-is: pull it out of each split as a 2-D array (one column)
# so it can be concatenated with the other transformed arrays.
X_train_age = X_train[['age']].to_numpy()
X_test_age = X_test[['age']].to_numpy()
print(X_train_age.shape)

In [None]:
# Stack the per-column arrays side by side into one feature matrix per split.
# The column order MUST be identical for train and test
# (age, fever, cough, gender/city one-hot); otherwise the test columns would
# be silently misaligned with the training columns, since the shapes still match.
X_train_transformed = np.concatenate(
    (X_train_age, X_train_fever, X_train_cough, X_train_city_gender),
    axis=1,
)
X_test_transformed = np.concatenate(
    (X_test_age, X_test_fever, X_test_cough, X_test_city_gender),
    axis=1,
)

X_train_transformed.shape

# With Column Transformer

In [None]:
from sklearn.compose import ColumnTransformer

In [None]:
# Show one random training row as a reminder of the raw column layout.
X_train.sample(n=1)

In [None]:
# One ColumnTransformer bundles all per-column preprocessing steps.
# Each entry is (step name, transformer, columns it applies to).
transformer = ColumnTransformer(
    transformers=[
        # Impute missing values in 'fever'.
        ('tnf1', SimpleImputer(), ['fever']),
        # Ordinal-encode 'cough' with the order Mild, Strong.
        ('tnf2', OrdinalEncoder(categories=[['Mild', 'Strong']]), ['cough']),
        # One-hot encode 'city' and 'gender', dropping the first category.
        ('tnf3', OneHotEncoder(drop='first', sparse_output=False), ['city', 'gender']),
    ],
    # Columns not listed above are passed through unchanged.
    remainder='passthrough',
)

In [None]:
# Fit every step of the ColumnTransformer on the training split and transform
# it in one call. This rebinds X_train_transformed to the transformer's output.
X_train_transformed = transformer.fit_transform(X_train)
print(X_train_transformed.shape)

In [None]:
# Apply the already-fitted transformer to the test split (transform only,
# no refitting), then report the resulting shape.
X_test_transformed = transformer.transform(X_test)
print(X_test_transformed.shape)