<a href="https://www.kaggle.com/code/piyushjain572/column-transformer?scriptVersionId=199301931" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [9]:
import numpy as np 
import pandas as pd 

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Read the toy COVID CSV from the Kaggle input directory, then preview ten random rows.
covid_csv = '/kaggle/input/covid-toy-dataset/covid_toy.csv'
df = pd.read_csv(covid_csv)
df.sample(n=10)

Unnamed: 0,age,gender,fever,cough,city,has_covid
54,60,Female,99.0,Mild,Mumbai,Yes
29,34,Female,,Strong,Mumbai,Yes
61,81,Female,98.0,Strong,Mumbai,No
26,19,Female,100.0,Mild,Kolkata,Yes
67,65,Male,99.0,Mild,Bangalore,No
95,12,Female,104.0,Mild,Bangalore,No
7,20,Female,,Strong,Mumbai,Yes
6,14,Male,101.0,Strong,Bangalore,No
57,49,Female,99.0,Strong,Bangalore,No
74,34,Female,104.0,Strong,Delhi,No


In [8]:
# Number of missing values in each column.
df.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [35]:
from sklearn.model_selection import train_test_split

# Features are the first five columns of df; the label is its last column.
features = df.iloc[:, :5]
target = df.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=2,
)
print(X_train.shape)
X_train.head()

(80, 5)


Unnamed: 0,age,gender,fever,cough,city
35,82,Female,102.0,Strong,Bangalore
11,65,Female,98.0,Mild,Mumbai
84,69,Female,98.0,Strong,Mumbai
44,20,Male,102.0,Strong,Delhi
73,34,Male,98.0,Strong,Kolkata


# Without Column Transformer

In [18]:
# Fill the missing fever readings with SimpleImputer (default settings).
# The imputer is fitted on the training split only and then reused on the test split.
fever_col = ['fever']
si = SimpleImputer()
X_train_fever = si.fit_transform(X_train[fever_col])
X_test_fever = si.transform(X_test[fever_col])
print(X_train_fever.shape)

(80, 1)


In [20]:
# Ordinal-encode the cough column with an explicit category order (Mild before Strong).
# The encoder is fitted on the training split and then applied to the test split.
cough_col = ['cough']
cough_levels = [['Mild', 'Strong']]
oe = OrdinalEncoder(categories=cough_levels)
X_train_cough = oe.fit_transform(X_train[cough_col])
X_test_cough = oe.transform(X_test[cough_col])
print(X_train_cough.shape)

(80, 1)


In [21]:
# One-hot encode the gender and city columns (in that order), dropping the first
# category of each column and returning a dense array rather than a sparse matrix.
nominal_cols = ['gender', 'city']
ohe = OneHotEncoder(sparse_output=False, drop='first')
X_train_city_gender = ohe.fit_transform(X_train[nominal_cols])
X_test_city_gender = ohe.transform(X_test[nominal_cols])
print(X_train_city_gender.shape)

(80, 4)


In [26]:
# Pull the age column out of both splits as 2-D NumPy arrays; it is used as-is.
age_col = ['age']
X_train_age = X_train[age_col].to_numpy()
X_test_age = X_test[age_col].to_numpy()
print(X_train_age.shape)

(80, 1)


In [30]:
# Concatenating all arrays into one.
# Train and test MUST stack the feature blocks in the same column order
# (age, fever, cough, one-hot gender/city). The shapes match even when the
# order differs, so a mismatch is silent: a model fitted on the train matrix
# would read the wrong columns from the test matrix.
X_train_transformed = np.concatenate(
    (X_train_age, X_train_fever, X_train_cough, X_train_city_gender),
    axis=1,
)
X_test_transformed = np.concatenate(
    (X_test_age, X_test_fever, X_test_cough, X_test_city_gender),
    axis=1,
)

# Cheap sanity check: both matrices expose the same number of feature columns.
assert X_train_transformed.shape[1] == X_test_transformed.shape[1]

X_train_transformed.shape

(80, 7)

# With Column Transformer

In [31]:
from sklearn.compose import ColumnTransformer

In [38]:
# Peek at one random training row to recall the raw column layout.
X_train.sample(n=1)

Unnamed: 0,age,gender,fever,cough,city
77,8,Female,101.0,Mild,Kolkata


In [41]:
# One (name, transformer, columns) entry per preprocessing step:
#   tnf1 - impute fever, tnf2 - ordinal-encode cough, tnf3 - one-hot encode city and gender.
column_steps = [
    ('tnf1', SimpleImputer(), ['fever']),
    ('tnf2', OrdinalEncoder(categories=[['Mild', 'Strong']]), ['cough']),
    ('tnf3', OneHotEncoder(drop='first', sparse_output=False), ['city', 'gender']),
]

# remainder='passthrough' forwards the columns not listed above (here: age) unchanged.
transformer = ColumnTransformer(
    transformers=column_steps,
    remainder='passthrough',
)

In [42]:
# Fit every step of the ColumnTransformer on the training features and transform them in one call.
X_train_transformed = transformer.fit_transform(X=X_train)
print(X_train_transformed.shape)

(80, 7)


In [43]:
# Apply the already-fitted ColumnTransformer to the test features (no re-fitting on test data).
X_test_transformed = transformer.transform(X=X_test)
print(X_test_transformed.shape)

(20, 7)
