<a href="https://colab.research.google.com/github/Sikandarh11/Feature-Engineering/blob/main/Column_Transformation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

In [None]:
# Load the toy COVID dataset from the Colab session's /content directory.
df = pd.read_csv(filepath_or_buffer="/content/covid_toy.csv")

In [None]:
# Preview the first five rows to see the column names and value types.
df.head(n=5)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [None]:
# Count how often each cough level occurs.
df.loc[:, 'cough'].value_counts()

Mild      62
Strong    38
Name: cough, dtype: int64

### Gender & City = OneHotEncoder (nominal columns)
### Cough = OrdinalEncoder (ordinal column)
### Has_covid = LabelEncoder (label column)

In [None]:
# Look at the two nominal columns that will be one-hot encoded.
df.loc[:, ['city', 'gender']]

Unnamed: 0,city,gender
0,Kolkata,Male
1,Delhi,Male
2,Delhi,Male
3,Kolkata,Female
4,Mumbai,Female
...,...,...
95,Bangalore,Female
96,Kolkata,Female
97,Bangalore,Female
98,Mumbai,Female


In [None]:
# Fill missing fever readings (SimpleImputer defaults to mean imputation).
# The imputer is fitted and applied once; the same result is kept both as a
# standalone (n_rows, 1) array (`simIm`) and written back into the frame, so
# the two can never diverge.
trans = SimpleImputer()
# NOTE: astype(int) truncates the fractional part of the imputed values.
simIm = trans.fit_transform(df[['fever']]).astype(int)
df['fever'] = simIm
# Confirm that no missing values remain in any column.
df.isnull().sum()

age          0
gender       0
fever        0
cough        0
city         0
has_covid    0
dtype: int64

In [None]:
# One-hot encode the nominal columns as a dense int32 array.
# drop="first" removes one indicator per feature to avoid redundant columns.
try:
    # scikit-learn >= 1.2 spells the dense-output switch `sparse_output`;
    # the old `sparse` keyword was removed in 1.4.
    ohe = OneHotEncoder(drop="first", sparse_output=False, dtype=np.int32)
except TypeError:
    # Older scikit-learn releases only accept the legacy keyword name.
    ohe = OneHotEncoder(drop="first", sparse=False, dtype=np.int32)
ohe_t = ohe.fit_transform(df[['gender', 'city']])
ohe_t.shape



(100, 4)

In [None]:
# Encode cough severity as integers using the explicit order Mild < Strong
# (Mild -> 0, Strong -> 1).
oe = OrdinalEncoder(dtype=np.int32, categories=[['Mild', 'Strong']])
oe.fit(df[['cough']])
oe_t = oe.transform(df[['cough']])

In [None]:
# Assemble the manually transformed feature matrix column-wise:
# age | imputed fever | ordinal cough | one-hot gender/city.
age_col = df[['age']].to_numpy(dtype=np.int32)
x = np.hstack((age_col, simIm, oe_t, ohe_t))

# **Using sklearn.compose.ColumnTransformer**

In [None]:
# Encode the target column as integer class labels.
# `lb` is the fitted encoder; `le` is the resulting 1-D label array.
lb = LabelEncoder()
lb.fit(df['has_covid'])
le = lb.transform(df['has_covid'])
le.shape

(100,)

In [None]:
# Declare all per-column preprocessing in a single ColumnTransformer instead
# of encoding each column by hand and concatenating the pieces.
colTran = ColumnTransformer(
    transformers=[
        # Nominal columns -> one indicator column per category
        # (no category is dropped here).
        ("t1", OneHotEncoder(), ['gender', 'city']),
        # Ordinal column: state the order Mild < Strong explicitly rather
        # than relying on the encoder's default sorted-category order.
        ("t2", OrdinalEncoder(categories=[['Mild', 'Strong']]), ['cough']),
        # Fill missing fever readings (SimpleImputer defaults to the mean).
        ("t3", SimpleImputer(), ['fever']),
    ],
    # Columns not named in any transformer are passed through unchanged.
    remainder="passthrough",
)

In [None]:
# Fit the transformer on the feature columns only (target removed) and keep
# the transformed matrix.
features = df.drop(columns='has_covid')
x_trans = colTran.fit_transform(features)
x_trans.shape

(100, 9)

In [None]:
# Append the encoded target as the last column and cast the whole matrix to int.
target_col = le[:, np.newaxis]
x_t = np.hstack((x_trans, target_col)).astype(int)

In [None]:
# Display the assembled integer matrix: transformed features followed by the
# encoded target in the last column.
x_t

array([[  0,   1,   0,   0,   1,   0,   0, 103,  60,   0],
       [  0,   1,   0,   1,   0,   0,   0, 100,  27,   1],
       [  0,   1,   0,   1,   0,   0,   0, 101,  42,   0],
       [  1,   0,   0,   0,   1,   0,   0,  98,  31,   0],
       [  1,   0,   0,   0,   0,   1,   0, 101,  65,   0],
       [  1,   0,   1,   0,   0,   0,   0, 100,  84,   1],
       [  0,   1,   1,   0,   0,   0,   1, 101,  14,   0],
       [  1,   0,   0,   0,   0,   1,   1, 100,  20,   1],
       [  1,   0,   1,   0,   0,   0,   1, 100,  19,   0],
       [  1,   0,   0,   1,   0,   0,   0, 101,  64,   0],
       [  1,   0,   0,   1,   0,   0,   0, 100,  75,   0],
       [  1,   0,   0,   0,   0,   1,   0,  98,  65,   1],
       [  1,   0,   0,   0,   1,   0,   1,  99,  25,   0],
       [  0,   1,   1,   0,   0,   0,   0, 102,  64,   1],
       [  0,   1,   1,   0,   0,   0,   0, 104,  51,   0],
       [  0,   1,   0,   0,   1,   0,   1, 103,  70,   1],
       [  1,   0,   0,   0,   1,   0,   0, 103,  69,   1