# Chapter 2
## Section: Data transformation
Implementation of label, one-hot, and target encoding in Python.

In [None]:
# Installing category_encoders library
!pip install category_encoders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting category_encoders
  Downloading category_encoders-2.5.1.post0-py2.py3-none-any.whl (72 kB)
[K     |████████████████████████████████| 72 kB 703 kB/s 
Installing collected packages: category-encoders
Successfully installed category-encoders-2.5.1.post0


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Toy dataset: a numeric column, two categorical columns and a binary target.
orig_df = pd.DataFrame({
    'age': [45, 43, 54, 56, 54, 52, 41],
    'gender': ['M', 'F', 'F', 'M', 'M', 'F', 'M'],
    'group': ['H1', 'H1', 'H2', 'H3', 'H2', 'H1', 'H3'],
    'target': [0, 0, 1, 0, 1, 1, 0],
})

# --- Label encoding ---
# Encode on a copy so that orig_df keeps its original string categories.
label_encoded_df = orig_df.copy()
# A single encoder instance is re-fitted for each categorical column in turn,
# so after the loop `le` holds the fit for the last column only.
le = LabelEncoder()
for column in ('gender', 'group'):
    label_encoded_df[column] = le.fit_transform(label_encoded_df[column])
print(label_encoded_df)

# encoding using one hot encoding
from sklearn.preprocessing import OneHotEncoder
# initializing OneHotEncoder
ohe = OneHotEncoder(categories = 'auto')
# encoding gender column
gender_ohe = ohe.fit_transform(orig_df['gender'].values.reshape(-1,1)).toarray()
# Label the columns from the fitted categories instead of a hard-coded list:
# the encoder emits 'F' before 'M' here, so writing ['M', 'F'] by hand swaps
# the labels. categories_ must be read now, before `ohe` is re-fitted below.
gender_ohe_df = pd.DataFrame(gender_ohe,
                             columns=list(ohe.categories_[0]),
                             index=orig_df.index)
# encoding group column (re-fits the same encoder on the group values)
group_ohe = ohe.fit_transform(orig_df['group'].values.reshape(-1,1)).toarray()
group_ohe_df = pd.DataFrame(group_ohe,
                            columns=list(ohe.categories_[0]),
                            index=orig_df.index)
# generating the new dataframe with one hot encoded features:
# the untouched columns (age, target) followed by the indicator columns,
# which already carry their category names
onehot_encoded_df = pd.concat([orig_df.drop(['gender', 'group'], axis=1),
                               gender_ohe_df,
                               group_ohe_df], axis=1)
print(onehot_encoded_df)

# --- Target encoding ---
from category_encoders import TargetEncoder

# Encode on a copy so that orig_df keeps its original string categories.
target_encoded_df = orig_df.copy()
# initializing TargetEncoder; the same instance is re-fitted per column
te = TargetEncoder()
# Each categorical column is fitted against the 'target' column and replaced
# by the encoder's output for that column.
for feature in ('gender', 'group'):
    target_encoded_df[feature] = te.fit_transform(orig_df[feature], orig_df['target'])
print(target_encoded_df)

   age  gender  group  target
0   45       1      0       0
1   43       0      0       0
2   54       0      1       1
3   56       1      2       0
4   54       1      1       1
5   52       0      0       1
6   41       1      2       0
   age  target    M    F   H1   H2   H3
0   45       0  0.0  1.0  1.0  0.0  0.0
1   43       0  1.0  0.0  1.0  0.0  0.0
2   54       1  1.0  0.0  0.0  1.0  0.0
3   56       0  0.0  1.0  0.0  0.0  1.0
4   54       1  0.0  1.0  0.0  1.0  0.0
5   52       1  1.0  0.0  1.0  0.0  0.0
6   41       0  0.0  1.0  0.0  0.0  1.0
   age    gender     group  target
0   45  0.258469  0.344686       0
1   43  0.638285  0.344686       0
2   54  0.638285  0.846319       1
3   56  0.258469  0.115261       0
4   54  0.258469  0.846319       1
5   52  0.638285  0.344686       1
6   41  0.258469  0.115261       0


