In [1]:
import pandas as pd

In [38]:
# Toy dataset with one nominal feature (color), one ordinal feature (size),
# one numeric feature (price) and a string class label.
df = pd.DataFrame(
    data=[
        ['green', 'M', 10.1, 'class1'],
        ['red', 'L', 13.5, 'class2'],
        ['blue', 'XL', 15.3, 'class1'],
    ],
    columns=['color', 'size', 'price', 'classlabel'],
)
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class1
1,red,L,13.5,class2
2,blue,XL,15.3,class1


### Mapping Ordinal features

In [31]:
# Sizes are ordinal, so each label gets an integer that preserves the order
# M < L < XL.
size_mapping = dict(XL=3, L=2, M=1)

# Swap the string sizes in the frame for their integer ranks.
df['size'] = df['size'].map(size_mapping)
df

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,class1
1,red,2,13.5,class2
2,blue,3,15.3,class1


In [12]:
# Build the reverse lookup (integer rank -> size label) by pairing each value
# of size_mapping with its key.
inv_size_mapping = dict(zip(size_mapping.values(), size_mapping.keys()))

# Restore the original string sizes in the frame.
df['size'] = df['size'].map(inv_size_mapping)
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class1
1,red,L,13.5,class2
2,blue,XL,15.3,class1


## Encoding class labels

#### from scratch

In [13]:
import numpy as np

# Number the distinct class labels in the order np.unique returns them,
# giving a label -> integer lookup.
class_mapping = dict(
    (label, idx) for idx, label in enumerate(np.unique(df['classlabel']))
)
class_mapping

{'class1': 0, 'class2': 1}

In [14]:
# Replace each string class label with its integer code from class_mapping.
df['classlabel'] = df['classlabel'].map(class_mapping)
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,0
1,red,L,13.5,1
2,blue,XL,15.3,0


In [15]:
# class_mapping goes label -> integer; flip each pair to get integer -> label.
inv_class_mapping = {idx: label for label, idx in class_mapping.items()}

# Turn the integer codes in the frame back into the original string labels.
df['classlabel'] = df['classlabel'].map(inv_class_mapping)
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class1
1,red,L,13.5,class2
2,blue,XL,15.3,class1


####  using LabelEncoder

In [16]:
# LabelEncoder lives in sklearn.preprocessing (the previous module path was a typo).
from sklearn.preprocessing import LabelEncoder

In [17]:
# Let LabelEncoder derive the label -> integer encoding from the class label
# column and return the encoded labels in one step.
class_le = LabelEncoder()
y = class_le.fit_transform(df['classlabel'].values)
y

array([0, 1, 0], dtype=int64)

In [18]:
# Map the integer codes in y back to the original string class labels.
class_le.inverse_transform(y)

array(['class1', 'class2', 'class1'], dtype=object)

## Using One Hot Encoding on nominal Features

In [20]:
from sklearn.preprocessing import OneHotEncoder

In [39]:
# Re-encode the ordinal size column as integers (M < L < XL) so it can sit in
# a numeric feature matrix.
size_mapping = dict(zip(('XL', 'L', 'M'), (3, 2, 1)))
df['size'] = df['size'].map(size_mapping)

In [40]:
# Pull the three feature columns out as an array (the recorded output below
# shows dtype=object because the columns have mixed types).
X = df[['color','size', 'price']].values
color_le = LabelEncoder()
# Overwrite column 0 (color) in place with the integer codes from the encoder.
X[:,0] = color_le.fit_transform(X[:,0])

In [41]:
# Show the feature matrix after label-encoding the color column.
X

array([[1, 1, 10.1],
       [2, 2, 13.5],
       [0, 3, 15.3]], dtype=object)

In [45]:
# OneHotEncoder's `categorical_features` argument was deprecated in
# scikit-learn 0.20 and removed in 0.22; selecting which columns to encode is
# now the job of ColumnTransformer.
from sklearn.compose import ColumnTransformer

# One-hot encode column 0 (color) and pass the remaining columns (size, price)
# through untouched. The encoded columns come first, followed by the
# passthrough columns, matching the layout of the previous output.
# sparse_threshold=0 forces a dense result, so no .toarray() call is needed.
ohe = ColumnTransformer(
    transformers=[('color_onehot', OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)

# X has dtype=object, so the stacked result does too; cast to float for a
# plain numeric matrix.
ohe.fit_transform(X).astype(float)

array([[  0. ,   1. ,   0. ,   1. ,  10.1],
       [  0. ,   0. ,   1. ,   2. ,  13.5],
       [  1. ,   0. ,   0. ,   3. ,  15.3]])

Note that OneHotEncoder in scikit-learn versions before 0.20 only works with a matrix of integers, which is why the color column was label-encoded first.

#### With Pandas get_dummies

In [47]:
# get_dummies expands the string column (color) into one indicator column per
# value; the recorded output below shows price and size left unchanged.
pd.get_dummies(df[['price','color','size']])

Unnamed: 0,price,size,color_blue,color_green,color_red
0,10.1,1,0,1,0
1,13.5,2,0,0,1
2,15.3,3,1,0,0
