In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [2]:
df = pd.DataFrame({'Temp':['Hot','Cold','Warm','Very Cold','Hot','Cold','Very Hot','Hot','Warm','Hot'],
      'Color':['Red','Blue','Yellow','Yellow','Blue','Red','Blue','Blue','Yellow','Red']})

In [3]:
df.head()

Unnamed: 0,Temp,Color
0,Hot,Red
1,Cold,Blue
2,Warm,Yellow
3,Very Cold,Yellow
4,Hot,Blue


### Handling ordinal data

In [4]:
# Encoding based on hierarchy

res = {'Very Cold':1, 'Cold': 2, 'Warm': 3, 'Hot': 4, 'Very Hot': 5 }
df['Ordinal_Encoded_Temp'] = df['Temp'].map(res)

In [5]:
df

Unnamed: 0,Temp,Color,Ordinal_Encoded_Temp
0,Hot,Red,4
1,Cold,Blue,2
2,Warm,Yellow,3
3,Very Cold,Yellow,1
4,Hot,Blue,4
5,Cold,Red,2
6,Very Hot,Blue,5
7,Hot,Blue,4
8,Warm,Yellow,3
9,Hot,Red,4


In [6]:
# Encoding using label encoding

LE = LabelEncoder()
df['Label_Encoded_color'] = LE.fit_transform(df['Color'])

In [7]:
# Using one hot encoding

df = pd.get_dummies(df,columns=['Color'])

In [8]:
df

Unnamed: 0,Temp,Ordinal_Encoded_Temp,Label_Encoded_color,Color_Blue,Color_Red,Color_Yellow
0,Hot,4,1,0,1,0
1,Cold,2,0,1,0,0
2,Warm,3,2,0,0,1
3,Very Cold,1,2,0,0,1
4,Hot,4,0,1,0,0
5,Cold,2,1,0,1,0
6,Very Hot,5,0,1,0,0
7,Hot,4,0,1,0,0
8,Warm,3,2,0,0,1
9,Hot,4,1,0,1,0


### Practicing the encoding techniuqes using sklearn, pandas, category_encoders

In [9]:
df1 = pd.DataFrame({'Temperature': ['Hot', 'Cold', 'Very Hot','Warm','Hot','Warm','Warm','Hot','Hot','Cold'],
                    'Color':['Red','Yellow','Blue','Blue','Red','Yellow','Red','Yellow','Yellow','Yellow'],
                    'Target': [1,1,1,0,1,0,1,0,1,1]},
                     columns = ['Temperature', 'Color', 'Target'])

In [10]:
df1

Unnamed: 0,Temperature,Color,Target
0,Hot,Red,1
1,Cold,Yellow,1
2,Very Hot,Blue,1
3,Warm,Blue,0
4,Hot,Red,1
5,Warm,Yellow,0
6,Warm,Red,1
7,Hot,Yellow,0
8,Hot,Yellow,1
9,Cold,Yellow,1


#### One Hot Encoding

In [12]:
# using pandas get_dummies

df1 = pd.get_dummies(df1, prefix='Temp', columns=['Temperature'])
df1

Unnamed: 0,Color,Target,Temp_Cold,Temp_Hot,Temp_Very Hot,Temp_Warm
0,Red,1,0,1,0,0
1,Yellow,1,1,0,0,0
2,Blue,1,0,0,1,0
3,Blue,0,0,0,0,1
4,Red,1,0,1,0,0
5,Yellow,0,0,0,0,1
6,Red,1,0,0,0,1
7,Yellow,0,0,1,0,0
8,Yellow,1,0,1,0,0
9,Yellow,1,1,0,0,0


In [15]:
'''Using scikit learn preprocessing tools (OneHotEncoder)--- this method does not create additional
column unlike the pandas get_dummies method'''

from sklearn.preprocessing import OneHotEncoder
df = pd.DataFrame({'Temperature': ['Hot', 'Cold', 'Very Hot','Warm','Hot','Warm','Warm','Hot','Hot','Cold'],
                    'Color':['Red','Yellow','Blue','Blue','Red','Yellow','Red','Yellow','Yellow','Yellow'],},
                     columns = ['Temperature', 'Color'])


# ohc = OneHotEncoder()
# ohe = ohc.fit_transform(df, )

In [24]:
df['Temperature'].values.reshape(-1,1).toarray()

AttributeError: 'numpy.ndarray' object has no attribute 'toarray'