# One-hot encoding with pandas `get_dummies`

In [180]:
import seaborn as sns
import pandas as pd 

# Load seaborn's example "tips" dataset.
df = sns.load_dataset("tips")

# One-hot encode sex, smoker and day in a single call.
# drop_first=True omits the first level of each encoded column, and
# dtype=int makes the indicator columns 0/1 integers.
e_df = pd.get_dummies(
    data=df,
    columns=["sex", "smoker", "day"],
    drop_first=True,
    dtype=int,
)
e_df

Unnamed: 0,total_bill,tip,time,size,sex_Female,smoker_No,day_Fri,day_Sat,day_Sun
0,16.99,1.01,Dinner,2,1,1,0,0,1
1,10.34,1.66,Dinner,3,0,1,0,0,1
2,21.01,3.50,Dinner,3,0,1,0,0,1
3,23.68,3.31,Dinner,2,0,1,0,0,1
4,24.59,3.61,Dinner,4,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Dinner,3,0,1,0,1,0
240,27.18,2.00,Dinner,2,1,0,0,1,0
241,22.67,2.00,Dinner,2,0,0,0,1,0
242,17.82,1.75,Dinner,2,0,1,0,1,0


# Alternative: one-hot encoding with scikit-learn's `OneHotEncoder`

In [183]:
import pandas as pd 
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder

In [185]:
# Reload the "tips" example dataset so this section starts from the original columns.
df = sns.load_dataset("tips")
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [187]:
# Fit the encoder on the three categorical columns and transform them in one step;
# .toarray() converts the result into a dense NumPy array.
encoder = OneHotEncoder()
encoded = (
    encoder
    .fit_transform(df[["sex", "smoker", "day"]])
    .toarray()
)

In [189]:
# Display the dense array produced by the encoder (one row per row of df).
encoded

array([[1., 0., 1., ..., 0., 1., 0.],
       [0., 1., 1., ..., 0., 1., 0.],
       [0., 1., 1., ..., 0., 1., 0.],
       ...,
       [0., 1., 0., ..., 1., 0., 0.],
       [0., 1., 1., ..., 1., 0., 0.],
       [1., 0., 1., ..., 0., 0., 1.]])

In [191]:
# Wrap the dense array in a DataFrame whose columns are the encoder's output
# feature names.
# index=df.index gives the encoded rows the same index labels as df. The later
# pd.concat(..., axis=1) with df aligns rows by index label, so without this a
# df with a non-default index (e.g. after filtering) would be paired with the
# wrong rows and padded with NaN.
encoded_df = pd.DataFrame(
    encoded,
    columns=encoder.get_feature_names_out(),
    index=df.index,
)
encoded_df

Unnamed: 0,sex_Female,sex_Male,smoker_No,smoker_Yes,day_Fri,day_Sat,day_Sun,day_Thur
0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
2,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
3,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
4,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...
239,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
240,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
241,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
242,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0


In [192]:
# Place the original columns and the indicator columns side by side.
# Concatenating along the column axis aligns rows by index label.
final_encoded_df = pd.concat(objs=[df, encoded_df], axis="columns")

In [195]:
# Display the combined frame: original columns followed by the one-hot indicator columns.
final_encoded_df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,sex_Female,sex_Male,smoker_No,smoker_Yes,day_Fri,day_Sat,day_Sun,day_Thur
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
2,21.01,3.50,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
240,27.18,2.00,Female,Yes,Sat,Dinner,2,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
241,22.67,2.00,Male,Yes,Sat,Dinner,2,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
242,17.82,1.75,Male,No,Sat,Dinner,2,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0


In [197]:
# Final frame: the original columns minus the three that were encoded, followed
# by their one-hot indicator columns.
# It is built directly from df and encoded_df rather than from the previous
# value of final_encoded_df, so this cell can be re-run without raising a
# KeyError for columns that an earlier run already dropped.
final_encoded_df = pd.concat(
    [df.drop(columns=['sex', 'smoker', 'day']), encoded_df],
    axis=1,
)
final_encoded_df

Unnamed: 0,total_bill,tip,time,size,sex_Female,sex_Male,smoker_No,smoker_Yes,day_Fri,day_Sat,day_Sun,day_Thur
0,16.99,1.01,Dinner,2,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,10.34,1.66,Dinner,3,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
2,21.01,3.50,Dinner,3,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
3,23.68,3.31,Dinner,2,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
4,24.59,3.61,Dinner,4,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Dinner,3,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
240,27.18,2.00,Dinner,2,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
241,22.67,2.00,Dinner,2,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
242,17.82,1.75,Dinner,2,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
