In [168]:
import pandas as pd
import numpy as np
import requests
from pandas.core.dtypes.common import is_numeric_dtype
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,OrdinalEncoder 

In [169]:
df = pd.read_csv("insurance.csv")
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [170]:
df.isnull().sum() #no null values

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

## Replace
#### Sex: female - 0 , male - 1
#### smoker: yes - 0, no - 1
#### region: southwest - 0, southeast - 1, northwest - 2, northeast - 3 

In [171]:
df_r = df.copy()
for col in df_r:
    if is_numeric_dtype(df_r[col]):
        continue
    uni = df_r[col].unique()
    i = 0
    for u in uni:
        df_r[col] = df_r[col].replace(u,i)
        i += 1
df_r.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,0,0,16884.924
1,18,1,33.77,1,1,1,1725.5523
2,28,1,33.0,3,1,1,4449.462
3,33,1,22.705,0,1,2,21984.47061
4,32,1,28.88,0,1,2,3866.8552


# Label Encoder

In [172]:
#manually
df_le_man = df.copy()
LE = LabelEncoder()
df_le_man.sex = LE.fit_transform(df.sex)
df_le_man.smoker = LE.fit_transform(df.smoker)
df_le_man.region = LE.fit_transform(df.region)
df_le_man.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


In [173]:
#loop
df_le_loop = df.copy()
for col in df_le_loop:
    if is_numeric_dtype(df_le_loop[col]):
        continue
    df_le_loop[col] = LE.fit_transform(df[col])
df_le_loop.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


# One Hot Encoding

In [174]:
#manual
df_ohe_man = df.copy()
enc_data = ['sex','smoker','region']
OHE = OneHotEncoder(sparse_output=False,drop="first")
encoded=OHE.fit_transform(df[enc_data])
enc_df=pd.DataFrame(encoded,columns=['male','yes','northwest','southeast','southwest'])
for col in df_ohe_man:
    if  is_numeric_dtype(df_ohe_man[col]):
        continue
    df_ohe_man.drop(col,axis=1,inplace=True)
df_ohe_man = pd.concat([df_ohe_man,enc_df], axis=1) 
df_ohe_man.head(5)

Unnamed: 0,age,bmi,children,charges,male,yes,northwest,southeast,southwest
0,19,27.9,0,16884.924,0.0,1.0,0.0,0.0,1.0
1,18,33.77,1,1725.5523,1.0,0.0,0.0,1.0,0.0
2,28,33.0,3,4449.462,1.0,0.0,0.0,1.0,0.0
3,33,22.705,0,21984.47061,1.0,0.0,1.0,0.0,0.0
4,32,28.88,0,3866.8552,1.0,0.0,1.0,0.0,0.0


In [175]:
#loop
df_ohe_loop = df.copy()
for col in df_ohe_loop:
    if is_numeric_dtype(df_ohe_loop[col]):
        continue
    encoded_loop=OHE.fit_transform(df_ohe_loop[[col]])
    enc_loop_df=pd.DataFrame(encoded_loop,columns=df[col].unique()[1:])
    df_ohe_loop.drop(col,axis=1,inplace=True)
    df_ohe_loop = pd.concat([df_ohe_loop,enc_loop_df],axis=1)
df_ohe_loop.head(5)

Unnamed: 0,age,bmi,children,charges,male,no,southeast,northwest,northeast
0,19,27.9,0,16884.924,0.0,1.0,0.0,0.0,1.0
1,18,33.77,1,1725.5523,1.0,0.0,0.0,1.0,0.0
2,28,33.0,3,4449.462,1.0,0.0,0.0,1.0,0.0
3,33,22.705,0,21984.47061,1.0,0.0,1.0,0.0,0.0
4,32,28.88,0,3866.8552,1.0,0.0,1.0,0.0,0.0


## Using Dummies

In [176]:
#mannual
df_ohe_man = df.copy()
dum_sex = pd.get_dummies(df_ohe_man['sex'],drop_first=True) #encoding column and droping one encoded column 
dum_smo = pd.get_dummies(df_ohe_man['smoker'],drop_first=True) #encoding column and droping one encoded column 
dum_reg = pd.get_dummies(df_ohe_man['region'],drop_first=True) #encoding column and droping one encoded column 
for col in df_ohe_man:
    if  is_numeric_dtype(df_ohe_man[col]):
        continue
    df_ohe_man.drop(col,axis=1,inplace=True)
df_ohe_man = pd.concat([df_ohe_man, dum_sex,dum_smo,dum_reg], axis=1)
df_ohe_man.head()

Unnamed: 0,age,bmi,children,charges,male,yes,northwest,southeast,southwest
0,19,27.9,0,16884.924,0,1,0,0,1
1,18,33.77,1,1725.5523,1,0,0,1,0
2,28,33.0,3,4449.462,1,0,0,1,0
3,33,22.705,0,21984.47061,1,0,1,0,0
4,32,28.88,0,3866.8552,1,0,1,0,0


In [177]:
#loop
df_ohe_loop = df.copy()
for col in df_ohe_loop.columns:
    if is_numeric_dtype(df_ohe_loop[col]):
        continue
    dummies = pd.get_dummies(df_ohe_loop[col], drop_first=True)
    df_ohe_loop.drop(df_ohe_loop[[col]], axis=1,inplace=True)
    df_ohe_loop=pd.concat([df_ohe_loop,dummies], axis=1)
df_ohe_loop.head()

Unnamed: 0,age,bmi,children,charges,male,yes,northwest,southeast,southwest
0,19,27.9,0,16884.924,0,1,0,0,1
1,18,33.77,1,1725.5523,1,0,0,1,0
2,28,33.0,3,4449.462,1,0,0,1,0
3,33,22.705,0,21984.47061,1,0,1,0,0
4,32,28.88,0,3866.8552,1,0,1,0,0


# Ordinal Encoding

In [178]:
#mannual
df_oe_man = df.copy()
cols = ['sex','smoker','region']
OE = OrdinalEncoder()
df_oe_man[cols] = OE.fit_transform(df_oe_man[cols])
df_oe_man.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0.0,27.9,0,1.0,3.0,16884.924
1,18,1.0,33.77,1,0.0,2.0,1725.5523
2,28,1.0,33.0,3,0.0,2.0,4449.462
3,33,1.0,22.705,0,0.0,1.0,21984.47061
4,32,1.0,28.88,0,0.0,1.0,3866.8552


In [179]:
#using loop
df_oe_loop = df.copy()
for col in df_oe_loop:
    if is_numeric_dtype(df_oe_loop[col]):
        continue
    df_oe_loop[col] = OE.fit_transform(df_oe_loop[[col]])
df_oe_loop.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0.0,27.9,0,1.0,3.0,16884.924
1,18,1.0,33.77,1,0.0,2.0,1725.5523
2,28,1.0,33.0,3,0.0,2.0,4449.462
3,33,1.0,22.705,0,0.0,1.0,21984.47061
4,32,1.0,28.88,0,0.0,1.0,3866.8552
