# LabelEncoding

In [0]:
import pandas as pd

In [3]:
df = pd.read_csv("chronic_kidney_disease.csv",
                 header=None,
                 names=['age', 'bp', 'sg', 'al', 
                        'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr', 
                        'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 
                        'wc', 'rc', 'htn', 'dm', 'cad', 'appet', 
                        'pe', 'ane', 'class'])
df.head()


Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,htn,dm,cad,appet,pe,ane,class
0,48,80,1.02,1,0,?,normal,notpresent,notpresent,121,36,1.2,?,?,15.4,44,7800,5.2,yes,yes,no,good,no,no,ckd
1,7,50,1.02,4,0,?,normal,notpresent,notpresent,?,18,0.8,?,?,11.3,38,6000,?,no,no,no,good,no,no,ckd
2,62,80,1.01,2,3,normal,normal,notpresent,notpresent,423,53,1.8,?,?,9.6,31,7500,?,no,yes,no,poor,no,yes,ckd
3,48,70,1.005,4,0,normal,abnormal,present,notpresent,117,56,3.8,111,2.5,11.2,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
4,51,80,1.01,2,0,normal,normal,notpresent,notpresent,106,26,1.4,?,?,11.6,35,7300,4.6,no,no,no,good,no,no,ckd


In [4]:
# Identify categorical columns and create a list of them
#
categorical = df.dtypes ==object
categorical_cols = df.columns[categorical].tolist()
print(f"Categorical columns are: \n{categorical_cols}")

Categorical columns are: 
['age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane', 'class']


In [0]:
# Instantiate labelencoder
#
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

In [6]:
# Apply the label encoder for each column
#
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

df[categorical_cols].head(10)

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,htn,dm,cad,appet,pe,ane,class
0,36,8,3,1,0,0,2,1,1,21,63,9,34,42,57,30,69,33,2,3,1,1,1,1,0
1,60,5,3,4,0,0,2,1,1,146,32,4,34,42,13,24,53,48,1,2,1,1,1,1,0
2,52,8,1,2,3,2,2,1,1,114,81,15,34,42,114,17,67,48,1,3,1,2,1,2,0
3,36,7,0,4,0,2,1,2,1,17,84,55,1,0,12,18,59,18,2,2,1,2,2,2,0
4,40,8,1,2,0,2,2,1,1,6,51,11,34,42,16,21,65,26,1,2,1,1,1,1,0
5,50,9,2,3,0,0,0,1,1,121,50,8,25,5,23,25,69,24,2,3,1,1,2,1,0
6,58,7,1,0,0,0,2,1,1,0,82,48,0,15,25,22,89,48,1,2,1,1,1,1,0
7,11,10,2,2,4,2,1,1,1,112,57,8,34,42,25,30,61,30,1,3,1,1,2,1,0
8,41,0,2,3,0,2,1,2,1,35,87,16,34,42,8,19,85,20,2,3,1,1,1,2,0
9,42,9,3,2,0,1,1,2,1,120,4,76,3,10,113,15,15,16,2,3,1,2,1,2,0


# OneHotEncoding

In [0]:
# import OneHotEncoder
from sklearn.preprocessing import OneHotEncoder

# Instantiate OneHotEncoder
ohe = OneHotEncoder()

encoded = ohe.fit_transform(df[categorical_cols].values.reshape(-1,1)).toarray()


In [10]:
dfOneHot = pd.DataFrame(encoded, columns = ['symptom_'+ str(int(i)) for i in range(encoded.shape[1])])

df = pd.concat([df, dfOneHot], axis=1)
df

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,htn,dm,cad,appet,pe,ane,class,symptom_0,symptom_1,symptom_2,symptom_3,symptom_4,symptom_5,symptom_6,symptom_7,symptom_8,symptom_9,symptom_10,symptom_11,symptom_12,symptom_13,symptom_14,...,symptom_107,symptom_108,symptom_109,symptom_110,symptom_111,symptom_112,symptom_113,symptom_114,symptom_115,symptom_116,symptom_117,symptom_118,symptom_119,symptom_120,symptom_121,symptom_122,symptom_123,symptom_124,symptom_125,symptom_126,symptom_127,symptom_128,symptom_129,symptom_130,symptom_131,symptom_132,symptom_133,symptom_134,symptom_135,symptom_136,symptom_137,symptom_138,symptom_139,symptom_140,symptom_141,symptom_142,symptom_143,symptom_144,symptom_145,symptom_146
0,36.0,8.0,3.0,1.0,0.0,0.0,2.0,1.0,1.0,21.0,63.0,9.0,34.0,42.0,57.0,30.0,69.0,33.0,2.0,3.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,60.0,5.0,3.0,4.0,0.0,0.0,2.0,1.0,1.0,146.0,32.0,4.0,34.0,42.0,13.0,24.0,53.0,48.0,1.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,52.0,8.0,1.0,2.0,3.0,2.0,2.0,1.0,1.0,114.0,81.0,15.0,34.0,42.0,114.0,17.0,67.0,48.0,1.0,3.0,1.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,36.0,7.0,0.0,4.0,0.0,2.0,1.0,2.0,1.0,17.0,84.0,55.0,1.0,0.0,12.0,18.0,59.0,18.0,2.0,2.0,1.0,2.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,40.0,8.0,1.0,2.0,0.0,2.0,2.0,1.0,1.0,6.0,51.0,11.0,34.0,42.0,16.0,21.0,65.0,26.0,1.0,2.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9996,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9997,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9998,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
